import json
import os
import random
import re
import shutil
from typing import Callable
from typing import List, Text, Optional

from langchain import PromptTemplate
from pydantic.dataclasses import dataclass

from dev_gpt.apis import gpt
from dev_gpt.apis.gpt import _GPTConversation
from dev_gpt.apis.jina_cloud import process_error_message, push_executor, is_executor_in_hub
from dev_gpt.apis.pypi import is_package_on_pypi, clean_requirements_txt
from dev_gpt.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
    BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
    REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
    IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES, DOCKER_BASE_IMAGE_VERSION
from dev_gpt.options.generate.pm.pm import PM
from dev_gpt.options.generate.templates_user import template_generate_microservice_name, \
    template_generate_possible_packages, \
    template_solve_code_issue, \
    template_solve_pip_dependency_issue, template_is_dependency_issue, template_generate_playground, \
    template_generate_function, template_generate_test, template_generate_requirements, \
    template_chain_of_thought, template_summarize_error, \
    template_solve_apt_get_dependency_issue
from dev_gpt.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
from dev_gpt.utils.string_tools import print_colored


@dataclass
class TaskSpecification:
    task: Optional[Text]
    test: Optional[Text]


class Generator:
    def __init__(self, task_description, path, model='gpt-4'):
        self.gpt_session = gpt.GPTSession(model=model)
        self.microservice_specification = TaskSpecification(task=task_description, test=None)
        self.microservice_root_path = path

    @staticmethod
    def extract_content_from_result(plain_text, file_name, match_single_block=False, can_contain_code_block=True):
        optional_line_break = '\n' if can_contain_code_block else ''
        # the \n at the end makes sure that ``` within the generated code is not matched because it is not right before a line break
        pattern = fr"(?:\*|\*\*| ){file_name}\*?\*?\n```(?:\w+\n)?([\s\S]*?){optional_line_break}```"
        matches = re.findall(pattern, plain_text, re.MULTILINE)
        if matches:
            return matches[-1].strip()
        elif match_single_block:
            # Check for a single code block
            single_code_block_pattern = r"```(?:\w+\n)?([\s\S]*?)```"
            single_code_block_match = re.findall(single_code_block_pattern, plain_text, re.MULTILINE)
            if len(single_code_block_match) == 1:
                return single_code_block_match[0].strip()
        return ''
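
    # Illustrative note (the reply below is a made-up example of the assumed LLM output format, not a real response):
    # a chat reply such as
    #     **microservice.py**
    #     ```python
    #     def foo(): ...
    #     ```
    # yields the code between the fences via extract_content_from_result(reply, 'microservice.py');
    # with match_single_block=True, a reply containing exactly one unnamed ``` block is accepted as a fallback.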

    def write_config_yml(self, class_name, dest_folder, python_file=EXECUTOR_FILE_NAME):
        config_content = f'''jtype: {class_name}
py_modules:
  - {python_file}
metas:
  name: {class_name}
'''
        with open(os.path.join(dest_folder, 'config.yml'), 'w', encoding='utf-8') as f:
            f.write(config_content)

    def files_to_string(self, file_name_to_content, restrict_keys=None):
        all_microservice_files_string = ''
        for file_name, tag in FILE_AND_TAG_PAIRS:
            if file_name in file_name_to_content and (not restrict_keys or file_name in restrict_keys):
                all_microservice_files_string += f'**{file_name}**\n```{tag}\n{file_name_to_content[file_name]}\n```\n\n'
        return all_microservice_files_string.strip()

    def get_default_parse_result_fn(self, files_names: List[str]):
        def _default_parse_result_fn(x):
            _parsed_results = {}
            for _file_name in files_names:
                _content = self.extract_content_from_result(x, _file_name, match_single_block=len(files_names) == 1)
                if _content != '':
                    _parsed_results[_file_name] = _content
            return _parsed_results

        return _default_parse_result_fn

    def generate_and_persist_file(
            self,
            section_title: str,
            template: PromptTemplate,
            destination_folder: str,
            file_name_s: List[str] = None,
            parse_result_fn: Callable = None,
            **template_kwargs
    ):
        """Generate file(s) from the given template and persist them in the given destination folder.

        Also returns the generated content as a dictionary mapping file_name to its content.

        Args:
            section_title (str): The title of the section to be printed in the console.
            template (PromptTemplate): The template to be used for generating the file(s).
            destination_folder (str): The destination folder where the generated file(s) should be persisted.
            file_name_s (List[str], optional): The name of the file(s) to be generated. Defaults to None.
            parse_result_fn (Callable, optional): A function that parses the generated content and returns a
                dictionary mapping file_name to its content. If no content could be extracted, it returns an
                empty dictionary. Defaults to None. If None, default parsing is used, which extracts content
                from the generated output based on the file name(s).
            **template_kwargs: The keyword arguments to be passed to the template.
        """
        if parse_result_fn is None:
            parse_result_fn = self.get_default_parse_result_fn(file_name_s)

        print_colored('', f'\n\n############# {section_title} #############', 'blue')
        system_introduction_message = _GPTConversation._create_system_message(
            self.microservice_specification.task, self.microservice_specification.test
        )
        conversation = self.gpt_session.get_conversation(messages=[system_introduction_message])
        template_kwargs = {k: v for k, v in template_kwargs.items() if k in template.input_variables}
        if 'file_name' in template.input_variables and len(file_name_s) == 1:
            template_kwargs['file_name'] = file_name_s[0]
        content_raw = conversation.chat(
            template.format(
                **template_kwargs
            )
        )
        content = parse_result_fn(content_raw)
        if content == {}:
            content_raw = conversation.chat(
                'You must add the content' + (f' for {file_name_s[0]}' if len(file_name_s) == 1 else ''))
            content = parse_result_fn(content_raw)
        for _file_name, _file_content in content.items():
            persist_file(_file_content, os.path.join(destination_folder, _file_name))
        return content
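
    # Abridged usage sketch for generate_and_persist_file (mirrors the real call made in generate_microservice
    # below; 'some_folder' is a placeholder and some keyword arguments are omitted):
    # content = self.generate_and_persist_file(
    #     section_title='Microservice',
    #     template=template_generate_function,
    #     destination_folder=some_folder,
    #     file_name_s=[IMPLEMENTATION_FILE_NAME],
    #     microservice_description=self.microservice_specification.task,
    #     test_description=self.microservice_specification.test,
    # )
    # The returned dict maps each generated file name to its content, and each file is persisted into some_folder.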

    def generate_microservice(
            self,
            microservice_name,
            packages,
            num_approach,
    ):
        MICROSERVICE_FOLDER_v1 = get_microservice_path(
            self.microservice_root_path, microservice_name, packages, num_approach, 1
        )
        os.makedirs(MICROSERVICE_FOLDER_v1)

        with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'jina_wrapper.py'), 'r',
                  encoding='utf-8') as f:
            microservice_executor_boilerplate = f.read()
        microservice_executor_code = microservice_executor_boilerplate.replace(
            'class DevGPTExecutor(Executor):', f'class {microservice_name}(Executor):'
        )
        persist_file(microservice_executor_code, os.path.join(MICROSERVICE_FOLDER_v1, EXECUTOR_FILE_NAME))

        with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'apis.py'), 'r',
                  encoding='utf-8') as f:
            persist_file(f.read(), os.path.join(MICROSERVICE_FOLDER_v1, 'apis.py'))

        microservice_content = self.generate_and_persist_file(
            section_title='Microservice',
            template=template_generate_function,
            destination_folder=MICROSERVICE_FOLDER_v1,
            microservice_description=self.microservice_specification.task,
            test_description=self.microservice_specification.test,
            packages=packages,
            file_name_purpose=IMPLEMENTATION_FILE_NAME,
            tag_name=IMPLEMENTATION_FILE_TAG,
            file_name_s=[IMPLEMENTATION_FILE_NAME],
        )[IMPLEMENTATION_FILE_NAME]

        test_microservice_content = self.generate_and_persist_file(
            'Test Microservice',
            template_generate_test,
            MICROSERVICE_FOLDER_v1,
            code_files_wrapped=self.files_to_string({EXECUTOR_FILE_NAME: microservice_content}),
            microservice_name=microservice_name,
            microservice_description=self.microservice_specification.task,
            test_description=self.microservice_specification.test,
            file_name_purpose=TEST_EXECUTOR_FILE_NAME,
            tag_name=TEST_EXECUTOR_FILE_TAG,
            file_name_s=[TEST_EXECUTOR_FILE_NAME],
        )[TEST_EXECUTOR_FILE_NAME]

        requirements_content = self.generate_and_persist_file(
            'Requirements',
            template_generate_requirements,
            MICROSERVICE_FOLDER_v1,
            code_files_wrapped=self.files_to_string({
                IMPLEMENTATION_FILE_NAME: microservice_content,
                TEST_EXECUTOR_FILE_NAME: test_microservice_content,
            }),
            file_name_purpose=REQUIREMENTS_FILE_NAME,
            file_name_s=[REQUIREMENTS_FILE_NAME],
            parse_result_fn=self.parse_result_fn_requirements,
            tag_name=REQUIREMENTS_FILE_TAG,
        )[REQUIREMENTS_FILE_NAME]

        # I deactivated this because 3.5-turbo was hallucinating packages that were not needed
        # now, in the first iteration the default dockerfile is used
        # self.generate_and_persist_file(
        #     section_title='Generate Dockerfile',
        #     template=template_generate_apt_get_install,
        #     destination_folder=MICROSERVICE_FOLDER_v1,
        #     file_name_s=None,
        #     parse_result_fn=self.parse_result_fn_dockerfile,
        #     docker_file_wrapped=self.read_docker_template(),
        #     requirements_file_wrapped=self.files_to_string({
        #         REQUIREMENTS_FILE_NAME: requirements_content,
        #     })
        # )

        with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r',
                  encoding='utf-8') as f:
            docker_file_template_lines = f.readlines()
        docker_file_template_lines = [
            line.replace('{{APT_GET_PACKAGES}}', '').replace('{{DOCKER_BASE_IMAGE_VERSION}}', DOCKER_BASE_IMAGE_VERSION)
            for line in docker_file_template_lines
        ]
        docker_file_content = '\n'.join(docker_file_template_lines)
        persist_file(docker_file_content, os.path.join(MICROSERVICE_FOLDER_v1, 'Dockerfile'))

        self.write_config_yml(microservice_name, MICROSERVICE_FOLDER_v1)
        print('\nFirst version of the microservice generated. Start iterating on it to make the tests pass...')
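
    # Orientation note: after generate_microservice, version 1 of the microservice folder is expected to contain
    # (inferred from the calls above; the concrete file names come from dev_gpt.constants):
    #   - the renamed executor wrapper (EXECUTOR_FILE_NAME) and apis.py copied from static_files
    #   - the generated implementation (IMPLEMENTATION_FILE_NAME) and test (TEST_EXECUTOR_FILE_NAME)
    #   - the generated requirements file (REQUIREMENTS_FILE_NAME) with the pinned base packages prepended
    #   - the default Dockerfile rendered from the static template and a config.yml written by write_config_yml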

    @staticmethod
    def read_docker_template():
        with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r',
                  encoding='utf-8') as f:
            return f.read()

    def parse_result_fn_dockerfile(self, content_raw: str):
        json_string = self.extract_content_from_result(content_raw, 'apt-get-packages.json', match_single_block=True)
        packages = ' '.join(json.loads(json_string)['packages'])
        docker_file_template = self.read_docker_template()
        return {
            DOCKER_FILE_NAME: docker_file_template.replace(
                '{{APT_GET_PACKAGES}}', '{APT_GET_PACKAGES}'
            ).replace(
                '{{DOCKER_BASE_IMAGE_VERSION}}', DOCKER_BASE_IMAGE_VERSION
            ).format(APT_GET_PACKAGES=packages)
        }

    def parse_result_fn_requirements(self, content_raw: str):
        content_parsed = self.extract_content_from_result(content_raw, 'requirements.txt', match_single_block=True)
        lines = content_parsed.split('\n')
        lines = [
            line for line in lines
            if not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo']])
        ]
        content_modified = f'''jina==3.15.1.dev14
docarray==0.21.0
openai==0.27.5
pytest
{os.linesep.join(lines)}'''
        return {REQUIREMENTS_FILE_NAME: content_modified}
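
    # Illustrative input/output for parse_result_fn_requirements (hypothetical generated requirements.txt):
    #     jina==2.0.0
    #     numpy
    #     openai
    # Lines mentioning jina, docarray, openai, pytest or gpt_3_5_turbo are dropped, and the pinned header above is
    # prepended, so the resulting file starts with jina==3.15.1.dev14, docarray==0.21.0, openai==0.27.5 and pytest,
    # followed only by numpy.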

    def generate_playground(self, microservice_name, microservice_path):
        print_colored('', '\n\n############# Playground #############', 'blue')

        file_name_to_content = get_all_microservice_files_with_content(microservice_path)
        conversation = self.gpt_session.get_conversation()
        conversation.chat(
            template_generate_playground.format(
                code_files_wrapped=self.files_to_string(file_name_to_content, ['test_microservice.py']),
                microservice_name=microservice_name,
            )
        )
        playground_content_raw = conversation.chat(
            template_chain_of_thought.format(
                file_name_purpose='app.py/the playground',
                file_name='app.py',
                tag_name='python',
            )
        )
        playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True)
        if playground_content == '':
            content_raw = conversation.chat('You must add the app.py code. You must not output any other code')
            playground_content = self.extract_content_from_result(
                content_raw, 'app.py', match_single_block=True
            )

        gateway_path = os.path.join(microservice_path, 'gateway')
        shutil.copytree(os.path.join(os.path.dirname(__file__), 'static_files', 'gateway'), gateway_path)
        persist_file(playground_content, os.path.join(gateway_path, 'app.py'))

        # fill in the name of the microservice
        gateway_name = f'Gateway{microservice_name}'
        custom_gateway_path = os.path.join(gateway_path, 'custom_gateway.py')
        with open(custom_gateway_path, 'r', encoding='utf-8') as f:
            custom_gateway_content = f.read()
        custom_gateway_content = custom_gateway_content.replace(
            'class CustomGateway(CompositeGateway):', f'class {gateway_name}(CompositeGateway):'
        )
        with open(custom_gateway_path, 'w', encoding='utf-8') as f:
            f.write(custom_gateway_content)

        # write config.yml
        self.write_config_yml(gateway_name, gateway_path, 'custom_gateway.py')

        # push the gateway
        print('Final step...')
        hubble_log = push_executor(gateway_path)
        if not is_executor_in_hub(gateway_name):
            raise Exception(f'{microservice_name} not in hub. Hubble logs: {hubble_log}')

    def debug_microservice(self, microservice_name, num_approach, packages):
        for i in range(1, MAX_DEBUGGING_ITERATIONS):
            print('Debugging iteration', i)
            print('Trying to debug the microservice. Might take a while...')
            previous_microservice_path = get_microservice_path(
                self.microservice_root_path, microservice_name, packages, num_approach, i
            )
            next_microservice_path = get_microservice_path(
                self.microservice_root_path, microservice_name, packages, num_approach, i + 1
            )
            clean_requirements_txt(previous_microservice_path)

            log_hubble = push_executor(previous_microservice_path)
            error = process_error_message(log_hubble)
            if error:
                print('An error occurred during the build process. Feeding the error back to the assistant...')
                self.do_debug_iteration(error, next_microservice_path, previous_microservice_path)
                if i == MAX_DEBUGGING_ITERATIONS - 1:
                    raise self.MaxDebugTimeReachedException('Could not debug the microservice.')
            else:
                # at the moment, there can be cases where no error log is extracted but the executor is still not
                # published; this leads to problems later on when someone tries a run or deployment
                if is_executor_in_hub(microservice_name):
                    print('Successfully built the microservice.')
                    break
                else:
                    raise Exception(f'{microservice_name} not in hub. Hubble logs: {log_hubble}')
        return get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, i)

    def do_debug_iteration(self, error, next_microservice_path, previous_microservice_path):
        os.makedirs(next_microservice_path)
        file_name_to_content = get_all_microservice_files_with_content(previous_microservice_path)
        for file_name, content in file_name_to_content.items():
            persist_file(content, os.path.join(next_microservice_path, file_name))

        summarized_error = self.summarize_error(error)
        dock_req_string = self.files_to_string({
            key: val for key, val in file_name_to_content.items() if key in ['requirements.txt', 'Dockerfile']
        })
        is_apt_get_dependency_issue = self.is_dependency_issue(summarized_error, dock_req_string, 'apt-get')
        if is_apt_get_dependency_issue:
            self.generate_and_persist_file(
                section_title='Debugging apt-get dependency issue',
                template=template_solve_apt_get_dependency_issue,
                destination_folder=next_microservice_path,
                file_name_s=['apt-get-packages.json'],
                parse_result_fn=self.parse_result_fn_dockerfile,
                summarized_error=summarized_error,
                all_files_string=dock_req_string,
            )
            print('Dockerfile updated')
        else:
            is_pip_dependency_issue = self.is_dependency_issue(summarized_error, dock_req_string, 'PIP')
            if is_pip_dependency_issue:
                self.generate_and_persist_file(
                    section_title='Debugging pip dependency issue',
                    template=template_solve_pip_dependency_issue,
                    destination_folder=next_microservice_path,
                    file_name_s=[REQUIREMENTS_FILE_NAME],
                    summarized_error=summarized_error,
                    all_files_string=dock_req_string,
                )
            else:
                self.generate_and_persist_file(
                    section_title='Debugging code issue',
                    template=template_solve_code_issue,
                    destination_folder=next_microservice_path,
                    file_name_s=[IMPLEMENTATION_FILE_NAME, TEST_EXECUTOR_FILE_NAME, REQUIREMENTS_FILE_NAME],
                    summarized_error=summarized_error,
                    task_description=self.microservice_specification.task,
                    test_description=self.microservice_specification.test,
                    all_files_string=self.files_to_string(
                        {key: val for key, val in file_name_to_content.items() if key != EXECUTOR_FILE_NAME}
                    ),
                )
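
    # Triage summary of the method above: a summarized build error is first treated as an apt-get dependency issue
    # (Dockerfile regenerated), then as a pip dependency issue (requirements.txt regenerated), and otherwise as a
    # code issue that regenerates the implementation, test and requirements files.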

    class MaxDebugTimeReachedException(BaseException):
        pass

    def is_dependency_issue(self, summarized_error, dock_req_string: str, package_manager: str):
        # a few heuristics to quickly jump ahead
        if any([error_message in summarized_error for error_message in ['AttributeError', 'NameError', 'AssertionError']]):
            return False
        if package_manager.lower() == 'pip' and any(
                [em in summarized_error for em in ['ModuleNotFoundError', 'ImportError']]):
            return True

        print_colored('', f'Is it a {package_manager} dependency issue?', 'blue')
        conversation = self.gpt_session.get_conversation()
        answer_raw = conversation.chat(
            template_is_dependency_issue.format(
                summarized_error=summarized_error,
                all_files_string=dock_req_string
            ).replace('PACKAGE_MANAGER', package_manager)
        )
        answer_json_string = self.extract_content_from_result(answer_raw, 'response.json', match_single_block=True)
        answer = json.loads(answer_json_string)['dependency_installation_failure']
        return 'yes' in answer.lower()

    def generate_microservice_name(self, description):
        name = self.generate_and_persist_file(
            section_title='Generate microservice name',
            template=template_generate_microservice_name,
            destination_folder=self.microservice_root_path,
            file_name_s=['name.txt'],
            description=description
        )['name.txt']
        return name

    def get_possible_packages(self):
        print_colored('', '\n\n############# What packages to use? #############', 'blue')
        packages_json_string = self.generate_and_persist_file(
            section_title='Generate possible packages',
            template=template_generate_possible_packages,
            destination_folder=self.microservice_root_path,
            file_name_s=['strategies.json'],
            description=self.microservice_specification.task
        )['strategies.json']
        packages_list = [[pkg.strip().lower() for pkg in packages] for packages in json.loads(packages_json_string)]
        packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in packages_list]
        packages_list = self.filter_packages_list(packages_list)
        packages_list = packages_list[:NUM_IMPLEMENTATION_STRATEGIES]
        return packages_list

    # '/private/var/folders/f5/whmffl4d7q79s29jpyb6719m0000gn/T/pytest-of-florianhonicke/pytest-128/test_generation_level_0_mock_i0'
    # '/private/var/folders/f5/whmffl4d7q79s29jpyb6719m0000gn/T/pytest-of-florianhonicke/pytest-129/test_generation_level_0_mock_i0'
    def generate(self):
        self.microservice_specification.task, self.microservice_specification.test = PM().refine_specification(
            self.microservice_specification.task
        )
        os.makedirs(self.microservice_root_path)
        generated_name = self.generate_microservice_name(self.microservice_specification.task)
        microservice_name = f'{generated_name}{random.randint(0, 10_000_000)}'
        packages_list = self.get_possible_packages()
        for num_approach, packages in enumerate(packages_list):
            try:
                self.generate_microservice(microservice_name, packages, num_approach)
                final_version_path = self.debug_microservice(microservice_name, num_approach, packages)
                self.generate_playground(microservice_name, final_version_path)
            except self.MaxDebugTimeReachedException:
                print('Could not debug the Microservice with the approach:', packages)
                if num_approach == len(packages_list) - 1:
                    print_colored(
                        '',
                        f'Could not debug the Microservice with any of the approaches: {packages}. Giving up.',
                        'red'
                    )
                    return -1
                continue
            print(f'''
You can now run or deploy your microservice:
dev-gpt run --path {self.microservice_root_path}
dev-gpt deploy --path {self.microservice_root_path}
''')
            return 0

    def summarize_error(self, error):
        conversation = self.gpt_session.get_conversation()
        error_summary = conversation.chat(template_summarize_error.format(error=error))
        return error_summary

    @staticmethod
    def replace_with_gpt_3_5_turbo_if_possible(pkg):
        if pkg in LANGUAGE_PACKAGES:
            return 'gpt_3_5_turbo'
        return pkg

    @staticmethod
    def filter_packages_list(packages_list):
        # filter out complete package lists
        packages_list = [
            packages for packages in packages_list
            if all([
                pkg not in BLACKLISTED_PACKAGES  # no package is allowed to be blacklisted
                for pkg in packages
            ])
        ]
        # filter out single packages
        packages_list = [
            [
                package for package in packages
                if (package not in UNNECESSARY_PACKAGES) and (
                    # all packages must be on pypi or it is gpt_3_5_turbo
                    is_package_on_pypi(package) or package == 'gpt_3_5_turbo'
                )
            ] for packages in packages_list
        ]
        return packages_list
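
    # Illustrative behaviour of filter_packages_list (package names below are hypothetical):
    #     [['requests', 'some_blacklisted_pkg'], ['pandas', 'not-on-pypi-pkg', 'gpt_3_5_turbo']]
    # Assuming 'some_blacklisted_pkg' is in BLACKLISTED_PACKAGES and 'not-on-pypi-pkg' is not published on PyPI,
    # the first strategy is dropped entirely, while the second keeps 'pandas' and 'gpt_3_5_turbo'
    # (the latter is always allowed even though it is not a PyPI package).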

    # def create_prototype_implementation(self):
    #     microservice_py_lines = ['''\
    # Class {microservice_name}:''']
    #     for sub_task in self.pm.iterate_over_sub_tasks_pydantic(self.sub_task_tree):
    #         microservice_py_lines.append(f' {sub_task.python_fn_signature}')
    #         microservice_py_lines.append(f' """')
    #         microservice_py_lines.append(f' {sub_task.python_fn_docstring}')
    #         microservice_py_lines.append(f' """')
    #         microservice_py_lines.append(f' raise NotImplementedError')
    #     microservice_py_str = '\n'.join(microservice_py_lines)
    #     persist_file(os.path.join(self.microservice_root_path, 'microservice.py'), microservice_py_str)