# dev-gpt/dev_gpt/options/generate/generator.py
import json
import os
import random
import re
import shutil
from typing import Callable
from typing import List, Text, Optional
from langchain import PromptTemplate
from pydantic.dataclasses import dataclass
from dev_gpt.apis import gpt
from dev_gpt.apis.gpt import _GPTConversation
from dev_gpt.apis.jina_cloud import process_error_message, push_executor, is_executor_in_hub
from dev_gpt.apis.pypi import is_package_on_pypi, clean_requirements_txt
from dev_gpt.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES, DOCKER_BASE_IMAGE_VERSION
from dev_gpt.options.generate.pm.pm import PM
from dev_gpt.options.generate.templates_user import template_generate_microservice_name, \
template_generate_possible_packages, \
template_solve_code_issue, \
template_solve_pip_dependency_issue, template_is_dependency_issue, template_generate_playground, \
template_generate_function, template_generate_test, template_generate_requirements, \
template_chain_of_thought, template_summarize_error, \
template_solve_apt_get_dependency_issue
from dev_gpt.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
from dev_gpt.utils.string_tools import print_colored
@dataclass
class TaskSpecification:
task: Optional[Text]
test: Optional[Text]
class Generator:
def __init__(self, task_description, path, model='gpt-4'):
self.gpt_session = gpt.GPTSession(model=model)
self.microservice_specification = TaskSpecification(task=task_description, test=None)
self.microservice_root_path = path
@staticmethod
def extract_content_from_result(plain_text, file_name, match_single_block=False, can_contain_code_block=True):
optional_line_break = '\n' if can_contain_code_block else ''  # the '\n' before the closing fence ensures that a ``` inside the generated code (i.e. not at the start of a line) does not terminate the match
pattern = fr"(?:\*|\*\*| ){file_name}\*?\*?\n```(?:\w+\n)?([\s\S]*?){optional_line_break}```"
matches = re.findall(pattern, plain_text, re.MULTILINE)
if matches:
return matches[-1].strip()
elif match_single_block:
# Check for a single code block
single_code_block_pattern = r"```(?:\w+\n)?([\s\S]*?)```"
single_code_block_match = re.findall(single_code_block_pattern, plain_text, re.MULTILINE)
if len(single_code_block_match) == 1:
return single_code_block_match[0].strip()
return ''
def write_config_yml(self, class_name, dest_folder, python_file=EXECUTOR_FILE_NAME):
config_content = f'''jtype: {class_name}
py_modules:
- {python_file}
metas:
name: {class_name}
'''
with open(os.path.join(dest_folder, 'config.yml'), 'w', encoding='utf-8') as f:
f.write(config_content)
def files_to_string(self, file_name_to_content, restrict_keys=None):
all_microservice_files_string = ''
for file_name, tag in FILE_AND_TAG_PAIRS:
if file_name in file_name_to_content and (not restrict_keys or file_name in restrict_keys):
all_microservice_files_string += f'**{file_name}**\n```{tag}\n{file_name_to_content[file_name]}\n```\n\n'
return all_microservice_files_string.strip()
def get_default_parse_result_fn(self, files_names: List[str]):
def _default_parse_result_fn(x):
_parsed_results = {}
for _file_name in files_names:
_content = self.extract_content_from_result(x, _file_name, match_single_block=len(files_names) == 1)
if _content != '':
_parsed_results[_file_name] = _content
return _parsed_results
return _default_parse_result_fn
def generate_and_persist_file(
self,
section_title: str,
template: PromptTemplate,
destination_folder: str,
file_name_s: List[str] = None,
parse_result_fn: Callable = None,
**template_kwargs
):
"""This function generates file(s) using the given template and persists it/them in the given destination folder.
It also returns the generated content as a dictionary mapping file_name to its content.
Args:
section_title (str): The title of the section to be printed in the console.
template (PromptTemplate): The template to be used for generating the file(s).
destination_folder (str): The destination folder where the generated file(s) should be persisted.
file_name_s (List[str], optional): The name of the file(s) to be generated. Defaults to None.
parse_result_fn (Callable, optional): A function that parses the generated content and returns a dictionary
mapping file_name to its content. If no content could be extracted, it returns an empty dictionary.
Defaults to None, in which case the default parser extracts the content based on the given file name(s).
**template_kwargs: The keyword arguments to be passed to the template.
"""
if parse_result_fn is None:
parse_result_fn = self.get_default_parse_result_fn(file_name_s)
print_colored('', f'\n\n############# {section_title} #############', 'blue')
system_introduction_message = _GPTConversation._create_system_message(
self.microservice_specification.task,
self.microservice_specification.test
)
conversation = self.gpt_session.get_conversation(messages=[system_introduction_message])
template_kwargs = {k: v for k, v in template_kwargs.items() if k in template.input_variables}
if 'file_name' in template.input_variables and len(file_name_s) == 1:
template_kwargs['file_name'] = file_name_s[0]
content_raw = conversation.chat(
template.format(
**template_kwargs
)
)
content = parse_result_fn(content_raw)
if content == {}:
content_raw = conversation.chat(
'You must add the content' + (f' for {file_name_s[0]}' if len(file_name_s) == 1 else ''))
content = parse_result_fn(content_raw)
for _file_name, _file_content in content.items():
persist_file(_file_content, os.path.join(destination_folder, _file_name))
return content
def generate_microservice(
self,
microservice_name,
packages,
num_approach,
):
MICROSERVICE_FOLDER_v1 = get_microservice_path(self.microservice_root_path, microservice_name, packages,
num_approach, 1)
os.makedirs(MICROSERVICE_FOLDER_v1)
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'jina_wrapper.py'), 'r', encoding='utf-8') as f:
microservice_executor_boilerplate = f.read()
microservice_executor_code = microservice_executor_boilerplate.replace('class DevGPTExecutor(Executor):',
f'class {microservice_name}(Executor):')
persist_file(microservice_executor_code, os.path.join(MICROSERVICE_FOLDER_v1, EXECUTOR_FILE_NAME))
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'apis.py'), 'r', encoding='utf-8') as f:
persist_file(f.read(), os.path.join(MICROSERVICE_FOLDER_v1, 'apis.py'))
microservice_content = self.generate_and_persist_file(
section_title='Microservice',
template=template_generate_function,
destination_folder=MICROSERVICE_FOLDER_v1,
microservice_description=self.microservice_specification.task,
test_description=self.microservice_specification.test,
packages=packages,
file_name_purpose=IMPLEMENTATION_FILE_NAME,
tag_name=IMPLEMENTATION_FILE_TAG,
file_name_s=[IMPLEMENTATION_FILE_NAME],
)[IMPLEMENTATION_FILE_NAME]
test_microservice_content = self.generate_and_persist_file(
'Test Microservice',
template_generate_test,
MICROSERVICE_FOLDER_v1,
code_files_wrapped=self.files_to_string({EXECUTOR_FILE_NAME: microservice_content}),
microservice_name=microservice_name,
microservice_description=self.microservice_specification.task,
test_description=self.microservice_specification.test,
file_name_purpose=TEST_EXECUTOR_FILE_NAME,
tag_name=TEST_EXECUTOR_FILE_TAG,
file_name_s=[TEST_EXECUTOR_FILE_NAME],
)[TEST_EXECUTOR_FILE_NAME]
requirements_content = self.generate_and_persist_file(
'Requirements',
template_generate_requirements,
MICROSERVICE_FOLDER_v1,
code_files_wrapped=self.files_to_string({
IMPLEMENTATION_FILE_NAME: microservice_content,
TEST_EXECUTOR_FILE_NAME: test_microservice_content,
}),
file_name_purpose=REQUIREMENTS_FILE_NAME,
file_name_s=[REQUIREMENTS_FILE_NAME],
parse_result_fn=self.parse_result_fn_requirements,
tag_name=REQUIREMENTS_FILE_TAG,
)[REQUIREMENTS_FILE_NAME]
# Deactivated because gpt-3.5-turbo hallucinated apt-get packages that were not needed;
# for the first iteration, the default Dockerfile is used instead.
# self.generate_and_persist_file(
# section_title='Generate Dockerfile',
# template=template_generate_apt_get_install,
# destination_folder=MICROSERVICE_FOLDER_v1,
# file_name_s=None,
# parse_result_fn=self.parse_result_fn_dockerfile,
# docker_file_wrapped=self.read_docker_template(),
# requirements_file_wrapped=self.files_to_string({
# REQUIREMENTS_FILE_NAME: requirements_content,
# })
# )
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r',
encoding='utf-8') as f:
docker_file_template_lines = f.readlines()
docker_file_template_lines = [
line.replace('{{APT_GET_PACKAGES}}', '').replace('{{DOCKER_BASE_IMAGE_VERSION}}', DOCKER_BASE_IMAGE_VERSION)
for line in docker_file_template_lines
]
docker_file_content = ''.join(docker_file_template_lines)  # readlines() already keeps the trailing newlines
persist_file(docker_file_content, os.path.join(MICROSERVICE_FOLDER_v1, 'Dockerfile'))
self.write_config_yml(microservice_name, MICROSERVICE_FOLDER_v1)
print('\nFirst version of the microservice generated. Start iterating on it to make the tests pass...')
@staticmethod
def read_docker_template():
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r', encoding='utf-8') as f:
return f.read()
def parse_result_fn_dockerfile(self, content_raw: str):
json_string = self.extract_content_from_result(content_raw, 'apt-get-packages.json', match_single_block=True)
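# Illustrative assumption about the reply format (not guaranteed by the original prompts): the model
# is expected to return an apt-get-packages.json block such as {"packages": ["ffmpeg", "libsndfile1"]};
# the package names are joined with spaces and substituted for {{APT_GET_PACKAGES}} in the Dockerfile template.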
packages = ' '.join(json.loads(json_string)['packages'])
docker_file_template = self.read_docker_template()
return {DOCKER_FILE_NAME: docker_file_template.replace('{{APT_GET_PACKAGES}}', '{APT_GET_PACKAGES}').replace('{{DOCKER_BASE_IMAGE_VERSION}}', DOCKER_BASE_IMAGE_VERSION).format(
APT_GET_PACKAGES=packages)}
def parse_result_fn_requirements(self, content_raw: str):
content_parsed = self.extract_content_from_result(content_raw, 'requirements.txt', match_single_block=True)
lines = content_parsed.split('\n')
lines = [line for line in lines if
not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo']])]
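# Illustrative example (package names are made up): a generated requirements list like
#   numpy
#   openai==0.27.0
#   requests
# keeps 'numpy' and 'requests', drops the conflicting 'openai' pin, and the survivors are appended
# below the pinned jina/docarray/openai/pytest block assembled next.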
content_modified = f'''jina==3.15.1.dev14
docarray==0.21.0
openai==0.27.5
pytest
{os.linesep.join(lines)}'''
return {REQUIREMENTS_FILE_NAME: content_modified}
def generate_playground(self, microservice_name, microservice_path):
print_colored('', '\n\n############# Playground #############', 'blue')
file_name_to_content = get_all_microservice_files_with_content(microservice_path)
conversation = self.gpt_session.get_conversation()
conversation.chat(
template_generate_playground.format(
code_files_wrapped=self.files_to_string(file_name_to_content, ['test_microservice.py']),
microservice_name=microservice_name,
)
)
playground_content_raw = conversation.chat(
template_chain_of_thought.format(
file_name_purpose='app.py/the playground',
file_name='app.py',
tag_name='python',
)
)
playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True)
if playground_content == '':
content_raw = conversation.chat('You must add the app.py code. You must not output any other code')
playground_content = self.extract_content_from_result(
content_raw, 'app.py', match_single_block=True
)
gateway_path = os.path.join(microservice_path, 'gateway')
shutil.copytree(os.path.join(os.path.dirname(__file__), 'static_files', 'gateway'), gateway_path)
persist_file(playground_content, os.path.join(gateway_path, 'app.py'))
# fill in the microservice name
gateway_name = f'Gateway{microservice_name}'
custom_gateway_path = os.path.join(gateway_path, 'custom_gateway.py')
with open(custom_gateway_path, 'r', encoding='utf-8') as f:
custom_gateway_content = f.read()
custom_gateway_content = custom_gateway_content.replace(
'class CustomGateway(CompositeGateway):',
f'class {gateway_name}(CompositeGateway):'
)
with open(custom_gateway_path, 'w', encoding='utf-8') as f:
f.write(custom_gateway_content)
# write config.yml
self.write_config_yml(gateway_name, gateway_path, 'custom_gateway.py')
# push the gateway
print('Final step...')
hubble_log = push_executor(gateway_path)
if not is_executor_in_hub(gateway_name):
raise Exception(f'{gateway_name} not in hub. Hubble logs: {hubble_log}')
def debug_microservice(self, microservice_name, num_approach, packages):
for i in range(1, MAX_DEBUGGING_ITERATIONS):
print('Debugging iteration', i)
print('Trying to debug the microservice. Might take a while...')
previous_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
num_approach, i)
next_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
num_approach, i + 1)
clean_requirements_txt(previous_microservice_path)
log_hubble = push_executor(previous_microservice_path)
error = process_error_message(log_hubble)
if error:
print('An error occurred during the build process. Feeding the error back to the assistant...')
self.do_debug_iteration(error, next_microservice_path, previous_microservice_path)
if i == MAX_DEBUGGING_ITERATIONS - 1:
raise self.MaxDebugTimeReachedException('Could not debug the microservice.')
else:
# at the moment, there are cases where no error log is extracted even though the executor was not published,
# which causes problems later when someone tries to run or deploy it
if is_executor_in_hub(microservice_name):
print('Successfully built the microservice.')
break
else:
raise Exception(f'{microservice_name} not in hub. Hubble logs: {log_hubble}')
return get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, i)
def do_debug_iteration(self, error, next_microservice_path, previous_microservice_path):
os.makedirs(next_microservice_path)
file_name_to_content = get_all_microservice_files_with_content(previous_microservice_path)
for file_name, content in file_name_to_content.items():
persist_file(content, os.path.join(next_microservice_path, file_name))
summarized_error = self.summarize_error(error)
dock_req_string = self.files_to_string({
key: val for key, val in file_name_to_content.items() if
key in ['requirements.txt', 'Dockerfile']
})
is_apt_get_dependency_issue = self.is_dependency_issue(summarized_error, dock_req_string, 'apt-get')
if is_apt_get_dependency_issue:
self.generate_and_persist_file(
section_title='Debugging apt-get dependency issue',
template=template_solve_apt_get_dependency_issue,
destination_folder=next_microservice_path,
file_name_s=['apt-get-packages.json'],
parse_result_fn=self.parse_result_fn_dockerfile,
summarized_error=summarized_error,
all_files_string=dock_req_string,
)
print('Dockerfile updated')
else:
is_pip_dependency_issue = self.is_dependency_issue(summarized_error, dock_req_string, 'PIP')
if is_pip_dependency_issue:
self.generate_and_persist_file(
section_title='Debugging pip dependency issue',
template=template_solve_pip_dependency_issue,
destination_folder=next_microservice_path,
file_name_s=[REQUIREMENTS_FILE_NAME],
summarized_error=summarized_error,
all_files_string=dock_req_string,
)
else:
self.generate_and_persist_file(
section_title='Debugging code issue',
template=template_solve_code_issue,
destination_folder=next_microservice_path,
file_name_s=[IMPLEMENTATION_FILE_NAME, TEST_EXECUTOR_FILE_NAME, REQUIREMENTS_FILE_NAME],
summarized_error=summarized_error,
task_description=self.microservice_specification.task,
test_description=self.microservice_specification.test,
all_files_string=self.files_to_string(
{key: val for key, val in file_name_to_content.items() if key != EXECUTOR_FILE_NAME}),
)
class MaxDebugTimeReachedException(BaseException):
pass
def is_dependency_issue(self, summarized_error, dock_req_string: str, package_manager: str):
# a few heuristics to short-circuit the GPT check below
if any([error_message in summarized_error for error_message in
['AttributeError', 'NameError', 'AssertionError']]):
return False
if package_manager.lower() == 'pip' and any(
[em in summarized_error for em in ['ModuleNotFoundError', 'ImportError']]):
return True
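# For example (illustrative): a summarized error containing 'AssertionError' is classified as a code
# issue without asking the model, while 'ModuleNotFoundError: No module named ...' short-circuits to a
# pip dependency issue; anything else falls through to the GPT check below.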
print_colored('', f'Is it a {package_manager} dependency issue?', 'blue')
conversation = self.gpt_session.get_conversation()
answer_raw = conversation.chat(
template_is_dependency_issue.format(summarized_error=summarized_error,
all_files_string=dock_req_string).replace('PACKAGE_MANAGER',
package_manager)
)
answer_json_string = self.extract_content_from_result(answer_raw, 'response.json', match_single_block=True)
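# Illustrative assumption about the expected reply: a response.json block such as
#   {"dependency_installation_failure": "yes"}
# where any answer containing 'yes' is treated as a dependency installation failure.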
answer = json.loads(answer_json_string)['dependency_installation_failure']
return 'yes' in answer.lower()
def generate_microservice_name(self, description):
name = self.generate_and_persist_file(
section_title='Generate microservice name',
template=template_generate_microservice_name,
destination_folder=self.microservice_root_path,
file_name_s=['name.txt'],
description=description
)['name.txt']
return name
def get_possible_packages(self):
print_colored('', '\n\n############# What packages to use? #############', 'blue')
packages_json_string = self.generate_and_persist_file(
section_title='Generate possible packages',
template=template_generate_possible_packages,
destination_folder=self.microservice_root_path,
file_name_s=['strategies.json'],
description=self.microservice_specification.task
)['strategies.json']
packages_list = [[pkg.strip().lower() for pkg in packages] for packages in json.loads(packages_json_string)]
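# Illustrative assumption: strategies.json is expected to contain a list of package lists, e.g.
#   [["librosa", "soundfile"], ["pydub"]]
# where each inner list is one implementation strategy (the package names here are made up).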
packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in
packages_list]
packages_list = self.filter_packages_list(packages_list)
packages_list = packages_list[:NUM_IMPLEMENTATION_STRATEGIES]
return packages_list
def generate(self):
self.microservice_specification.task, self.microservice_specification.test = PM().refine_specification(self.microservice_specification.task)
os.makedirs(self.microservice_root_path)
generated_name = self.generate_microservice_name(self.microservice_specification.task)
microservice_name = f'{generated_name}{random.randint(0, 10_000_000)}'
packages_list = self.get_possible_packages()
for num_approach, packages in enumerate(packages_list):
try:
self.generate_microservice(microservice_name, packages, num_approach)
final_version_path = self.debug_microservice(microservice_name, num_approach, packages)
self.generate_playground(microservice_name, final_version_path)
except self.MaxDebugTimeReachedException:
print('Could not debug the Microservice with the approach:', packages)
if num_approach == len(packages_list) - 1:
print_colored('',
f'Could not debug the Microservice with any of the approaches: {packages_list}. Giving up.',
'red')
return -1
continue
print(f'''
You can now run or deploy your microservice:
dev-gpt run --path {self.microservice_root_path}
dev-gpt deploy --path {self.microservice_root_path}
'''
)
return 0
def summarize_error(self, error):
conversation = self.gpt_session.get_conversation()
error_summary = conversation.chat(template_summarize_error.format(error=error))
return error_summary
@staticmethod
def replace_with_gpt_3_5_turbo_if_possible(pkg):
if pkg in LANGUAGE_PACKAGES:
return 'gpt_3_5_turbo'
return pkg
@staticmethod
def filter_packages_list(packages_list):
# drop entire strategies (package lists) that contain a blacklisted package
packages_list = [
packages for packages in packages_list if all([
pkg not in BLACKLISTED_PACKAGES # no package is allowed to be blacklisted
for pkg in packages
])
]
# drop individual packages within each remaining strategy
packages_list = [
[
package for package in packages
if (package not in UNNECESSARY_PACKAGES)
and ( # all packages must be on pypi or it is gpt_3_5_turbo
is_package_on_pypi(package)
or package == 'gpt_3_5_turbo'
)
] for packages in packages_list
]
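# Illustrative example (package names are made up): if BLACKLISTED_PACKAGES contained 'selenium',
# [['selenium', 'requests'], ['pillow', 'no-such-package']] would first lose the whole first strategy,
# and 'no-such-package' would then be dropped from the second because it is not on PyPI.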
return packages_list
# def create_prototype_implementation(self):
# microservice_py_lines = ['''\
# Class {microservice_name}:''']
# for sub_task in self.pm.iterate_over_sub_tasks_pydantic(self.sub_task_tree):
# microservice_py_lines.append(f' {sub_task.python_fn_signature}')
# microservice_py_lines.append(f' """')
# microservice_py_lines.append(f' {sub_task.python_fn_docstring}')
# microservice_py_lines.append(f' """')
# microservice_py_lines.append(f' raise NotImplementedError')
# microservice_py_str = '\n'.join(microservice_py_lines)
# persist_file(os.path.join(self.microservice_root_path, 'microservice.py'), microservice_py_str)