From db9ae2882831a2a50e2f3200a2b3cf296737beb5 Mon Sep 17 00:00:00 2001 From: Joschka Braun Date: Tue, 11 Apr 2023 15:06:47 +0200 Subject: [PATCH 01/10] feat: support gpt turbo --- src/cli.py | 4 +++- src/executor_factory.py | 12 +++++++++--- src/gpt.py | 16 ++++++++++------ 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/cli.py b/src/cli.py index 2c0bc37..2d7cc93 100644 --- a/src/cli.py +++ b/src/cli.py @@ -16,13 +16,15 @@ def main(): @click.option('--num_approaches', default=3, type=int, help='Number of num_approaches to use to fulfill the task (default: 3).') @click.option('--output_path', default='executor', help='Path to the output folder (must be empty). ') +@click.option('--model', default='gpt-4', help='GPT model to use (default: gpt-4).') def create( description, test, num_approaches=3, output_path='executor', + model='gpt-4' ): - executor_factory = ExecutorFactory() + executor_factory = ExecutorFactory(model=model) executor_factory.create(description, num_approaches, output_path, test) diff --git a/src/executor_factory.py b/src/executor_factory.py index c7b0a41..ca52766 100644 --- a/src/executor_factory.py +++ b/src/executor_factory.py @@ -12,8 +12,8 @@ from src.utils.string_tools import print_colored class ExecutorFactory: - def __init__(self): - self.gpt_session = gpt.GPTSession() + def __init__(self, model='gpt-4'): + self.gpt_session = gpt.GPTSession(model=model) def extract_content_from_result(self, plain_text, file_name): pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)```" @@ -21,7 +21,13 @@ class ExecutorFactory: if match: return match.group(1).strip() else: - return '' + # Check for a single code block + single_code_block_pattern = r"^```(?:\w+\n)?([\s\S]*?)```" + single_code_block_match = re.findall(single_code_block_pattern, plain_text, re.MULTILINE) + if len(single_code_block_match) == 1: + return single_code_block_match[0].strip() + else: + return '' def write_config_yml(self, executor_name, dest_folder): config_content = f''' diff --git a/src/gpt.py b/src/gpt.py index 7af6da8..c3d0fff 100644 --- a/src/gpt.py +++ b/src/gpt.py @@ -11,15 +11,19 @@ from src.prompt_system import system_base_definition from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError from src.utils.string_tools import print_colored + class GPTSession: - def __init__(self): + def __init__(self, model: str = 'gpt-4'): self.get_openai_api_key() - if self.is_gpt4_available(): + if model == 'gpt-4' and self.is_gpt4_available(): self.supported_model = 'gpt-4' self.pricing_prompt = PRICING_GPT4_PROMPT self.pricing_generation = PRICING_GPT4_GENERATION - else: - self.supported_model = 'gpt-3.5-turbo' + elif (model == 'gpt-4' and not self.is_gpt4_available()) or model == 'gpt-3.5-turbo': + if model == 'gpt-4': + print_colored('GPT-4 is not available. Using GPT-3.5-turbo instead.', 'yellow') + model = 'gpt-3.5-turbo' + self.supported_model = model self.pricing_prompt = PRICING_GPT3_5_TURBO_PROMPT self.pricing_generation = PRICING_GPT3_5_TURBO_GENERATION self.chars_prompt_so_far = 0 @@ -52,8 +56,8 @@ class GPTSession: self.chars_prompt_so_far += chars_prompt self.chars_generation_so_far += chars_generation print('\n') - money_prompt = round(self.chars_prompt_so_far / 3.4 * self.pricing_prompt / 1000, 2) - money_generation = round(self.chars_generation_so_far / 3.4 * self.pricing_generation / 1000, 2) + money_prompt = round(self.chars_prompt_so_far / 3.4 * self.pricing_prompt / 1000, 3) + money_generation = round(self.chars_generation_so_far / 3.4 * self.pricing_generation / 1000, 3) print('Estimated costs on openai.com:') # print('money prompt:', f'${money_prompt}') # print('money generation:', f'${money_generation}') From da935b62308dec7d2b7bbcee5e46bcd267069422 Mon Sep 17 00:00:00 2001 From: Joschka Braun Date: Wed, 12 Apr 2023 13:58:01 +0200 Subject: [PATCH 02/10] feat: relax and clarify instructions for gpt turbo --- src/executor_factory.py | 5 ++--- src/gpt.py | 23 +++++++++++++++-------- src/prompt_system.py | 25 +++++++------------------ src/prompt_tasks.py | 22 +++++++++++++++------- 4 files changed, 39 insertions(+), 36 deletions(-) diff --git a/src/executor_factory.py b/src/executor_factory.py index ca52766..e2f8784 100644 --- a/src/executor_factory.py +++ b/src/executor_factory.py @@ -166,10 +166,9 @@ response = client.post('/', inputs=DocumentArray([d])) # always use '/' print(response[0].text) # can also be blob in case of image/audio..., this should be visualized in the streamlit app ''' ) - conversation = self.gpt_session.get_conversation() + conversation = self.gpt_session.get_conversation([]) conversation.query(user_query) - playground_content_raw = conversation.query( - f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'app.py')) + playground_content_raw = conversation.query(chain_of_thought_optimization('python', 'app.py', 'the playground')) playground_content = self.extract_content_from_result(playground_content_raw, 'app.py') persist_file(playground_content, os.path.join(executor_path, 'app.py')) diff --git a/src/gpt.py b/src/gpt.py index c3d0fff..a10ac24 100644 --- a/src/gpt.py +++ b/src/gpt.py @@ -7,7 +7,7 @@ from openai.error import RateLimitError, Timeout from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \ PRICING_GPT3_5_TURBO_GENERATION -from src.prompt_system import system_base_definition +from src.prompt_system import system_base_definition, executor_example, docarray_example, client_example from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError from src.utils.string_tools import print_colored @@ -64,18 +64,25 @@ class GPTSession: print('total money so far:', f'${money_prompt + money_generation}') print('\n') - def get_conversation(self): - return _GPTConversation(self.supported_model, self.cost_callback) + def get_conversation(self, system_definition_examples: List[str] = ['executor', 'docarray', 'client']): + return _GPTConversation(self.supported_model, self.cost_callback, system_definition_examples) class _GPTConversation: - def __init__(self, model: str, cost_callback, prompt_list: List[Tuple[str, str]] = None): + def __init__(self, model: str, cost_callback, system_definition_examples: List[str] = ['executor', 'docarray', 'client']): self.model = model - if prompt_list is None: - prompt_list = [('system', system_base_definition)] + system_message = system_base_definition + if 'executor' in system_definition_examples: + system_message += f'\n{executor_example}' + if 'docarray' in system_definition_examples: + system_message += f'{docarray_example}' + if 'client' in system_definition_examples: + system_message += f'{client_example}' + + prompt_list = [('system', system_message)] self.prompt_list = prompt_list self.cost_callback = cost_callback - print_colored('system', system_base_definition, 'magenta') + print_colored('system', system_message, 'magenta') def query(self, prompt: str): print_colored('user', prompt, 'blue') @@ -100,7 +107,7 @@ class _GPTConversation: try: response_generator = openai.ChatCompletion.create( temperature=0, - max_tokens=2_000, + max_tokens=2_000 if self.model == 'gpt-4' else None, model=self.model, stream=True, messages=[ diff --git a/src/prompt_system.py b/src/prompt_system.py index ed9959b..37c34d6 100644 --- a/src/prompt_system.py +++ b/src/prompt_system.py @@ -1,7 +1,6 @@ from src.constants import FLOW_URL_PLACEHOLDER -executor_example = ''' -Using the Jina framework, users can define executors. +executor_example = '''Using the Jina framework, users can define executors. Here is an example of how an executor can be defined. It always starts with a comment: **executor.py** @@ -20,11 +19,9 @@ class MyInfoExecutor(Executor): return docs ``` -An executor gets a DocumentArray as input and returns a DocumentArray as output. -''' +An executor gets a DocumentArray as input and returns a DocumentArray as output.''' -docarray_example = f''' -A DocumentArray is a python class that can be seen as a list of Documents. +docarray_example = f'''A DocumentArray is a python class that can be seen as a list of Documents. A Document is a python class that represents a single document. Here is the protobuf definition of a Document: @@ -86,12 +83,10 @@ d8 = Document() d8.text = json.dumps([{{"id": "1", "text": ["hello", 'test']}}, {{"id": "2", "text": "world"}}]) # the document has a helper function load_uri_to_blob: # For instance, d4.load_uri_to_blob() downloads the file from d4.uri and stores it in d4.blob. -# If d4.uri was something like 'https://website.web/img.jpg', then d4.blob would be something like b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01... -''' +# If d4.uri was something like 'https://website.web/img.jpg', then d4.blob would be something like b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01... ''' -client_example = f''' -After the executor is deployed, it can be called via Jina Client. +client_example = f'''After the executor is deployed, it can be called via Jina Client. Here is an example of a client file: **client.py** @@ -102,13 +97,7 @@ d = Document(uri='...') d.load_uri_to_blob() response = client.post('/', inputs=DocumentArray([d])) # the client must be called on '/' print(response[0].text) -``` -''' +```''' -system_base_definition = f''' -You are a principal engineer working at Jina - an open source company." -{executor_example} -{docarray_example} -{client_example} -''' \ No newline at end of file +system_base_definition = f'''You are a principal engineer working at Jina - an open source company.''' \ No newline at end of file diff --git a/src/prompt_tasks.py b/src/prompt_tasks.py index 7e96a5c..1df0bef 100644 --- a/src/prompt_tasks.py +++ b/src/prompt_tasks.py @@ -17,9 +17,13 @@ def general_guidelines(): ) -def _task(task, tag_name, file_name): +def _task(task, tag_name, file_name, function_name=None): + into_string = file_name + if function_name: + into_string += f"/{function_name}" + return ( - task + f"The code will go into {file_name}. Wrap the code into:\n" + task + f"The code will go into {into_string}. Make sure to wrap the code into ``` marks even if you only output code:\n" f"**{file_name}**\n" f"```{tag_name}\n" f"...code...\n" @@ -112,17 +116,21 @@ def chain_of_thought_creation(): ) -def chain_of_thought_optimization(tag_name, file_name): +def chain_of_thought_optimization(tag_name, file_name, file_name_function=None): + file_name_or_function = file_name + if file_name_function: + file_name_or_function += f"/{file_name_function}" return _task( - f'First, write down an extensive list of obvious and non-obvious observations about {file_name} that could need an adjustment. Explain why. ' + f'First, write down an extensive list of obvious and non-obvious observations about {file_name_or_function} that could need an adjustment. Explain why. ' f"Think if all the changes are required and finally decide for the changes you want to make, " f"but you are not allowed disregard the instructions in the previous message. " f"Be very hesitant to change the code. Only make a change if you are sure that it is necessary. " - f"Output only {file_name} " - f"Write the whole content of {file_name} - even if you decided to change only a small thing or even nothing. ", + f"Output only {file_name_or_function} " + f"Write the whole content of {file_name_or_function} - even if you decided to change only a small thing or even nothing. ", tag_name, - file_name + file_name, + file_name_function ) def not_allowed(): From d0274f3646f350914e65fce250a1bdfcce2422fa Mon Sep 17 00:00:00 2001 From: Joschka Braun Date: Fri, 14 Apr 2023 11:31:01 +0200 Subject: [PATCH 03/10] feat: qr code scanner works for turbo --- src/executor_factory.py | 107 +++++++++++++++++++++++++++------------- src/gpt.py | 24 ++++----- src/jina_cloud.py | 2 +- src/prompt_tasks.py | 5 +- 4 files changed, 91 insertions(+), 47 deletions(-) diff --git a/src/executor_factory.py b/src/executor_factory.py index e2f8784..8469784 100644 --- a/src/executor_factory.py +++ b/src/executor_factory.py @@ -15,7 +15,7 @@ class ExecutorFactory: def __init__(self, model='gpt-4'): self.gpt_session = gpt.GPTSession(model=model) - def extract_content_from_result(self, plain_text, file_name): + def extract_content_from_result(self, plain_text, file_name, match_single_block=False): pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)```" match = re.search(pattern, plain_text, re.MULTILINE) if match: @@ -24,7 +24,7 @@ class ExecutorFactory: # Check for a single code block single_code_block_pattern = r"^```(?:\w+\n)?([\s\S]*?)```" single_code_block_match = re.findall(single_code_block_pattern, plain_text, re.MULTILINE) - if len(single_code_block_match) == 1: + if match_single_block and len(single_code_block_match) == 1: return single_code_block_match[0].strip() else: return '' @@ -60,7 +60,7 @@ class ExecutorFactory: all_executor_files_string += f'```{tag}\n' all_executor_files_string += file_name_to_content[file_name] all_executor_files_string += '\n```\n\n' - return all_executor_files_string + return all_executor_files_string.strip() def wrap_content_in_code_block(self, executor_content, file_name, tag): return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n' @@ -81,14 +81,14 @@ class ExecutorFactory: user_query = ( general_guidelines() + executor_file_task(executor_name, description, test, package) - + chain_of_thought_creation() + + '\n\n' + chain_of_thought_creation() ) conversation = self.gpt_session.get_conversation() executor_content_raw = conversation.query(user_query) if is_chain_of_thought: executor_content_raw = conversation.query( f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py')) - executor_content = self.extract_content_from_result(executor_content_raw, 'executor.py') + executor_content = self.extract_content_from_result(executor_content_raw, 'executor.py', match_single_block=True) persist_file(executor_content, os.path.join(EXECUTOR_FOLDER_v1, 'executor.py')) @@ -106,7 +106,9 @@ class ExecutorFactory: chain_of_thought_optimization('python', 'test_executor.py') + "Don't add any additional tests. " ) - test_executor_content = self.extract_content_from_result(test_executor_content_raw, 'test_executor.py') + test_executor_content = self.extract_content_from_result( + test_executor_content_raw, 'test_executor.py', match_single_block=True + ) persist_file(test_executor_content, os.path.join(EXECUTOR_FOLDER_v1, 'test_executor.py')) print_colored('', '############# Requirements #############', 'red') @@ -123,7 +125,7 @@ class ExecutorFactory: requirements_content_raw = conversation.query( chain_of_thought_optimization('', requirements_path) + "Keep the same version of jina ") - requirements_content = self.extract_content_from_result(requirements_content_raw, 'requirements.txt') + requirements_content = self.extract_content_from_result(requirements_content_raw, 'requirements.txt', match_single_block=True) persist_file(requirements_content, requirements_path) print_colored('', '############# Dockerfile #############', 'red') @@ -139,7 +141,7 @@ class ExecutorFactory: if is_chain_of_thought: dockerfile_content_raw = conversation.query( f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile')) - dockerfile_content = self.extract_content_from_result(dockerfile_content_raw, 'Dockerfile') + dockerfile_content = self.extract_content_from_result(dockerfile_content_raw, 'Dockerfile', match_single_block=True) persist_file(dockerfile_content, os.path.join(EXECUTOR_FOLDER_v1, 'Dockerfile')) self.write_config_yml(executor_name, EXECUTOR_FOLDER_v1) @@ -164,12 +166,13 @@ from jina import Client, Document, DocumentArray client = Client(host='{host}') response = client.post('/', inputs=DocumentArray([d])) # always use '/' print(response[0].text) # can also be blob in case of image/audio..., this should be visualized in the streamlit app +Please provide the complete file with the exact same syntax to wrap the code. ''' ) conversation = self.gpt_session.get_conversation([]) conversation.query(user_query) playground_content_raw = conversation.query(chain_of_thought_optimization('python', 'app.py', 'the playground')) - playground_content = self.extract_content_from_result(playground_content_raw, 'app.py') + playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True) persist_file(playground_content, os.path.join(executor_path, 'app.py')) def get_executor_path(self, output_path, package, version): @@ -179,6 +182,7 @@ print(response[0].text) # can also be blob in case of image/audio..., this shoul def debug_executor(self, output_path, package, description, test): MAX_DEBUGGING_ITERATIONS = 10 error_before = '' + # conversation = self.gpt_session.get_conversation() for i in range(1, MAX_DEBUGGING_ITERATIONS): print('Debugging iteration', i) previous_executor_path = self.get_executor_path(output_path, package, i) @@ -188,29 +192,56 @@ print(response[0].text) # can also be blob in case of image/audio..., this shoul if error: recreate_folder(next_executor_path) file_name_to_content = self.get_all_executor_files_with_content(previous_executor_path) - all_files_string = self.files_to_string(file_name_to_content) - user_query = ( - f"General rules: " + not_allowed() - + 'Here is the description of the task the executor must solve:\n' - + description - + '\n\nHere is the test scenario the executor must pass:\n' - + test - + 'Here are all the files I use:\n' - + all_files_string - + (('This is an error that is already fixed before:\n' - + error_before) if error_before else '') - + '\n\nNow, I get the following error:\n' - + error + '\n' - + 'Think quickly about possible reasons. ' - 'Then output the files that need change. ' - "Don't output files that don't need change. " - "If you output a file, then write the complete file. " - "Use the exact same syntax to wrap the code:\n" - f"**...**\n" - f"```...\n" - f"...code...\n" - f"```\n\n" - ) + + is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile']) + print(f'Current error is a dependency issue: {is_dependency_issue}') + if is_dependency_issue: + all_files_string = self.files_to_string({ + key: val for key, val in file_name_to_content.items() if key in ['requirements.txt', 'Dockerfile'] + }) + # user_query = ( + # f'I have the following files:\n{all_files_string}\n\n' + # + f'This error happens during the docker build process:\n{error}\n\n' + # + 'First, think about what kind of error is this? Look at exactly at the stack trace and then ' + # "suggest how to solve it. Output the files that need change. " + # "Don't output files that don't need change. If you output a file, then write the " + # "complete file. Use the exact same syntax to wrap the code:\n" + # f"**...**\n" + # f"```...\n" + # f"...code...\n" + # f"```" + # ) + user_query = ( + f"Your task is to provide guidance on how to solve an error that occurred during the Docker " + f"build process. The error message is:\n{error}\nTo solve this error, you should first " + f"identify the type of error by examining the stack trace. Once you have identified the " + f"error, you should suggest how to solve it. Your response should include the files that " + f"need to be changed, but not files that don't need to be changed. For files that need to " + f"be changed, please provide the complete file with the exact same syntax to wrap the code.\n\n" + f"You are given the following files:\n\n{all_files_string}" + ) + else: + # if i == 1: + all_files_string = self.files_to_string(file_name_to_content) + user_query = ( + f"General rules: " + not_allowed() + + f'Here is the description of the task the executor must solve:\n{description}' + + f'\n\nHere is the test scenario the executor must pass:\n{test}' + + f'Here are all the files I use:\n{all_files_string}' + + f'\n\nThis error happens during the docker build process:\n{error}\n\n' + + 'First, think about what kind of error is this? Look at exactly at the stack trace and then ' + "suggest how to solve it. Output the files that need change. " + "Don't output files that don't need change. If you output a file, then write the " + "complete file. Use the exact same syntax to wrap the code:\n" + f"**...**\n" + f"```...\n" + f"...code...\n" + f"```" + ) + # else: + # conversation.set_system_definition() + # user_query = f'Now this error happens during the docker build process:\n{error}' + conversation = self.gpt_session.get_conversation() returned_files_raw = conversation.query(user_query) for file_name, tag in FILE_AND_TAG_PAIRS: @@ -220,7 +251,7 @@ print(response[0].text) # can also be blob in case of image/audio..., this shoul for file_name, content in file_name_to_content.items(): persist_file(content, os.path.join(next_executor_path, file_name)) - error_before = error + error_before = error_before + '\n' + error else: break @@ -231,6 +262,16 @@ print(response[0].text) # can also be blob in case of image/audio..., this shoul class MaxDebugTimeReachedException(BaseException): pass + def is_dependency_issue(self, error, docker_file: str): + conversation = self.gpt_session.get_conversation([]) + answer = conversation.query( + f'Your task is to assist in identifying the root cause of a Docker build error for a python application. ' + f'The error message is as follows::\n\n{error}\n\n' + f'The docker file is as follows:\n\n{docker_file}\n\n' + f'Is this a dependency installation failure? Answer with "yes" or "no".' + ) + return 'yes' in answer.lower() + def generate_executor_name(self, description): conversation = self.gpt_session.get_conversation() user_query = f''' diff --git a/src/gpt.py b/src/gpt.py index a10ac24..a0af70b 100644 --- a/src/gpt.py +++ b/src/gpt.py @@ -71,18 +71,10 @@ class GPTSession: class _GPTConversation: def __init__(self, model: str, cost_callback, system_definition_examples: List[str] = ['executor', 'docarray', 'client']): self.model = model - system_message = system_base_definition - if 'executor' in system_definition_examples: - system_message += f'\n{executor_example}' - if 'docarray' in system_definition_examples: - system_message += f'{docarray_example}' - if 'client' in system_definition_examples: - system_message += f'{client_example}' - - prompt_list = [('system', system_message)] - self.prompt_list = prompt_list self.cost_callback = cost_callback - print_colored('system', system_message, 'magenta') + self.prompt_list = [None] + self.set_system_definition(system_definition_examples) + print_colored('system', self.prompt_list[0][1], 'magenta') def query(self, prompt: str): print_colored('user', prompt, 'blue') @@ -91,6 +83,16 @@ class _GPTConversation: self.prompt_list.append(('assistant', response)) return response + def set_system_definition(self, system_definition_examples: List[str] = []): + system_message = system_base_definition + if 'executor' in system_definition_examples: + system_message += f'\n{executor_example}' + if 'docarray' in system_definition_examples: + system_message += f'{docarray_example}' + if 'client' in system_definition_examples: + system_message += f'{client_example}' + self.prompt_list[0] = ('system', system_message) + def get_response_from_stream(self, response_generator): response_generator_with_timeout = timeout_generator_wrapper(response_generator, 10) complete_string = '' diff --git a/src/jina_cloud.py b/src/jina_cloud.py index 169323b..2f8b588 100644 --- a/src/jina_cloud.py +++ b/src/jina_cloud.py @@ -151,7 +151,7 @@ def process_error_message(error_message): if last_matching_line_index is not None: relevant_lines = lines[last_matching_line_index:] - return '\n'.join(relevant_lines[-25:]) + return '\n'.join(relevant_lines[-25:]).strip() def build_docker(path): diff --git a/src/prompt_tasks.py b/src/prompt_tasks.py index 1df0bef..2213113 100644 --- a/src/prompt_tasks.py +++ b/src/prompt_tasks.py @@ -23,11 +23,12 @@ def _task(task, tag_name, file_name, function_name=None): into_string += f"/{function_name}" return ( - task + f"The code will go into {into_string}. Make sure to wrap the code into ``` marks even if you only output code:\n" + task + f"The code will go into {into_string}. Make sure to wrap the code into ``` marks even if you only " + f"output code:\n" f"**{file_name}**\n" f"```{tag_name}\n" f"...code...\n" - f"```\n\n" + f"```\nPlease provide the complete file with the exact same syntax to wrap the code." ) From d2957b6c1a98a73c8fff30ce4b3211a1069c83e9 Mon Sep 17 00:00:00 2001 From: Joschka Braun Date: Fri, 14 Apr 2023 12:01:35 +0200 Subject: [PATCH 04/10] fix: updating files fixing dependency installation --- src/executor_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/executor_factory.py b/src/executor_factory.py index 549dc70..8e6a801 100644 --- a/src/executor_factory.py +++ b/src/executor_factory.py @@ -234,7 +234,7 @@ Please provide the complete file with the exact same syntax to wrap the code. returned_files_raw = conversation.query(user_query) for file_name, tag in FILE_AND_TAG_PAIRS: updated_file = self.extract_content_from_result(returned_files_raw, file_name) - if updated_file: + if updated_file and (not is_dependency_issue or file_name in ['requirements.txt', 'Dockerfile']): file_name_to_content[file_name] = updated_file for file_name, content in file_name_to_content.items(): From 72fcae579fc12361dfc9b0dcfa192da645e1b6ac Mon Sep 17 00:00:00 2001 From: Joschka Braun Date: Fri, 14 Apr 2023 12:09:30 +0200 Subject: [PATCH 05/10] fix: pg gets results on text --- src/executor_factory.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/executor_factory.py b/src/executor_factory.py index 8e6a801..fde0d51 100644 --- a/src/executor_factory.py +++ b/src/executor_factory.py @@ -163,10 +163,13 @@ The playground must look like it was made by a professional designer. All the ui elements are well thought out to make them visually appealing and easy to use. The executor is hosted on {host}. This is an example how you can connect to the executor assuming the document (d) is already defined: +``` from jina import Client, Document, DocumentArray client = Client(host='{host}') response = client.post('/', inputs=DocumentArray([d])) # always use '/' print(response[0].text) # can also be blob in case of image/audio..., this should be visualized in the streamlit app +``` +Note that the response will always be in response[0].text Please provide the complete file with the exact same syntax to wrap the code. ''' ) From 5bf3f522340637b8e50fec683336366a1b70458d Mon Sep 17 00:00:00 2001 From: Joschka Braun Date: Fri, 14 Apr 2023 12:29:00 +0200 Subject: [PATCH 06/10] fix: fix formatting of system message --- src/gpt.py | 4 ++-- src/prompt_system.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gpt.py b/src/gpt.py index b073496..9df28e7 100644 --- a/src/gpt.py +++ b/src/gpt.py @@ -93,9 +93,9 @@ class _GPTConversation: if 'executor' in system_definition_examples: system_message += f'\n{executor_example}' if 'docarray' in system_definition_examples: - system_message += f'{docarray_example}' + system_message += f'\n{docarray_example}' if 'client' in system_definition_examples: - system_message += f'{client_example}' + system_message += f'\n{client_example}' self.prompt_list[0] = ('system', system_message) def get_response_from_stream(self, response_generator): diff --git a/src/prompt_system.py b/src/prompt_system.py index 6d6a939..f08325d 100644 --- a/src/prompt_system.py +++ b/src/prompt_system.py @@ -27,14 +27,16 @@ An Executor gets a DocumentArray as input and returns a DocumentArray as output. docarray_example = f'''A DocumentArray is a python class that can be seen as a list of Documents. A Document is a python class that represents a single document. Here is the protobuf definition of a Document: - +``` message DocumentProto {{ // used to store json data the executor gets and returns string text = 1; }} +``` Here are examples of how a DocumentArray can be defined: +``` from jina import DocumentArray, Document import json @@ -52,6 +54,7 @@ array_list = array.tolist() d3 = Document(text=json.dumps(array_list)) d4 = Document() d4.text = '{{"uri": "https://.../logo.png"}}' +``` ''' From 3ea75553e157a2754e9c68fced80d5a291707125 Mon Sep 17 00:00:00 2001 From: Joschka Braun Date: Fri, 14 Apr 2023 12:59:38 +0200 Subject: [PATCH 07/10] feat: skip dependency issue checking for certain errors --- src/executor_factory.py | 4 ++++ src/prompt_tasks.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/executor_factory.py b/src/executor_factory.py index fde0d51..9da35fa 100644 --- a/src/executor_factory.py +++ b/src/executor_factory.py @@ -254,6 +254,10 @@ Please provide the complete file with the exact same syntax to wrap the code. pass def is_dependency_issue(self, error, docker_file: str): + # a few heuristics to quickly jump ahead + if any([error_message in error for error_message in ['AttributeError', 'NameError', 'AssertionError']]): + return False + conversation = self.gpt_session.get_conversation([]) answer = conversation.query( f'Your task is to assist in identifying the root cause of a Docker build error for a python application. ' diff --git a/src/prompt_tasks.py b/src/prompt_tasks.py index 21a9973..2629df2 100644 --- a/src/prompt_tasks.py +++ b/src/prompt_tasks.py @@ -86,7 +86,7 @@ def docker_file_task(): "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. " 'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. ' 'Make sure the all files are in the /workdir. ' - "The Dockerfile runs the test during the build process. " + not_allowed(), + "The Dockerfile runs the test during the build process. ", DOCKER_FILE_TAG, DOCKER_FILE_NAME ) From 9b9aa953d2e0f84ffc0f71f4db78fd3bc268f4f3 Mon Sep 17 00:00:00 2001 From: Joschka Braun Date: Fri, 14 Apr 2023 14:18:00 +0200 Subject: [PATCH 08/10] fix: executor name dockerfile files previous error --- src/executor_factory.py | 9 +++++++-- src/prompt_tasks.py | 8 +++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/executor_factory.py b/src/executor_factory.py index 9da35fa..d32d0b6 100644 --- a/src/executor_factory.py +++ b/src/executor_factory.py @@ -90,7 +90,11 @@ class ExecutorFactory: executor_content_raw = conversation.query( f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py')) executor_content = self.extract_content_from_result(executor_content_raw, 'executor.py', match_single_block=True) - + if executor_content == '': + executor_content_raw = conversation.query('Please add the executor code.') + executor_content = self.extract_content_from_result( + executor_content_raw, 'executor.py', match_single_block=True + ) persist_file(executor_content, os.path.join(EXECUTOR_FOLDER_v1, 'executor.py')) print_colored('', '############# Test Executor #############', 'red') @@ -222,6 +226,7 @@ Please provide the complete file with the exact same syntax to wrap the code. + f'\n\nHere is the test scenario the executor must pass:\n{test}' + f'Here are all the files I use:\n{all_files_string}' + f'\n\nThis error happens during the docker build process:\n{error}\n\n' + + ((f'This is an error that is already fixed before:\n{error_before}\n\n') if error_before else '') + 'Look at exactly at the stack trace. First, think about what kind of error is this? ' 'Then think about possible reasons which might have caused it. Then suggest how to ' 'solve it. Output the files that need change. ' @@ -242,7 +247,7 @@ Please provide the complete file with the exact same syntax to wrap the code. for file_name, content in file_name_to_content.items(): persist_file(content, os.path.join(next_executor_path, file_name)) - error_before = error_before + error_before = error else: break diff --git a/src/prompt_tasks.py b/src/prompt_tasks.py index 2629df2..9994e5e 100644 --- a/src/prompt_tasks.py +++ b/src/prompt_tasks.py @@ -34,7 +34,7 @@ def _task(task, tag_name, file_name, function_name=None): def executor_file_task(executor_name, executor_description, test_scenario, package): return _task(f''' -Write the executor called '{executor_name}'. +Write the executor called '{executor_name}'. The name is very important to keep. It matches the following description: '{executor_description}'. It will be tested with the following scenario: '{test_scenario}'. For the implementation use the following package: '{package}'. @@ -55,7 +55,7 @@ def test_executor_file_task(executor_name, test_scenario): if test_scenario else "" ) + "Use the following import to import the executor: " - f"from executor import {executor_name} " + f"```\nfrom executor import {executor_name}\n```" + not_allowed() + "The test must not open local files. " + "The test must not mock a function of the executor. " @@ -82,7 +82,8 @@ def docker_file_task(): "It is important to make sure that all libs are installed that are required by the python packages. " "Usually libraries are installed with apt-get. " "Be aware that the machine the docker container is running on does not have a GPU - only CPU. " - "Add the config.yml file to the Dockerfile. " + "Add the config.yml file to the Dockerfile. Note that the Dockerfile only has access to the files: " + "executor.py, requirements.txt, config.yml, test_executor.py. " "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. " 'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. ' 'Make sure the all files are in the /workdir. ' @@ -144,4 +145,5 @@ The executor must not load data from the local file system unless it was created The executor must not use a pre-trained model unless it is explicitly mentioned in the description. The executor must not train a model. The executor must not use any attribute of Document accept Document.text. +Note that the Dockerfile only has access to the files: executor.py, requirements.txt, config.yml, test_executor.py. ''' \ No newline at end of file From cb8056e7f8e86a7b3d79a44fcff58f94a18c9538 Mon Sep 17 00:00:00 2001 From: Joschka Braun Date: Fri, 14 Apr 2023 16:47:52 +0200 Subject: [PATCH 09/10] fix: remove print --- src/executor_factory.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/executor_factory.py b/src/executor_factory.py index d32d0b6..3c08f6e 100644 --- a/src/executor_factory.py +++ b/src/executor_factory.py @@ -203,7 +203,6 @@ Please provide the complete file with the exact same syntax to wrap the code. file_name_to_content = self.get_all_executor_files_with_content(previous_executor_path) is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile']) - print(f'Current error is a dependency issue: {is_dependency_issue}') if is_dependency_issue: all_files_string = self.files_to_string({ From 78dc08209534696b4a84d1e89ff8346773e52793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= Date: Sat, 15 Apr 2023 02:43:08 +0200 Subject: [PATCH 10/10] fix: feedback --- src/options/generate/generator.py | 32 ++++++++++++------------ src/options/generate/prompt_system.py | 2 +- src/options/generate/prompt_tasks.py | 35 +++++++++++++++------------ 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py index c420a52..73b7b19 100644 --- a/src/options/generate/generator.py +++ b/src/options/generate/generator.py @@ -6,7 +6,7 @@ from src.apis import gpt from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS from src.apis.jina_cloud import process_error_message, push_executor from src.options.generate.prompt_tasks import general_guidelines, chain_of_thought_creation, executor_file_task, \ - not_allowed, chain_of_thought_optimization, test_executor_file_task, requirements_file_task, docker_file_task + not_allowed_executor, chain_of_thought_optimization, test_executor_file_task, requirements_file_task, docker_file_task from src.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path from src.utils.string_tools import print_colored @@ -76,10 +76,10 @@ class Generator: microservice_content_raw = conversation.query(user_query) if is_chain_of_thought: microservice_content_raw = conversation.query( - f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'microservice.py')) - microservicer_content = self.extract_content_from_result(executor_content_raw, 'microservice.py', match_single_block=True) + f"General rules: " + not_allowed_executor() + chain_of_thought_optimization('python', 'microservice.py')) + microservice_content = self.extract_content_from_result(microservice_content_raw, 'microservice.py', match_single_block=True) if microservice_content == '': - microservice_content_raw = conversation.query('Please add the executor code.') + microservice_content_raw = conversation.query('You must add the executor code.') microservice_content = self.extract_content_from_result( microservice_content_raw, 'microservice.py', match_single_block=True ) @@ -95,7 +95,7 @@ class Generator: test_microservice_content_raw = conversation.query(user_query) if is_chain_of_thought: test_microservice_content_raw = conversation.query( - f"General rules: " + not_allowed() + + f"General rules: " + not_allowed_executor() + chain_of_thought_optimization('python', 'test_microservice.py') + "Don't add any additional tests. " ) @@ -133,7 +133,7 @@ class Generator: dockerfile_content_raw = conversation.query(user_query) if is_chain_of_thought: dockerfile_content_raw = conversation.query( - f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile')) + f"General rules: " + not_allowed_executor() + chain_of_thought_optimization('dockerfile', 'Dockerfile')) dockerfile_content = self.extract_content_from_result(dockerfile_content_raw, 'Dockerfile', match_single_block=True) persist_file(dockerfile_content, os.path.join(MICROSERVICE_FOLDER_v1, 'Dockerfile')) @@ -161,7 +161,7 @@ response = client.post('/', inputs=DocumentArray([d])) # always use '/' print(response[0].text) # can also be blob in case of image/audio..., this should be visualized in the streamlit app ``` Note that the response will always be in response[0].text -Please provide the complete file with the exact same syntax to wrap the code. +You must provide the complete file with the exact same syntax to wrap the code. The playground (app.py) must read the host from sys.argv because it will be started with a custom host: streamlit run app.py -- --host grpc://... The playground (app.py) must not let the user configure the host on the ui. ''' @@ -198,19 +198,19 @@ The playground (app.py) must not let the user configure the host on the ui. f"identify the type of error by examining the stack trace. Once you have identified the " f"error, you should suggest how to solve it. Your response should include the files that " f"need to be changed, but not files that don't need to be changed. For files that need to " - f"be changed, please provide the complete file with the exact same syntax to wrap the code.\n\n" + f"be changed, you must provide the complete file with the exact same syntax to wrap the code.\n\n" f"You are given the following files:\n\n{all_files_string}" ) else: all_files_string = self.files_to_string(file_name_to_content) user_query = ( - f"General rules: " + not_allowed() - + f'Here is the description of the task the executor must solve:\n{description}' - + f'\n\nHere is the test scenario the executor must pass:\n{test}' - + f'Here are all the files I use:\n{all_files_string}' - + f'\n\nThis error happens during the docker build process:\n{error}\n\n' - + ((f'This is an error that is already fixed before:\n{error_before}\n\n') if error_before else '') - + 'Look at exactly at the stack trace. First, think about what kind of error is this? ' + f"General rules: " + not_allowed_executor() + + f'Here is the description of the task the executor must solve:\n{description}' + + f'\n\nHere is the test scenario the executor must pass:\n{test}' + + f'Here are all the files I use:\n{all_files_string}' + + ((f'This is an error that I already fixed before:\n{error_before}\n\n') if error_before else '') + + f'\n\nThis is the error I encounter currently during the docker build process:\n{error}\n\n' + + 'Look at the stack trace of the current error. First, think about what kind of error is this? ' 'Then think about possible reasons which might have caused it. Then suggest how to ' 'solve it. Output the files that need change. ' "Don't output files that don't need change. If you output a file, then write the " @@ -288,7 +288,7 @@ For each subtask: For each package: Write down some non-obvious thoughts about the challenges you might face for the task and give multiple approaches on how you handle them. For example, there might be some packages you must not use because they do not obay the rules: - {not_allowed()} + {not_allowed_executor()} Discuss the pros and cons for all of these packages. Create a list of package subsets that you could use to solve the task. The list is sorted in a way that the most promising subset of packages is at the top. diff --git a/src/options/generate/prompt_system.py b/src/options/generate/prompt_system.py index 96ed5ce..02a38ee 100644 --- a/src/options/generate/prompt_system.py +++ b/src/options/generate/prompt_system.py @@ -71,4 +71,4 @@ print(response[0].text) ```''' -system_base_definition = f'''You are a principal engineer working at Jina - an open source company.''' \ No newline at end of file +system_base_definition = f'''You are a principal engineer working at Jina - an open source company. You accurately satisfy all of the user's requirements.''' \ No newline at end of file diff --git a/src/options/generate/prompt_tasks.py b/src/options/generate/prompt_tasks.py index 59a91da..239f952 100644 --- a/src/options/generate/prompt_tasks.py +++ b/src/options/generate/prompt_tasks.py @@ -17,10 +17,10 @@ def general_guidelines(): ) -def _task(task, tag_name, file_name, function_name=None): +def _task(task, tag_name, file_name, purpose=None): into_string = file_name - if function_name: - into_string += f"/{function_name}" + if purpose: + into_string += f"/{purpose}" return ( task + f"The code will go into {into_string}. Make sure to wrap the code into ``` marks even if you only " @@ -28,7 +28,7 @@ def _task(task, tag_name, file_name, function_name=None): f"**{file_name}**\n" f"```{tag_name}\n" f"...code...\n" - f"```\nPlease provide the complete file with the exact same syntax to wrap the code." + f"```\nYou must provide the complete file with the exact same syntax to wrap the code." ) @@ -39,7 +39,7 @@ It matches the following description: '{executor_description}'. It will be tested with the following scenario: '{test_scenario}'. For the implementation use the following package: '{package}'. Have in mind that d.uri is never a path to a local file. It is always a url. -''' + not_allowed(), +''' + not_allowed_executor(), EXECUTOR_FILE_TAG, EXECUTOR_FILE_NAME ) @@ -56,7 +56,7 @@ def test_executor_file_task(executor_name, test_scenario): ) + "Use the following import to import the executor: " f"```\nfrom microservice import {executor_name}\n```" - + not_allowed() + + not_allowed_executor() + "The test must not open local files. " + "The test must not mock a function of the executor. " + "The test must not use other data than the one provided in the test scenario. ", @@ -78,7 +78,6 @@ def requirements_file_task(): def docker_file_task(): return _task( "Write the Dockerfile that defines the environment with all necessary dependencies that the executor uses. " - "The Dockerfile runs the test during the build process. " "It is important to make sure that all libs are installed that are required by the python packages. " "Usually libraries are installed with apt-get. " "Be aware that the machine the docker container is running on does not have a GPU - only CPU. " @@ -87,7 +86,7 @@ def docker_file_task(): "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. " 'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. ' 'Make sure the all files are in the /workdir. ' - "The Dockerfile runs the test during the build process. ", + "The Dockerfile runs the test during the build process. " + not_allowed_docker(), DOCKER_FILE_TAG, DOCKER_FILE_NAME ) @@ -110,11 +109,12 @@ def streamlit_file_task(): def chain_of_thought_creation(): - return ( - "First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. " - "For example, the given package you could used in different ways and not all of them obay the rules: " - + "Discuss the pros and cons for all of these approaches and then decide for one of the approaches. " - "Then write as I told you. " + return (f''' +First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. +For example, the given package you could used in different ways and not all of them obey the instructions. +Discuss the pros and cons for all of these approaches and then decide for one of the approaches. +Then write the code. +''' ) @@ -135,7 +135,7 @@ def chain_of_thought_optimization(tag_name, file_name, file_name_function=None): file_name_function ) -def not_allowed(): +def not_allowed_executor(): return ''' The executor must not use the GPU. The executor must not access a database. @@ -145,5 +145,10 @@ The executor must not load data from the local file system unless it was created The executor must not use a pre-trained model unless it is explicitly mentioned in the description. The executor must not train a model. The executor must not use any attribute of Document accept Document.text. +''' + +def not_allowed_docker(): + return ''' Note that the Dockerfile only has access to the files: executor.py, requirements.txt, config.yml, test_executor.py. -''' \ No newline at end of file +Note that the Dockerfile runs the test_microservice.py during the build process. +'''