diff --git a/main.py b/main.py index 6f21b4e..fbc7ff7 100644 --- a/main.py +++ b/main.py @@ -1,25 +1,29 @@ import os +import re from src import gpt, jina_cloud -from src.constants import TAG_TO_FILE_NAME, EXECUTOR_FOLDER, CLIENT_FILE_NAME -from src.jina_cloud import run_client_file -from src.prompt_examples import executor_example, docarray_example, client_example +from src.constants import FILE_AND_TAG_PAIRS, EXECUTOR_FOLDER_v1, EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME, STREAMLIT_FILE_NAME +from src.jina_cloud import update_client_line_in_file +from src.prompt_system import system_base_definition from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \ - test_executor_file_task, docker_file_task, client_file_task + test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task from src.utils.io import recreate_folder -from src.utils.string import find_between, clean_content +from src.utils.string import find_differences -def extract_content_from_result(plain_text, tag): - content = find_between(plain_text, f'$$$start_{tag}$$$', f'$$$end_{tag}$$$') - clean = clean_content(content) - return clean +def extract_content_from_result(plain_text, file_name): + pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)```" + match = re.search(pattern, plain_text, re.MULTILINE) + if match: + return match.group(1).strip() + else: + return None -def extract_and_write(plain_text): - for tag, file_name in TAG_TO_FILE_NAME.items(): - clean = extract_content_from_result(plain_text, tag) - full_path = os.path.join(EXECUTOR_FOLDER, file_name) +def extract_and_write(plain_text, dest_folder): + for file_name, tag in FILE_AND_TAG_PAIRS: + clean = extract_content_from_result(plain_text, file_name) + full_path = os.path.join(dest_folder, file_name) with open(full_path, 'w') as f: f.write(clean) @@ -35,8 +39,8 @@ metas: with open('executor/config.yml', 'w') as f: f.write(config_content) -def get_all_executor_files_with_content(): - folder_path = 'executor' + +def get_all_executor_files_with_content(folder_path): file_name_to_content = {} for filename in os.listdir(folder_path): file_path = os.path.join(folder_path, filename) @@ -48,7 +52,61 @@ def get_all_executor_files_with_content(): return file_name_to_content -async def main( + +def build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality, + output_doc_field, output_modality, test_in, test_out): + system_definition = ( + system_base_definition + + "The user is asking you to create an executor with all the necessary files " + "and you write the complete code without leaving something out. " + ) + user_query = ( + general_guidelines() + + executor_file_task(executor_name, executor_description, input_modality, input_doc_field, + output_modality, output_doc_field) + + test_executor_file_task(executor_name, test_in, test_out) + + requirements_file_task() + + docker_file_task() + + client_file_task() + + streamlit_file_task() + + "First, write down some non-obvious thoughts about the challenges of the task and how you handle them. " + "Then write as I told you. " + ) + plain_text = gpt.get_response(system_definition, user_query) + return plain_text + + +def build_production_ready_implementation(all_executor_files_string): + system_definition = ( + system_base_definition + + f"The user gives you the code of the executor and all other files needed ({', '.join([e[0] for e in FILE_AND_TAG_PAIRS])}) " + f"The files may contain bugs. Fix all of them. " + f"Some files might have only prototype implementations and are not production ready. Add all the missing code. " + f"Some imports might be missing. Make sure to add them. Output all the files in the same format like given to you. " + ) + user_query = ( + "Fix all files, add all missing code and imports. Make it production ready. " + "Keep the same format as given to you. " + "First write down some non-obvious thoughts about what parts could need an adjustment and why. " + "Then write as I told you. " + "Here are the files: \n\n" + + all_executor_files_string + ) + all_executor_files_string_improved = gpt.get_response(system_definition, user_query) + print('DIFFERENCES:', find_differences(all_executor_files_string, all_executor_files_string_improved)) + return all_executor_files_string_improved + +def files_to_string(file_name_to_content): + all_executor_files_string = '' + for file_name, tag in FILE_AND_TAG_PAIRS: + all_executor_files_string += f'**{file_name}**\n' + all_executor_files_string += f'```{tag}\n' + all_executor_files_string += file_name_to_content[file_name] + all_executor_files_string += '\n```\n\n' + return all_executor_files_string + + +def main( executor_name, executor_description, input_modality, @@ -59,50 +117,70 @@ async def main( test_out, do_validation=True ): - recreate_folder(EXECUTOR_FOLDER) - system_definition = ( - "You are a principal engineer working at Jina - an open source company." - "Using the Jina framework, users can define executors. " - + executor_example - + docarray_example - + client_example - + "The user is asking you to create an executor with all the necessary files " - "and you write the complete code without leaving something out. " - ) + recreate_folder(EXECUTOR_FOLDER_v1) + recreate_folder(EXECUTOR_FOLDER_v2) - user_query = ( - general_guidelines() - + executor_file_task(executor_name, executor_description, input_modality, input_doc_field, - output_modality, output_doc_field) - + test_executor_file_task(executor_name, test_in, test_out) - + requirements_file_task() - + docker_file_task() - + client_file_task() - ) + all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality, + output_doc_field, output_modality, test_in, test_out) + extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1) - plain_text = gpt.get_response(system_definition, user_query) + file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1) - extract_and_write(plain_text) + all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1) + + all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions) + + extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2) write_config_yml(executor_name) jina_cloud.push_executor() - host = await jina_cloud.deploy_flow(executor_name, do_validation) + host = jina_cloud.deploy_flow(executor_name, do_validation) - run_client_file(f'executor/{CLIENT_FILE_NAME}', host, do_validation) + update_client_line_in_file(f'executor/{CLIENT_FILE_NAME}', host) + update_client_line_in_file(f'executor/{STREAMLIT_FILE_NAME}', host) + if do_validation: + pass - return get_all_executor_files_with_content() + return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2) if __name__ == '__main__': + ######### Level 2 task ######### main( - executor_name='MyCoolOcrExecutor', - executor_description="OCR detector", - input_modality='image', - input_doc_field='uri', - output_modality='text', - output_doc_field='text', - test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png', - test_out='> Hello, world!_', + executor_name='My3DTo2DExecutor', + executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object", + input_modality='3d', + input_doc_field='blob', + output_modality='image', + output_doc_field='blob', + test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj', + test_out='the output should be exactly one image in png format', + do_validation=False ) + + ######### Level 1 task ######### + # main( + # executor_name='MyCoolOcrExecutor', + # executor_description="OCR detector", + # input_modality='image', + # input_doc_field='uri', + # output_modality='text', + # output_doc_field='text', + # test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png', + # test_out='> Hello, world!_', + # do_validation=False + # ) + + # main( + # executor_name='MySentimentAnalyzer', + # executor_description="Sentiment analysis executor", + # input_modality='text', + # input_doc_field='text', + # output_modality='sentiment', + # output_doc_field='sentiment_label', + # test_in='This is a fantastic product! I love it!', + # test_out='positive', + # do_validation=False + # ) \ No newline at end of file diff --git a/src/constants.py b/src/constants.py index f8f8c2c..3afa3e7 100644 --- a/src/constants.py +++ b/src/constants.py @@ -3,20 +3,25 @@ TEST_EXECUTOR_FILE_NAME = 'test_executor.py' REQUIREMENTS_FILE_NAME = 'requirements.txt' DOCKER_FILE_NAME = 'Dockerfile' CLIENT_FILE_NAME = 'client.py' +STREAMLIT_FILE_NAME = 'streamlit.py' -EXECUTOR_FILE_TAG = 'executor' -TEST_EXECUTOR_FILE_TAG = 'test_executor' -REQUIREMENTS_FILE_TAG = 'requirements' +EXECUTOR_FILE_TAG = 'python' +TEST_EXECUTOR_FILE_TAG = 'python' +REQUIREMENTS_FILE_TAG = '' DOCKER_FILE_TAG = 'dockerfile' -CLIENT_FILE_TAG = 'client' +CLIENT_FILE_TAG = 'python' +STREAMLIT_FILE_TAG = 'python' -TAG_TO_FILE_NAME = { - EXECUTOR_FILE_TAG: EXECUTOR_FILE_NAME, - TEST_EXECUTOR_FILE_TAG: TEST_EXECUTOR_FILE_NAME, - REQUIREMENTS_FILE_TAG: REQUIREMENTS_FILE_NAME, - DOCKER_FILE_TAG: DOCKER_FILE_NAME, - CLIENT_FILE_TAG: CLIENT_FILE_NAME -} +FILE_AND_TAG_PAIRS = [ + (EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG), + (TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG), + (REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG), + (DOCKER_FILE_NAME, DOCKER_FILE_TAG), + (CLIENT_FILE_NAME, CLIENT_FILE_TAG), + (STREAMLIT_FILE_NAME, STREAMLIT_FILE_TAG) +] + +EXECUTOR_FOLDER_v1 = 'executor_v1' +EXECUTOR_FOLDER_v2 = 'executor_v2' -EXECUTOR_FOLDER = 'executor' FLOW_URL_PLACEHOLDER = 'jcloud.jina.ai' \ No newline at end of file diff --git a/src/gpt.py b/src/gpt.py index 111b6bf..ff306d6 100644 --- a/src/gpt.py +++ b/src/gpt.py @@ -1,6 +1,8 @@ import os +from time import sleep import openai +from openai.error import RateLimitError, Timeout from src.utils.string import print_colored @@ -9,23 +11,38 @@ openai.api_key = os.environ['OPENAI_API_KEY'] def get_response(system_definition, user_query): print_colored('system_definition', system_definition, 'magenta') print_colored('user_query', user_query, 'blue') - response = openai.ChatCompletion.create( - temperature=0, - model="gpt-4", - messages=[ - { - "role": "system", - "content": system_definition + for i in range(10): + try: + response = openai.ChatCompletion.create( + temperature=0, + max_tokens=5_000, + model="gpt-4", + stream=True, + messages=[ + { + "role": "system", + "content": system_definition - }, - { - "role": "user", - "content": - user_query - }, + }, + { + "role": "user", + "content": + user_query + }, - ] - ) - content = response['choices'][0]['message']['content'] - print_colored('agent response', content, 'green') - return content \ No newline at end of file + ] + ) + complete_string = '' + for chunk in response: + delta = chunk['choices'][0]['delta'] + if 'content' in delta: + content = delta['content'] + print_colored('' if complete_string else 'Agent response:', content, 'green', end='') + complete_string += content + return complete_string + except (RateLimitError, Timeout) as e: + print(e) + print('retrying') + sleep(3) + continue + raise Exception('Failed to get response') \ No newline at end of file diff --git a/src/jina_cloud.py b/src/jina_cloud.py index a70cfb7..34f1934 100644 --- a/src/jina_cloud.py +++ b/src/jina_cloud.py @@ -19,15 +19,13 @@ def get_user_name(): return response['data']['name'] -async def deploy_on_jcloud(flow_yaml): +def deploy_on_jcloud(flow_yaml): cloud_flow = CloudFlow(path=flow_yaml) - await cloud_flow.__aenter__() - return cloud_flow.endpoints['gateway'] + return cloud_flow.__enter__().endpoints['gateway'] - -async def deploy_flow(executor_name, do_validation): +def deploy_flow(executor_name, do_validation): flow = f''' jtype: Flow with: @@ -59,7 +57,7 @@ executors: with flow: pass print('deploy flow on jcloud') - return await deploy_on_jcloud(flow_yaml=full_flow_path) + return deploy_on_jcloud(flow_yaml=full_flow_path) def replace_client_line(file_content: str, replacement: str) -> str: @@ -70,7 +68,7 @@ def replace_client_line(file_content: str, replacement: str) -> str: break return '\n'.join(lines) -def run_client_file(file_path, host, do_validation): +def update_client_line_in_file(file_path, host): with open(file_path, 'r') as file: content = file.read() @@ -80,5 +78,4 @@ def run_client_file(file_path, host, do_validation): with open(file_path, 'w') as file: file.write(replaced_content) - if do_validation: - import executor.client # runs the client script for validation + diff --git a/src/prompt_examples.py b/src/prompt_system.py similarity index 92% rename from src/prompt_examples.py rename to src/prompt_system.py index 0177d13..c491bc1 100644 --- a/src/prompt_examples.py +++ b/src/prompt_system.py @@ -92,4 +92,13 @@ d = Document(uri='data/img.png') d.load_uri_to_blob() response = client.post('/process', inputs=DocumentArray([d])) response[0].summary() -''') \ No newline at end of file +''') + + +system_base_definition = ( + "You are a principal engineer working at Jina - an open source company." + "Using the Jina framework, users can define executors. " + + executor_example + + docarray_example + + client_example +) \ No newline at end of file diff --git a/src/prompt_tasks.py b/src/prompt_tasks.py index 86291c1..99aaf17 100644 --- a/src/prompt_tasks.py +++ b/src/prompt_tasks.py @@ -1,10 +1,10 @@ from src.constants import EXECUTOR_FILE_NAME, REQUIREMENTS_FILE_NAME, TEST_EXECUTOR_FILE_NAME, DOCKER_FILE_NAME, \ - DOCKER_FILE_TAG, CLIENT_FILE_TAG, CLIENT_FILE_NAME + DOCKER_FILE_TAG, CLIENT_FILE_TAG, CLIENT_FILE_NAME, STREAMLIT_FILE_TAG, STREAMLIT_FILE_NAME, EXECUTOR_FILE_TAG, \ + REQUIREMENTS_FILE_TAG, TEST_EXECUTOR_FILE_TAG def general_guidelines(): return ( - "General guidelines: " "The code you write is production ready. " "Every file starts with comments describing what the code is doing before the first import. " "Comments can only be written between tags. " @@ -20,7 +20,13 @@ def general_guidelines(): def _task(task, tag_name, file_name): - return task + f"The code will go into {file_name}. Wrap the code in the string $$$start_{tag_name}$$$...$$$end_{tag_name}$$$ \n\n" + return ( + task + f"The code will go into {file_name}. Wrap the code is wrapped into:\n" + f"**{file_name}**\n" + f"```{tag_name}\n" + f"...code...\n" + f"```\n\n" + ) def executor_file_task(executor_name, executor_description, input_modality, input_doc_field, @@ -31,28 +37,31 @@ def executor_file_task(executor_name, executor_description, input_modality, inpu f"It gets a DocumentArray as input where each document has the input modality '{input_modality}' that is stored in document.{input_doc_field}. " f"It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}. " f"Have in mind that d.uri is never a path to a local file. It is always a url.", - 'executor', + EXECUTOR_FILE_TAG, EXECUTOR_FILE_NAME ) def requirements_file_task(): - return _task("Write the content of the requirements.txt file. " - "Make sure to include pytest. " - "All versions are fixed. ", 'requirements', - REQUIREMENTS_FILE_NAME) + return _task( + "Write the content of the requirements.txt file. " + "Make sure to include pytest. " + "All versions are fixed. ", + REQUIREMENTS_FILE_TAG, + REQUIREMENTS_FILE_NAME + ) def test_executor_file_task(executor_name, test_in, test_out): return _task( "Write a small unit test for the executor. " "Start the test with an extensive comment about the test case. " - + ( - "Test that the executor converts the input '" + test_in + "' to the output '" + test_out + "'. " - ) if test_in and test_out else "" - "Use the following import to import the executor: " - f"from executor import {executor_name} ", - 'test_executor', + + (( + "Test that the executor converts the input '" + test_in + "' to the output '" + test_out + "'. " + ) if test_in and test_out else "") + + "Use the following import to import the executor: " + f"from executor import {executor_name} ", + TEST_EXECUTOR_FILE_TAG, TEST_EXECUTOR_FILE_NAME ) @@ -66,12 +75,23 @@ def docker_file_task(): "Add the config.yml file to the Dockerfile. " "The base image of the Dockerfile is FROM jinaai/jina:3.14.2-dev18-py310-standard. " 'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] ' - "The Dockerfile runs the test during the build process. " - , DOCKER_FILE_TAG, DOCKER_FILE_NAME) + "The Dockerfile runs the test during the build process. ", + DOCKER_FILE_TAG, + DOCKER_FILE_NAME + ) def client_file_task(): return _task( - "Write the client file. " - , CLIENT_FILE_TAG, CLIENT_FILE_NAME + "Write the client file. ", + CLIENT_FILE_TAG, + CLIENT_FILE_NAME + ) + + +def streamlit_file_task(): + return _task( + "Write the streamlit file allowing to make requests . ", + STREAMLIT_FILE_TAG, + STREAMLIT_FILE_NAME ) diff --git a/src/utils/string.py b/src/utils/string.py index 269bc1e..c7a5ad1 100644 --- a/src/utils/string.py +++ b/src/utils/string.py @@ -1,3 +1,6 @@ +import difflib + + def find_between(input_string, start, end): try: start_index = input_string.index(start) + len(start) @@ -10,7 +13,7 @@ def find_between(input_string, start, end): def clean_content(content): return content.replace('```', '').strip() -def print_colored(headline, text, color_code): +def print_colored(headline, text, color_code, end='\n'): if color_code == 'black': color_code = '30' elif color_code == 'red': @@ -30,5 +33,21 @@ def print_colored(headline, text, color_code): color_start = f"\033[{color_code}m" reset = "\033[0m" bold_start = "\033[1m" - print(f"{bold_start}{color_start}{headline}{reset}") - print(f"{color_start}{text}{reset}") + if headline: + print(f"{bold_start}{color_start}{headline}{reset}") + print(f"{color_start}{text}{reset}", end=end) + + +def find_differences(a, b): + matcher = difflib.SequenceMatcher(None, a, b) + differences = set() + + for tag, i1, i2, j1, j2 in matcher.get_opcodes(): + if tag == 'replace': + diff_a = a[i1:i2] + diff_b = b[j1:j2] + # Check for mirrored results and only add non-mirrored ones + if (diff_b, diff_a) not in differences: + differences.add((diff_a, diff_b)) + + return differences