diff --git a/README.md b/README.md index 144eeea..6dd65cc 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,51 @@ -[![Watch the video](https://i.imgur.com/vKb2F1B.png)](https://user-images.githubusercontent.com/11627845/226220484-17810f7c-b184-4a03-9af2-3a977fbb014b.mov) +# 🤖 GPT Deploy +This project streamlines the creation and deployment of microservices. +Simply describe your task using natural language, and the system will automatically build and deploy your microservice. +To ensure the generated microservice accurately aligns with your intended task, you can also provide test scenarios. +# Overview +The graphic below illustrates the process of creating a microservice and deploying it to the cloud. +```mermaid +graph TB + A[User Input: Task Description & Test Scenarios] --> B{GPT Deploy} + B -->|Identify Strategies| C[Strategy 1] + B -->|Identify Strategies| D[Strategy 2] + B -->|Identify Strategies| E[Strategy N] + C --> F[executor.py, test_executor.py, requirements.txt, Dockerfile] + D --> G[executor.py, test_executor.py, requirements.txt, Dockerfile] + E --> H[executor.py, test_executor.py, requirements.txt, Dockerfile] + F --> I{Build Image} + G --> I + H --> I + I -->|Fail| J[Apply Fix and Retry] + J --> I + I -->|Success| K[Push Docker Image to Registry] + K --> L[Deploy Microservice] + L --> M[Create Streamlit Playground] + M --> N[User Tests Microservice] +``` +1. GPT Deploy identifies several strategies to implement your task. +2. It tests each strategy until it finds one that works. +3. For each strategy, it creates the following files: +- executor.py: This is the main implementation of the microservice. +- test_executor.py: These are test cases to ensure the microservice works as expected. +- requirements.txt: This file lists the packages needed by the microservice and its tests. +- Dockerfile: This file is used to run the microservice in a container and also runs the tests when building the image. +4. GPT Deploy attempts to build the image. If the build fails, it uses the error message to apply a fix and retries the build. +5. Once it finds a successful strategy, it: +- Pushes the Docker image to the registry. +- Deploys the microservice. +- Creates a Streamlit playground where you can test the microservice. +6. If a strategy fails 10 times in a row, GPT Deploy moves on to the next strategy. -# 🔮 vision -create, deploy and update your microservice infrastructure +[//]: # ([![Watch the video](https://i.imgur.com/vKb2F1B.png)](https://user-images.githubusercontent.com/11627845/226220484-17810f7c-b184-4a03-9af2-3a977fbb014b.mov)) -# 🏗 frontend description -The microchain-frontend is used to define the graph of microservice, their interfaces and their functionality. -Based on this definition, the backend will be generated automatically. - -# 🏗 usage single microservice -## you provide -- input_modality -- output_modality -- description of the functionality of the transformation the microservice is handling -- examples of input and output pairs - -## you get -- a microservice together with a playground -- the code to run requests # 🤏 limitations for now - stateless microservices only - deterministic microservices only to make sure input and output pairs can be used -# TODO: -- [ ] attach playground -- [ ] subtask executors -- \ No newline at end of file +# 🔮 vision +Use a natural language interface to create, deploy, and update your microservice infrastructure. 
diff --git a/microchain-frontend/.gitignore b/frontend/.gitignore similarity index 100% rename from microchain-frontend/.gitignore rename to frontend/.gitignore diff --git a/microchain-frontend/README.md b/frontend/README.md similarity index 100% rename from microchain-frontend/README.md rename to frontend/README.md diff --git a/microchain-frontend/package-lock.json b/frontend/package-lock.json similarity index 100% rename from microchain-frontend/package-lock.json rename to frontend/package-lock.json diff --git a/microchain-frontend/package.json b/frontend/package.json similarity index 100% rename from microchain-frontend/package.json rename to frontend/package.json diff --git a/microchain-frontend/public/favicon.ico b/frontend/public/favicon.ico similarity index 100% rename from microchain-frontend/public/favicon.ico rename to frontend/public/favicon.ico diff --git a/microchain-frontend/public/index.html b/frontend/public/index.html similarity index 100% rename from microchain-frontend/public/index.html rename to frontend/public/index.html diff --git a/microchain-frontend/public/logo192.png b/frontend/public/logo192.png similarity index 100% rename from microchain-frontend/public/logo192.png rename to frontend/public/logo192.png diff --git a/microchain-frontend/public/logo512.png b/frontend/public/logo512.png similarity index 100% rename from microchain-frontend/public/logo512.png rename to frontend/public/logo512.png diff --git a/microchain-frontend/public/manifest.json b/frontend/public/manifest.json similarity index 100% rename from microchain-frontend/public/manifest.json rename to frontend/public/manifest.json diff --git a/microchain-frontend/public/robots.txt b/frontend/public/robots.txt similarity index 100% rename from microchain-frontend/public/robots.txt rename to frontend/public/robots.txt diff --git a/microchain-frontend/src/App.css b/frontend/src/App.css similarity index 100% rename from microchain-frontend/src/App.css rename to frontend/src/App.css diff --git a/microchain-frontend/src/App.js b/frontend/src/App.js similarity index 100% rename from microchain-frontend/src/App.js rename to frontend/src/App.js diff --git a/microchain-frontend/src/App.test.js b/frontend/src/App.test.js similarity index 100% rename from microchain-frontend/src/App.test.js rename to frontend/src/App.test.js diff --git a/microchain-frontend/src/components/CreateExecutorForm.js b/frontend/src/components/CreateExecutorForm.js similarity index 100% rename from microchain-frontend/src/components/CreateExecutorForm.js rename to frontend/src/components/CreateExecutorForm.js diff --git a/microchain-frontend/src/components/ExecutorOutput.js b/frontend/src/components/ExecutorOutput.js similarity index 100% rename from microchain-frontend/src/components/ExecutorOutput.js rename to frontend/src/components/ExecutorOutput.js diff --git a/microchain-frontend/src/index.css b/frontend/src/index.css similarity index 100% rename from microchain-frontend/src/index.css rename to frontend/src/index.css diff --git a/microchain-frontend/src/index.js b/frontend/src/index.js similarity index 100% rename from microchain-frontend/src/index.js rename to frontend/src/index.js diff --git a/microchain-frontend/src/logo.svg b/frontend/src/logo.svg similarity index 100% rename from microchain-frontend/src/logo.svg rename to frontend/src/logo.svg diff --git a/microchain-frontend/src/setupTests.js b/frontend/src/setupTests.js similarity index 100% rename from microchain-frontend/src/setupTests.js rename to frontend/src/setupTests.js 
diff --git a/microchain-frontend/yarn.lock b/frontend/yarn.lock similarity index 100% rename from microchain-frontend/yarn.lock rename to frontend/yarn.lock diff --git a/main.py b/main.py index 77de1d1..6f2c519 100644 --- a/main.py +++ b/main.py @@ -1,17 +1,16 @@ -# import importlib +import random + +from src import gpt, jina_cloud +from src.jina_cloud import push_executor, process_error_message +from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \ + chain_of_thought_optimization, requirements_file_task, docker_file_task, not_allowed +from src.utils.io import recreate_folder, persist_file +from src.utils.string_tools import print_colored + + import os import re -# -# from src import gpt, jina_cloud -# from src.constants import FILE_AND_TAG_PAIRS, EXECUTOR_FOLDER_v1, EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME, STREAMLIT_FILE_NAME -# from src.jina_cloud import update_client_line_in_file -# from src.prompt_system import system_base_definition -# from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \ -# test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task, chain_of_thought_creation -# from src.utils.io import recreate_folder -# from src.utils.string_tools import find_differences -# -# + from src.constants import FILE_AND_TAG_PAIRS @@ -22,16 +21,7 @@ def extract_content_from_result(plain_text, file_name): return match.group(1).strip() else: return '' -# -# -# def extract_and_write(plain_text, dest_folder): -# for file_name, tag in FILE_AND_TAG_PAIRS: -# clean = extract_content_from_result(plain_text, file_name) -# full_path = os.path.join(dest_folder, file_name) -# with open(full_path, 'w') as f: -# f.write(clean) -# -# + def write_config_yml(executor_name, dest_folder): config_content = f''' jtype: {executor_name} @@ -42,8 +32,7 @@ metas: ''' with open(os.path.join(dest_folder, 'config.yml'), 'w') as f: f.write(config_content) -# -# + def get_all_executor_files_with_content(folder_path): file_name_to_content = {} for filename in os.listdir(folder_path): @@ -55,58 +44,7 @@ def get_all_executor_files_with_content(folder_path): file_name_to_content[filename] = content return file_name_to_content -# -# -# -# -# def build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality, -# output_doc_field, output_modality, test_in, test_out): -# system_definition = ( -# system_base_definition -# + "The user is asking you to create an executor with all the necessary files " -# "and you write the complete code without leaving something out. " -# ) -# user_query = ( -# general_guidelines() -# + executor_file_task(executor_name, executor_description, input_modality, input_doc_field, -# output_modality, output_doc_field) -# + test_executor_file_task(executor_name, test_in, test_out) -# + requirements_file_task() -# + docker_file_task() -# + client_file_task() -# + streamlit_file_task() -# + chain_of_thought_creation() -# ) -# plain_text = gpt.get_response(system_definition, user_query) -# return plain_text -# -# -# def build_production_ready_implementation(all_executor_files_string): -# system_definition = ( -# system_base_definition -# + f"The user gives you the code of the executor and all other files needed ({', '.join([e[0] for e in FILE_AND_TAG_PAIRS])}) " -# f"The files may contain bugs. Fix all of them. " -# -# ) -# user_query = ( -# 'Make it production ready. ' -# "Fix all files and add all missing code. 
" -# "Keep the same format as given to you. " -# f"Some files might have only prototype implementations and are not production ready. Add all the missing code. " -# f"Some imports might be missing. Make sure to add them. " -# f"Some libraries might be missing from the requirements.txt. Make sure to install them." -# f"Somthing might be wrong in the Dockerfile. For example, some libraries might be missing. Install them." -# f"Or not all files are copied to the right destination in the Dockerfile. Copy them to the correct destination. " -# "First write down an extensive list of obvious and non-obvious observations about the parts that could need an adjustment. Explain why. " -# "Think about if all the changes are required and finally decide for the changes you want to make. " -# f"Output all the files even the ones that did not change. " -# "Here are the files: \n\n" -# + all_executor_files_string -# ) -# all_executor_files_string_improved = gpt.get_response(system_definition, user_query) -# print('DIFFERENCES:', find_differences(all_executor_files_string, all_executor_files_string_improved)) -# return all_executor_files_string_improved -# + def files_to_string(file_name_to_content): all_executor_files_string = '' for file_name, tag in FILE_AND_TAG_PAIRS: @@ -116,84 +54,322 @@ def files_to_string(file_name_to_content): all_executor_files_string += file_name_to_content[file_name] all_executor_files_string += '\n```\n\n' return all_executor_files_string -# -# -# def main( -# executor_name, -# executor_description, -# input_modality, -# input_doc_field, -# output_modality, -# output_doc_field, -# test_in, -# test_out, -# do_validation=True -# ): -# recreate_folder(EXECUTOR_FOLDER_v1) -# recreate_folder(EXECUTOR_FOLDER_v2) -# recreate_folder('flow') -# -# all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality, -# output_doc_field, output_modality, test_in, test_out) -# extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1) -# write_config_yml(executor_name, EXECUTOR_FOLDER_v1) -# file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1) -# all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1) -# -# all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions) -# extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2) -# write_config_yml(executor_name, EXECUTOR_FOLDER_v2) -# -# jina_cloud.push_executor(EXECUTOR_FOLDER_v2) -# -# host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow') -# -# update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, CLIENT_FILE_NAME), host) -# update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, STREAMLIT_FILE_NAME), host) -# update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME), host) -# update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, STREAMLIT_FILE_NAME), host) -# -# if do_validation: -# importlib.import_module("executor_v1.client") -# -# return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2) -# -# -# if __name__ == '__main__': -# # ######### Level 2 task ######### -# # main( -# # executor_name='My3DTo2DExecutor', -# # executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object", -# # input_modality='3d', -# # input_doc_field='blob', -# # output_modality='image', -# # output_doc_field='blob', -# # 
test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj', -# # test_out='the output should be exactly one image in png format', -# # do_validation=False -# # ) -# -# ######## Level 1 task ######### -# main( -# executor_name='MyCoolOcrExecutor', -# executor_description="OCR detector", -# input_modality='image', -# input_doc_field='uri', -# output_modality='text', -# output_doc_field='text', -# test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png', -# test_out='output should contain the string "Hello, world"', -# do_validation=False -# ) -# -# # main( -# # executor_name='MySentimentAnalyzer', -# # executor_description="Sentiment analysis executor", -# # input_modality='text', -# # input_doc_field='text', -# # output_modality='sentiment', -# # output_doc_field='sentiment_label', -# # test_in='This is a fantastic product! I love it!', -# # test_out='positive', -# # do_validation=False -# # ) \ No newline at end of file + + +def wrap_content_in_code_block(executor_content, file_name, tag): + return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n' + + +def create_executor( + executor_description, + test_scenario, + executor_name, + package, + is_chain_of_thought=False, +): + EXECUTOR_FOLDER_v1 = get_executor_path(package, 1) + recreate_folder(EXECUTOR_FOLDER_v1) + recreate_folder('flow') + + print_colored('', '############# Executor #############', 'red') + user_query = ( + general_guidelines() + + executor_file_task(executor_name, executor_description, test_scenario, package) + + chain_of_thought_creation() + ) + conversation = gpt.Conversation() + executor_content_raw = conversation.query(user_query) + if is_chain_of_thought: + executor_content_raw = conversation.query( + f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py')) + executor_content = extract_content_from_result(executor_content_raw, 'executor.py') + + persist_file(executor_content, EXECUTOR_FOLDER_v1 + '/executor.py') + + print_colored('', '############# Test Executor #############', 'red') + user_query = ( + general_guidelines() + + wrap_content_in_code_block(executor_content, 'executor.py', 'python') + + test_executor_file_task(executor_name, test_scenario) + ) + conversation = gpt.Conversation() + test_executor_content_raw = conversation.query(user_query) + if is_chain_of_thought: + test_executor_content_raw = conversation.query( + f"General rules: " + not_allowed() + + chain_of_thought_optimization('python', 'test_executor.py') + + "Don't add any additional tests. 
" + ) + test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py') + persist_file(test_executor_content, EXECUTOR_FOLDER_v1 + '/test_executor.py') + + print_colored('', '############# Requirements #############', 'red') + user_query = ( + general_guidelines() + + wrap_content_in_code_block(executor_content, 'executor.py', 'python') + + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python') + + requirements_file_task() + ) + conversation = gpt.Conversation() + requirements_content_raw = conversation.query(user_query) + if is_chain_of_thought: + requirements_content_raw = conversation.query( + chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ") + + requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt') + persist_file(requirements_content, EXECUTOR_FOLDER_v1 + '/requirements.txt') + + print_colored('', '############# Dockerfile #############', 'red') + user_query = ( + general_guidelines() + + wrap_content_in_code_block(executor_content, 'executor.py', 'python') + + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python') + + wrap_content_in_code_block(requirements_content, 'requirements.txt', '') + + docker_file_task() + ) + conversation = gpt.Conversation() + dockerfile_content_raw = conversation.query(user_query) + if is_chain_of_thought: + dockerfile_content_raw = conversation.query( + f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile')) + dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile') + persist_file(dockerfile_content, EXECUTOR_FOLDER_v1 + '/Dockerfile') + + write_config_yml(executor_name, EXECUTOR_FOLDER_v1) + + +def create_playground(executor_name, executor_path, host): + print_colored('', '############# Playground #############', 'red') + + file_name_to_content = get_all_executor_files_with_content(executor_path) + user_query = ( + general_guidelines() + + wrap_content_in_code_block(file_name_to_content['executor.py'], 'executor.py', 'python') + + wrap_content_in_code_block(file_name_to_content['test_executor.py'], 'test_executor.py', 'python') + + f''' +Create a playground for the executor {executor_name} using streamlit. +The executor is hosted on {host}. 
+This is an example of how you can connect to the executor assuming the document (d) is already defined: +from jina import Client, Document, DocumentArray +client = Client(host='{host}') +response = client.post('/process', inputs=DocumentArray([d])) +print(response[0].text) # can also be blob in case of image/audio..., this should be visualized in the streamlit app +''' + ) + conversation = gpt.Conversation() + conversation.query(user_query) + playground_content_raw = conversation.query( + f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'app.py')) + playground_content = extract_content_from_result(playground_content_raw, 'app.py') + persist_file(playground_content, f'{executor_path}/app.py') + +def get_executor_path(package, version): + package_path = '_'.join(package) + return f'executor/{package_path}/v{version}' + +def debug_executor(package, executor_description, test_scenario): + MAX_DEBUGGING_ITERATIONS = 10 + error_before = '' + for i in range(1, MAX_DEBUGGING_ITERATIONS): + previous_executor_path = get_executor_path(package, i) + next_executor_path = get_executor_path(package, i + 1) + log_hubble = push_executor(previous_executor_path) + error = process_error_message(log_hubble) + if error: + recreate_folder(next_executor_path) + file_name_to_content = get_all_executor_files_with_content(previous_executor_path) + all_files_string = files_to_string(file_name_to_content) + user_query = ( + f"General rules: " + not_allowed() + + 'Here is the description of the task the executor must solve:\n' + + executor_description + + '\n\nHere is the test scenario the executor must pass:\n' + + test_scenario + + 'Here are all the files I use:\n' + + all_files_string + + (('This is an error that was already fixed before:\n' + + error_before) if error_before else '') + + '\n\nNow, I get the following error:\n' + + error + '\n' + + 'Think quickly about possible reasons. ' + 'Then output the files that need change. ' + "Don't output files that don't need change. " + "If you output a file, then write the complete file. " + "Use the exact same syntax to wrap the code:\n" + f"**...**\n" + f"```...\n" + f"...code...\n" + f"```\n\n" + ) + conversation = gpt.Conversation() + returned_files_raw = conversation.query(user_query) + for file_name, tag in FILE_AND_TAG_PAIRS: + updated_file = extract_content_from_result(returned_files_raw, file_name) + if updated_file: + file_name_to_content[file_name] = updated_file + + for file_name, content in file_name_to_content.items(): + persist_file(content, f'{next_executor_path}/{file_name}') + error_before = error + + else: + break + if i == MAX_DEBUGGING_ITERATIONS - 1: + raise MaxDebugTimeReachedException('Could not debug the executor.') + return get_executor_path(package, i) + +class MaxDebugTimeReachedException(BaseException): + pass + + +def generate_executor_name(executor_description): + conversation = gpt.Conversation() + user_query = f''' +Generate a name for the executor matching the description: +"{executor_description}" +The executor name must fulfill the following criteria: +- camel case +- start with a capital letter +- consist only of lower and upper case characters +- end with Executor. 
+ +The output is the raw string wrapped into ``` and starting with **name.txt** like this: +**name.txt** +``` +PDFParserExecutor +``` +''' + name_raw = conversation.query(user_query) + name = extract_content_from_result(name_raw, 'name.txt') + return name + + +def main( + executor_description, + test_scenario, + threads=3, +): + generated_name = generate_executor_name(executor_description) + executor_name = f'{generated_name}{random.randint(0, 1000_000)}' + + packages = get_possible_packages(executor_description, threads) + recreate_folder('executor') + for package in packages: + try: + create_executor(executor_description, test_scenario, executor_name, package) + # executor_name = 'MicroChainExecutor790050' + executor_path = debug_executor(package, executor_description, test_scenario) + # print('Executor can be built locally, now we will push it to the cloud.') + # jina_cloud.push_executor(executor_path) + print('Deploy a jina flow') + host = jina_cloud.deploy_flow(executor_name, executor_path) + print(f'Flow is deployed, creating the playground for {host}') + create_playground(executor_name, executor_path, host) + except MaxDebugTimeReachedException: + print('Could not debug the executor.') + continue + print( + 'Executor name:', executor_name, '\n', + 'Executor path:', executor_path, '\n', + 'Host:', host, '\n', + 'Playground:', f'streamlit run {executor_path}/app.py', '\n', + ) + break + + +def get_possible_packages(executor_description, threads): + print_colored('', '############# What package to use? #############', 'red') + user_query = f''' +Here is the task description of the problem you need to solve: +"{executor_description}" +First, write down all the subtasks you need to solve which require python packages. +For each subtask: + Provide a list of 1 to 3 python packages you could use to solve the subtask. Prefer modern packages. + For each package: + Write down some non-obvious thoughts about the challenges you might face for the task and give multiple approaches for handling them. + For example, there might be some packages you must not use because they do not obey the rules: + {not_allowed()} + Discuss the pros and cons for all of these packages. +Create a list of package subsets that you could use to solve the task. +The list is sorted in a way that the most promising subset of packages is at the top. +The maximum length of the list is 5. + +The output must be a list of lists wrapped into ``` and starting with **packages.csv** like this: +**packages.csv** +``` +package1,package2 +package2,package3,... +... +``` + ''' + conversation = gpt.Conversation() + packages_raw = conversation.query(user_query) + packages_csv_string = extract_content_from_result(packages_raw, 'packages.csv') + packages = [package.split(',') for package in packages_csv_string.split('\n')] + packages = packages[:threads] + return packages + + +if __name__ == '__main__': + # accomplished tasks: + + # main( + # executor_description="The executor takes a url of a website as input and classifies it as either individual or business.", + # test_scenario='Takes https://jina.ai/ as input and returns "business". Takes https://hanxiao.io/ as input and returns "individual". 
', + # ) + + # needs to prove: + + # ######## Level 1 task ######### + # main( + # executor_description="The executor takes a pdf file as input, parses it and returns the text.", + # input_modality='pdf', + # output_modality='text', + # test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long', + # ) + + # main( + # executor_description="The executor takes a url of a website as input and returns the logo of the website as an image.", + # test_scenario='Takes https://jina.ai/ as input and returns an svg image of the logo.', + # ) + + + # # # ######## Level 1 task ######### + # main( + # executor_description="The executor takes a pdf file as input, parses it and returns the text.", + # input_modality='pdf', + # output_modality='text', + # test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long', + # ) + + # ######## Level 2 task ######### + # main( + # executor_description="OCR detector", + # input_modality='image', + # output_modality='text', + # test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"', + # ) + + # ######## Level 3 task ######### + main( + executor_description="The executor takes an mp3 file as input and returns bpm and pitch in a json.", + test_scenario='Takes https://cdn.pixabay.com/download/audio/2023/02/28/audio_550d815fa5.mp3 as input and returns a json with bpm and pitch', + ) + + ######### Level 4 task ######### + # main( + # executor_description="The executor takes 3D objects in obj format as input " + # "and outputs a 2D image projection of that object where the full object is shown. ", + # input_modality='3d', + # output_modality='image', + # test_scenario='Test that 3d object from https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj ' + # 'is put in and out comes a 2d rendering of it', + # ) + + # ######## Level 8 task ######### + # main( + # executor_description="The executor takes an image as input and returns a list of bounding boxes of all animals in the image.", + # input_modality='blob', + # output_modality='json', + # test_scenario='Take the image from https://thumbs.dreamstime.com/b/dog-professor-red-bow-tie-glasses-white-background-isolated-dog-professor-glasses-197036807.jpg as input and assert that the list contains at least one bounding box. 
', + # ) diff --git a/micro_chain.py b/micro_chain.py deleted file mode 100644 index f71282c..0000000 --- a/micro_chain.py +++ /dev/null @@ -1,306 +0,0 @@ -import json -import random - -from main import extract_content_from_result, write_config_yml, get_all_executor_files_with_content, files_to_string -from src import gpt, jina_cloud -from src.constants import FILE_AND_TAG_PAIRS -from src.jina_cloud import push_executor, process_error_message -from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \ - chain_of_thought_optimization, requirements_file_task, docker_file_task, not_allowed -from src.utils.io import recreate_folder, persist_file -from src.utils.string_tools import print_colored - - -def wrap_content_in_code_block(executor_content, file_name, tag): - return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n' - - -def create_executor( - executor_description, - test_scenario, - executor_name, - package, - is_chain_of_thought=False, -): - EXECUTOR_FOLDER_v1 = get_executor_path(package, 1) - recreate_folder(EXECUTOR_FOLDER_v1) - recreate_folder('flow') - - print_colored('', '############# Executor #############', 'red') - user_query = ( - general_guidelines() - + executor_file_task(executor_name, executor_description, test_scenario, package) - + chain_of_thought_creation() - ) - conversation = gpt.Conversation() - executor_content_raw = conversation.query(user_query) - if is_chain_of_thought: - executor_content_raw = conversation.query( - f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py')) - executor_content = extract_content_from_result(executor_content_raw, 'executor.py') - - persist_file(executor_content, EXECUTOR_FOLDER_v1 + '/executor.py') - - print_colored('', '############# Test Executor #############', 'red') - user_query = ( - general_guidelines() - + wrap_content_in_code_block(executor_content, 'executor.py', 'python') - + test_executor_file_task(executor_name, test_scenario) - ) - conversation = gpt.Conversation() - test_executor_content_raw = conversation.query(user_query) - if is_chain_of_thought: - test_executor_content_raw = conversation.query( - f"General rules: " + not_allowed() + - chain_of_thought_optimization('python', 'test_executor.py') - + "Don't add any additional tests. 
" - ) - test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py') - persist_file(test_executor_content, EXECUTOR_FOLDER_v1 + '/test_executor.py') - - print_colored('', '############# Requirements #############', 'red') - user_query = ( - general_guidelines() - + wrap_content_in_code_block(executor_content, 'executor.py', 'python') - + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python') - + requirements_file_task() - ) - conversation = gpt.Conversation() - requirements_content_raw = conversation.query(user_query) - if is_chain_of_thought: - requirements_content_raw = conversation.query( - chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ") - - requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt') - persist_file(requirements_content, EXECUTOR_FOLDER_v1 + '/requirements.txt') - - print_colored('', '############# Dockerfile #############', 'red') - user_query = ( - general_guidelines() - + wrap_content_in_code_block(executor_content, 'executor.py', 'python') - + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python') - + wrap_content_in_code_block(requirements_content, 'requirements.txt', '') - + docker_file_task() - ) - conversation = gpt.Conversation() - dockerfile_content_raw = conversation.query(user_query) - if is_chain_of_thought: - dockerfile_content_raw = conversation.query( - f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile')) - dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile') - persist_file(dockerfile_content, EXECUTOR_FOLDER_v1 + '/Dockerfile') - - write_config_yml(executor_name, EXECUTOR_FOLDER_v1) - - -def create_playground(executor_name, executor_path, host): - print_colored('', '############# Playground #############', 'red') - - file_name_to_content = get_all_executor_files_with_content(executor_path) - user_query = ( - general_guidelines() - + wrap_content_in_code_block(file_name_to_content['executor.py'], 'executor.py', 'python') - + wrap_content_in_code_block(file_name_to_content['test_executor.py'], 'test_executor.py', 'python') - + f''' -Create a playground for the executor {executor_name} using streamlit. -The executor is hosted on {host}. 
-This is an example how you can connect to the executor assuming the document (d) is already defined: -from jina import Client, Document, DocumentArray -client = Client(host='{host}') -response = client.post('/process', inputs=DocumentArray([d])) -print(response[0].text) # can also be blob in case of image/audio..., this should be visualized in the streamlit app -''' - ) - conversation = gpt.Conversation() - conversation.query(user_query) - playground_content_raw = conversation.query( - f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'app.py')) - playground_content = extract_content_from_result(playground_content_raw, 'app.py') - persist_file(playground_content, f'{executor_path}/app.py') - -def get_executor_path(package, version): - package_path = '_'.join(package) - return f'executor/{package_path}/v{version}' - -def debug_executor(package, executor_description, test_scenario): - MAX_DEBUGGING_ITERATIONS = 10 - error_before = '' - for i in range(1, MAX_DEBUGGING_ITERATIONS): - previous_executor_path = get_executor_path(package, i) - next_executor_path = get_executor_path(package, i + 1) - log_hubble = push_executor(previous_executor_path) - error = process_error_message(log_hubble) - if error: - recreate_folder(next_executor_path) - file_name_to_content = get_all_executor_files_with_content(previous_executor_path) - all_files_string = files_to_string(file_name_to_content) - user_query = ( - f"General rules: " + not_allowed() - + 'Here is the description of the task the executor must solve:\n' - + executor_description - + '\n\nHere is the test scenario the executor must pass:\n' - + test_scenario - + 'Here are all the files I use:\n' - + all_files_string - + (('This is an error that is already fixed before:\n' - + error_before) if error_before else '') - + '\n\nNow, I get the following error:\n' - + error + '\n' - + 'Think quickly about possible reasons. ' - 'Then output the files that need change. ' - "Don't output files that don't need change. " - "If you output a file, then write the complete file. 
" - "Use the exact same syntax to wrap the code:\n" - f"**...**\n" - f"```...\n" - f"...code...\n" - f"```\n\n" - ) - conversation = gpt.Conversation() - returned_files_raw = conversation.query(user_query) - for file_name, tag in FILE_AND_TAG_PAIRS: - updated_file = extract_content_from_result(returned_files_raw, file_name) - if updated_file: - file_name_to_content[file_name] = updated_file - - for file_name, content in file_name_to_content.items(): - persist_file(content, f'{next_executor_path}/{file_name}') - error_before = error - - else: - break - if i == MAX_DEBUGGING_ITERATIONS - 1: - raise MaxDebugTimeReachedException('Could not debug the executor.') - return get_executor_path(package, i) - -class MaxDebugTimeReachedException(BaseException): - pass - -def main( - executor_description, - test_scenario, - threads=3, -): - executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}' - - packages = get_possible_packages(executor_description, threads) - recreate_folder('executor') - for package in packages: - try: - create_executor(executor_description, test_scenario, executor_name, package) - # executor_name = 'MicroChainExecutor790050' - executor_path = debug_executor(package, executor_description, test_scenario) - # print('Executor can be built locally, now we will push it to the cloud.') - # jina_cloud.push_executor(executor_path) - print('Deploy a jina flow') - host = jina_cloud.deploy_flow(executor_name, executor_path) - print(f'Flow is deployed create the playground for {host}') - create_playground(executor_name, executor_path, host) - except MaxDebugTimeReachedException: - print('Could not debug the executor.') - continue - print( - 'Executor name:', executor_name, '\n', - 'Executor path:', executor_path, '\n', - 'Host:', host, '\n', - 'Playground:', f'streamlit run {executor_path}/app.py', '\n', - ) - break - - -def get_possible_packages(executor_description, threads): - print_colored('', '############# What package to use? #############', 'red') - user_query = f''' -Here is the task description of the problme you need to solve: -"{executor_description}" -First, write down all the subtasks you need to solve which require python packages. -For each subtask: - Provide a list of 1 to 3 python packages you could use to solve the subtask. Prefer modern packages. - For each package: - Write down some non-obvious thoughts about the challenges you might face for the task and give multiple approaches on how you handle them. - For example, there might be some packages you must not use because they do not obay the rules: - {not_allowed()} - Discuss the pros and cons for all of these packages. -Create a list of package subsets that you could use to solve the task. -The list is sorted in a way that the most promising subset of packages is at the top. -The maximum length of the list is 5. - -The output must be a list of lists wrapped into ``` and starting with **packages.csv** like this: -**packages.csv** -``` -package1,package2 -package2,package3,... -... 
-``` - ''' - conversation = gpt.Conversation() - packages_raw = conversation.query(user_query) - packages_csv_string = extract_content_from_result(packages_raw, 'packages.csv') - packages = [package.split(',') for package in packages_csv_string.split('\n')] - packages = packages[:threads] - return packages - - -if __name__ == '__main__': - # accomplished tasks: - - # main( - # executor_description="The executor takes a url of a website as input and classifies it as either individual or business.", - # test_scenario='Takes https://jina.ai/ as input and returns "business". Takes https://hanxiao.io/ as input and returns "individual". ', - # ) - - # needs to prove: - - # ######## Level 1 task ######### - # main( - # executor_description="The executor takes a pdf file as input, parses it and returns the text.", - # input_modality='pdf', - # output_modality='text', - # test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long', - # ) - - # main( - # executor_description="The executor takes a url of a website as input and returns the logo of the website as an image.", - # test_scenario='Takes https://jina.ai/ as input and returns an svg image of the logo.', - # ) - - - # # # ######## Level 1 task ######### - # main( - # executor_description="The executor takes a pdf file as input, parses it and returns the text.", - # input_modality='pdf', - # output_modality='text', - # test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long', - # ) - - # ######## Level 2 task ######### - # main( - # executor_description="OCR detector", - # input_modality='image', - # output_modality='text', - # test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"', - # ) - - # ######## Level 3 task ######### - main( - executor_description="The executor takes an mp3 file as input and returns bpm and pitch in a json.", - test_scenario='Takes https://cdn.pixabay.com/download/audio/2023/02/28/audio_550d815fa5.mp3 as input and returns a json with bpm and pitch', - ) - - ######### Level 4 task ######### - # main( - # executor_description="The executor takes 3D objects in obj format as input " - # "and outputs a 2D image projection of that object where the full object is shown. ", - # input_modality='3d', - # output_modality='image', - # test_scenario='Test that 3d object from https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj ' - # 'is put in and out comes a 2d rendering of it', - # ) - - # ######## Level 8 task ######### - # main( - # executor_description="The executor takes an image as input and returns a list of bounding boxes of all animals in the image.", - # input_modality='blob', - # output_modality='json', - # test_scenario='Take the image from https://thumbs.dreamstime.com/b/dog-professor-red-bow-tie-glasses-white-background-isolated-dog-professor-glasses-197036807.jpg as input and assert that the list contains at least one bounding box. 
', - # ) diff --git a/server.py b/server.py index f71dbb0..1b4383a 100644 --- a/server.py +++ b/server.py @@ -1,67 +1,55 @@ -# from fastapi import FastAPI -# from fastapi.exceptions import RequestValidationError -# from pydantic import BaseModel, HttpUrl -# from typing import Optional, Dict -# -# from starlette.middleware.cors import CORSMiddleware -# from starlette.requests import Request -# from starlette.responses import JSONResponse -# -# from main import main -# -# app = FastAPI() -# -# # Define the request model -# class CreateRequest(BaseModel): -# executor_name: str -# executor_description: str -# input_modality: str -# input_doc_field: str -# output_modality: str -# output_doc_field: str -# test_in: str -# test_out: str -# -# # Define the response model -# class CreateResponse(BaseModel): -# result: Dict[str, str] -# success: bool -# message: Optional[str] -# -# @app.post("/create", response_model=CreateResponse) -# def create_endpoint(request: CreateRequest): -# -# result = main( -# executor_name=request.executor_name, -# executor_description=request.executor_description, -# input_modality=request.input_modality, -# input_doc_field=request.input_doc_field, -# output_modality=request.output_modality, -# output_doc_field=request.output_doc_field, -# test_in=request.test_in, -# test_out=request.test_out, -# do_validation=False -# ) -# return CreateResponse(result=result, success=True, message=None) -# -# -# app.add_middleware( -# CORSMiddleware, -# allow_origins=["*"], -# allow_credentials=True, -# allow_methods=["*"], -# allow_headers=["*"], -# ) -# -# # Add a custom exception handler for RequestValidationError -# @app.exception_handler(RequestValidationError) -# def validation_exception_handler(request: Request, exc: RequestValidationError): -# return JSONResponse( -# status_code=422, -# content={"detail": exc.errors()}, -# ) -# -# -# if __name__ == "__main__": -# import uvicorn -# uvicorn.run("server:app", host="0.0.0.0", port=8000, log_level="info") +from fastapi import FastAPI +from fastapi.exceptions import RequestValidationError +from jina import Flow +from pydantic import BaseModel, HttpUrl +from typing import Optional, Dict + +from starlette.middleware.cors import CORSMiddleware +from starlette.requests import Request +from starlette.responses import JSONResponse +Flow.plot() +from main import main + +app = FastAPI() + +# Define the request model +class CreateRequest(BaseModel): + test_scenario: str + executor_description: str + +# Define the response model +class CreateResponse(BaseModel): + result: Dict[str, str] + success: bool + message: Optional[str] + +@app.post("/create", response_model=CreateResponse) +def create_endpoint(request: CreateRequest): + + result = main( + executor_description=request.executor_description, + test_scenario=request.test_scenario, + ) + return CreateResponse(result=result, success=True, message=None) + + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Add a custom exception handler for RequestValidationError +@app.exception_handler(RequestValidationError) +def validation_exception_handler(request: Request, exc: RequestValidationError): + return JSONResponse( + status_code=422, + content={"detail": exc.errors()}, + ) + + +if __name__ == "__main__": + import uvicorn + uvicorn.run("server:app", host="0.0.0.0", port=8000, log_level="info") diff --git a/src/gpt.py b/src/gpt.py index 84a1888..1a8d2e6 100644 --- a/src/gpt.py +++ b/src/gpt.py @@ -15,8 
+15,10 @@ total_chars_prompt = 0 total_chars_generation = 0 class Conversation: - def __init__(self): - self.prompt_list = [('system', system_base_definition)] + def __init__(self, prompt_list: List[Tuple[str, str]] = None): + if prompt_list is None: + prompt_list = [('system', system_base_definition)] + self.prompt_list = prompt_list print_colored('system', system_base_definition, 'magenta') def query(self, prompt: str):