diff --git a/main.py b/main.py
index ac90e9b..9ca4601 100644
--- a/main.py
+++ b/main.py
@@ -1,17 +1,17 @@
-import importlib
+# import importlib
 import os
 import re
-
-from src import gpt, jina_cloud
-from src.constants import FILE_AND_TAG_PAIRS, EXECUTOR_FOLDER_v1, EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME, STREAMLIT_FILE_NAME
-from src.jina_cloud import update_client_line_in_file
-from src.prompt_system import system_base_definition
-from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \
-    test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task
-from src.utils.io import recreate_folder
-from src.utils.string_tools import find_differences
-
-
+#
+# from src import gpt, jina_cloud
+# from src.constants import FILE_AND_TAG_PAIRS, EXECUTOR_FOLDER_v1, EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME, STREAMLIT_FILE_NAME
+# from src.jina_cloud import update_client_line_in_file
+# from src.prompt_system import system_base_definition
+# from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \
+#     test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task, chain_of_thought_creation
+# from src.utils.io import recreate_folder
+# from src.utils.string_tools import find_differences
+#
+#
 def extract_content_from_result(plain_text, file_name):
     pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)```"
     match = re.search(pattern, plain_text, re.MULTILINE)
@@ -19,16 +19,16 @@ def extract_content_from_result(plain_text, file_name):
         return match.group(1).strip()
     else:
         raise ValueError(f'Could not find {file_name} in result')
-
-
-def extract_and_write(plain_text, dest_folder):
-    for file_name, tag in FILE_AND_TAG_PAIRS:
-        clean = extract_content_from_result(plain_text, file_name)
-        full_path = os.path.join(dest_folder, file_name)
-        with open(full_path, 'w') as f:
-            f.write(clean)
-
-
+#
+#
+# def extract_and_write(plain_text, dest_folder):
+#     for file_name, tag in FILE_AND_TAG_PAIRS:
+#         clean = extract_content_from_result(plain_text, file_name)
+#         full_path = os.path.join(dest_folder, file_name)
+#         with open(full_path, 'w') as f:
+#             f.write(clean)
+#
+#
 def write_config_yml(executor_name, dest_folder):
     config_content = f'''
 jtype: {executor_name}
@@ -39,156 +39,157 @@ metas:
 '''
     with open(os.path.join(dest_folder, 'config.yml'), 'w') as f:
         f.write(config_content)
-
-
-def get_all_executor_files_with_content(folder_path):
-    file_name_to_content = {}
-    for filename in os.listdir(folder_path):
-        file_path = os.path.join(folder_path, filename)
-
-        if os.path.isfile(file_path):
-            with open(file_path, 'r', encoding='utf-8') as file:
-                content = file.read()
-            file_name_to_content[filename] = content
-
-    return file_name_to_content
-
-
-def build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
-                                   output_doc_field, output_modality, test_in, test_out):
-    system_definition = (
-            system_base_definition
-            + "The user is asking you to create an executor with all the necessary files "
-              "and you write the complete code without leaving something out. "
-    )
-    user_query = (
-            general_guidelines()
-            + executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
-                                 output_modality, output_doc_field)
-            + test_executor_file_task(executor_name, test_in, test_out)
-            + requirements_file_task()
-            + docker_file_task()
-            + client_file_task()
-            + streamlit_file_task()
-            + "First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. "
-              "For example, there are different libraries you could use. "
-              "Discuss the pros and cons for all of these approaches and then decide for one of the approaches. "
-              "Then write as I told you. "
-    )
-    plain_text = gpt.get_response(system_definition, user_query)
-    return plain_text
-
-
-def build_production_ready_implementation(all_executor_files_string):
-    system_definition = (
-            system_base_definition
-            + f"The user gives you the code of the executor and all other files needed ({', '.join([e[0] for e in FILE_AND_TAG_PAIRS])}) "
-              f"The files may contain bugs. Fix all of them. "
-
-    )
-    user_query = (
-            'Make it production ready. '
-            "Fix all files and add all missing code. "
-            "Keep the same format as given to you. "
-            f"Some files might have only prototype implementations and are not production ready. Add all the missing code. "
-            f"Some imports might be missing. Make sure to add them. "
-            f"Some libraries might be missing. Make sure to install them in the requirements.txt and Dockerfile. "
-            "First write down an extensive list of obvious and non-obvious thoughts about what parts could need an adjustment and why. "
-            "Think about if all the changes are required and finally decide for the changes you want to make. "
-            f"Output all the files even the ones that did not change. "
-            "Here are the files: \n\n"
-            + all_executor_files_string
-    )
-    all_executor_files_string_improved = gpt.get_response(system_definition, user_query)
-    print('DIFFERENCES:', find_differences(all_executor_files_string, all_executor_files_string_improved))
-    return all_executor_files_string_improved
-
-def files_to_string(file_name_to_content):
-    all_executor_files_string = ''
-    for file_name, tag in FILE_AND_TAG_PAIRS:
-        all_executor_files_string += f'**{file_name}**\n'
-        all_executor_files_string += f'```{tag}\n'
-        all_executor_files_string += file_name_to_content[file_name]
-        all_executor_files_string += '\n```\n\n'
-    return all_executor_files_string
-
-
-def main(
-        executor_name,
-        executor_description,
-        input_modality,
-        input_doc_field,
-        output_modality,
-        output_doc_field,
-        test_in,
-        test_out,
-        do_validation=True
-):
-    recreate_folder(EXECUTOR_FOLDER_v1)
-    recreate_folder(EXECUTOR_FOLDER_v2)
-    recreate_folder('flow')
-
-    all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
-                                                               output_doc_field, output_modality, test_in, test_out)
-    extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1)
-    write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
-    file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1)
-    all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1)
-
-    all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions)
-    extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2)
-    write_config_yml(executor_name, EXECUTOR_FOLDER_v2)
-
-    jina_cloud.push_executor(EXECUTOR_FOLDER_v2)
-
-    host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
-
-    update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, CLIENT_FILE_NAME), host)
-    update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, STREAMLIT_FILE_NAME), host)
-    update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME), host)
-    update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, STREAMLIT_FILE_NAME), host)
-
-    if do_validation:
-        importlib.import_module("executor_v1.client")
-
-    return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2)
-
-
-if __name__ == '__main__':
-    # ######### Level 2 task #########
-    # main(
-    #     executor_name='My3DTo2DExecutor',
-    #     executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object",
-    #     input_modality='3d',
-    #     input_doc_field='blob',
-    #     output_modality='image',
-    #     output_doc_field='blob',
-    #     test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj',
-    #     test_out='the output should be exactly one image in png format',
-    #     do_validation=False
-    # )
-
-    ######## Level 1 task #########
-    main(
-        executor_name='MyCoolOcrExecutor',
-        executor_description="OCR detector",
-        input_modality='image',
-        input_doc_field='uri',
-        output_modality='text',
-        output_doc_field='text',
-        test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png',
-        test_out='> Hello, world!_',
-        do_validation=False
-    )
-
-    # main(
-    #     executor_name='MySentimentAnalyzer',
-    #     executor_description="Sentiment analysis executor",
-    #     input_modality='text',
-    #     input_doc_field='text',
-    #     output_modality='sentiment',
-    #     output_doc_field='sentiment_label',
-    #     test_in='This is a fantastic product! I love it!',
-    #     test_out='positive',
-    #     do_validation=False
-    # )
\ No newline at end of file
+#
+#
+# def get_all_executor_files_with_content(folder_path):
+#     file_name_to_content = {}
+#     for filename in os.listdir(folder_path):
+#         file_path = os.path.join(folder_path, filename)
+#
+#         if os.path.isfile(file_path):
+#             with open(file_path, 'r', encoding='utf-8') as file:
+#                 content = file.read()
+#             file_name_to_content[filename] = content
+#
+#     return file_name_to_content
+#
+#
+#
+#
+# def build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
+#                                    output_doc_field, output_modality, test_in, test_out):
+#     system_definition = (
+#             system_base_definition
+#             + "The user is asking you to create an executor with all the necessary files "
+#               "and you write the complete code without leaving something out. "
+#     )
+#     user_query = (
+#             general_guidelines()
+#             + executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
+#                                  output_modality, output_doc_field)
+#             + test_executor_file_task(executor_name, test_in, test_out)
+#             + requirements_file_task()
+#             + docker_file_task()
+#             + client_file_task()
+#             + streamlit_file_task()
+#             + chain_of_thought_creation()
+#     )
+#     plain_text = gpt.get_response(system_definition, user_query)
+#     return plain_text
+#
+#
+# def build_production_ready_implementation(all_executor_files_string):
+#     system_definition = (
+#             system_base_definition
+#             + f"The user gives you the code of the executor and all other files needed ({', '.join([e[0] for e in FILE_AND_TAG_PAIRS])}) "
+#               f"The files may contain bugs. Fix all of them. "
+#
+#     )
+#     user_query = (
+#             'Make it production ready. '
+#             "Fix all files and add all missing code. "
+#             "Keep the same format as given to you. "
+#             f"Some files might have only prototype implementations and are not production ready. Add all the missing code. "
+#             f"Some imports might be missing. Make sure to add them. "
+#             f"Some libraries might be missing from the requirements.txt. Make sure to install them."
+#             f"Something might be wrong in the Dockerfile. For example, some libraries might be missing. Install them."
+#             f"Or not all files are copied to the right destination in the Dockerfile. Copy them to the correct destination. "
+#             "First write down an extensive list of obvious and non-obvious observations about the parts that could need an adjustment. Explain why. "
+#             "Think about if all the changes are required and finally decide for the changes you want to make. "
+#             f"Output all the files even the ones that did not change. "
+#             "Here are the files: \n\n"
+#             + all_executor_files_string
+#     )
+#     all_executor_files_string_improved = gpt.get_response(system_definition, user_query)
+#     print('DIFFERENCES:', find_differences(all_executor_files_string, all_executor_files_string_improved))
+#     return all_executor_files_string_improved
+#
+# def files_to_string(file_name_to_content):
+#     all_executor_files_string = ''
+#     for file_name, tag in FILE_AND_TAG_PAIRS:
+#         all_executor_files_string += f'**{file_name}**\n'
+#         all_executor_files_string += f'```{tag}\n'
+#         all_executor_files_string += file_name_to_content[file_name]
+#         all_executor_files_string += '\n```\n\n'
+#     return all_executor_files_string
+#
+#
+# def main(
+#         executor_name,
+#         executor_description,
+#         input_modality,
+#         input_doc_field,
+#         output_modality,
+#         output_doc_field,
+#         test_in,
+#         test_out,
+#         do_validation=True
+# ):
+#     recreate_folder(EXECUTOR_FOLDER_v1)
+#     recreate_folder(EXECUTOR_FOLDER_v2)
+#     recreate_folder('flow')
+#
+#     all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
+#                                                                output_doc_field, output_modality, test_in, test_out)
+#     extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1)
+#     write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
+#     file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1)
+#     all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1)
+#
+#     all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions)
+#     extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2)
+#     write_config_yml(executor_name, EXECUTOR_FOLDER_v2)
+#
+#     jina_cloud.push_executor(EXECUTOR_FOLDER_v2)
+#
+#     host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
+#
+#     update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, CLIENT_FILE_NAME), host)
+#     update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, STREAMLIT_FILE_NAME), host)
+#     update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME), host)
+#     update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, STREAMLIT_FILE_NAME), host)
+#
+#     if do_validation:
+#         importlib.import_module("executor_v1.client")
+#
+#     return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2)
+#
+#
+# if __name__ == '__main__':
+#     # ######### Level 2 task #########
+#     # main(
+#     #     executor_name='My3DTo2DExecutor',
+#     #     executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object",
+#     #     input_modality='3d',
+#     #     input_doc_field='blob',
+#     #     output_modality='image',
+#     #     output_doc_field='blob',
+#     #     test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj',
+#     #     test_out='the output should be exactly one image in png format',
+#     #     do_validation=False
+#     # )
+#
+#     ######## Level 1 task #########
+#     main(
+#         executor_name='MyCoolOcrExecutor',
+#         executor_description="OCR detector",
+#         input_modality='image',
+#         input_doc_field='uri',
+#         output_modality='text',
+#         output_doc_field='text',
+#         test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png',
+#         test_out='output should contain the string "Hello, world"',
+#         do_validation=False
+#     )
+#
+#     # main(
+#     #     executor_name='MySentimentAnalyzer',
+#     #     executor_description="Sentiment analysis executor",
+#     #     input_modality='text',
+#     #     input_doc_field='text',
+#     #     output_modality='sentiment',
+#     #     output_doc_field='sentiment_label',
+#     #     test_in='This is a fantastic product! I love it!',
+#     #     test_out='positive',
+#     #     do_validation=False
+#     # )
\ No newline at end of file
diff --git a/micro_chain.py b/micro_chain.py
new file mode 100644
index 0000000..4176a23
--- /dev/null
+++ b/micro_chain.py
@@ -0,0 +1,106 @@
+import random
+
+from main import extract_content_from_result, write_config_yml
+from src import gpt, jina_cloud
+from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \
+    chain_of_thought_optimization, requirements_file_task, docker_file_task
+from src.utils.io import recreate_folder, persist_file
+from src.utils.string_tools import print_colored
+
+
+def wrap_content_in_code_block(executor_content, file_name, tag):
+    return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n'
+
+
+def main(
+        executor_description,
+        input_modality,
+        # input_doc_field,
+        output_modality,
+        # output_doc_field,
+        test_scenario,
+        do_validation=True
+):
+    input_doc_field = 'text' if input_modality == 'text' else 'blob'
+    output_doc_field = 'text' if output_modality == 'text' else 'blob'
+    # random integer at the end of the executor name to avoid name clashes
+    executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
+    recreate_folder('executor')
+    recreate_folder('flow')
+
+    print_colored('', '############# Executor #############', 'red')
+    user_query = (
+            general_guidelines()
+            + executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
+                                 output_modality, output_doc_field)
+            + chain_of_thought_creation()
+    )
+    conversation = gpt.Conversation()
+    conversation.query(user_query)
+    executor_content_raw = conversation.query(chain_of_thought_optimization('python', 'executor.py'))
+    executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
+    persist_file(executor_content, 'executor.py')
+
+    print_colored('', '############# Test Executor #############', 'red')
+    user_query = (
+            general_guidelines()
+            + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+            + test_executor_file_task(executor_name, test_scenario)
+    )
+    conversation = gpt.Conversation()
+    conversation.query(user_query)
+    test_executor_content_raw = conversation.query(
+        chain_of_thought_optimization('python', 'test_executor.py')
+        + "Don't add any additional tests. "
+    )
+    test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py')
+    persist_file(test_executor_content, 'test_executor.py')
+
+    print_colored('', '############# Requirements #############', 'red')
+    user_query = (
+            general_guidelines()
+            + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+            + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
+            + requirements_file_task()
+    )
+    conversation = gpt.Conversation()
+    conversation.query(user_query)
+    requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt'))
+
+    requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
+    persist_file(requirements_content, 'requirements.txt')
+
+    print_colored('', '############# Dockerfile #############', 'red')
+    user_query = (
+            general_guidelines()
+            + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+            + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
+            + wrap_content_in_code_block(requirements_content, 'requirements.txt', '')
+            + docker_file_task()
+    )
+    conversation = gpt.Conversation()
+    conversation.query(user_query)
+    dockerfile_content_raw = conversation.query(chain_of_thought_optimization('dockerfile', 'Dockerfile'))
+    dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
+    persist_file(dockerfile_content, 'Dockerfile')
+
+    write_config_yml(executor_name, 'executor')
+
+    jina_cloud.push_executor('executor')
+
+    host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
+
+    # create playground and client.py
+
+
+if __name__ == '__main__':
+    ######## Level 1 task #########
+    main(
+        executor_description="OCR detector",
+        input_modality='image',
+        # input_doc_field='blob',
+        output_modality='text',
+        # output_doc_field='text',
+        test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
+        do_validation=False
+    )
diff --git a/server.py b/server.py
index ce135ae..f71dbb0 100644
--- a/server.py
+++ b/server.py
@@ -1,67 +1,67 @@
-from fastapi import FastAPI
-from fastapi.exceptions import RequestValidationError
-from pydantic import BaseModel, HttpUrl
-from typing import Optional, Dict
-
-from starlette.middleware.cors import CORSMiddleware
-from starlette.requests import Request
-from starlette.responses import JSONResponse
-
-from main import main
-
-app = FastAPI()
-
-# Define the request model
-class CreateRequest(BaseModel):
-    executor_name: str
-    executor_description: str
-    input_modality: str
-    input_doc_field: str
-    output_modality: str
-    output_doc_field: str
-    test_in: str
-    test_out: str
-
-# Define the response model
-class CreateResponse(BaseModel):
-    result: Dict[str, str]
-    success: bool
-    message: Optional[str]
-
-@app.post("/create", response_model=CreateResponse)
-def create_endpoint(request: CreateRequest):
-
-    result = main(
-        executor_name=request.executor_name,
-        executor_description=request.executor_description,
-        input_modality=request.input_modality,
-        input_doc_field=request.input_doc_field,
-        output_modality=request.output_modality,
-        output_doc_field=request.output_doc_field,
-        test_in=request.test_in,
-        test_out=request.test_out,
-        do_validation=False
-    )
-    return CreateResponse(result=result, success=True, message=None)
-
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-# Add a custom exception handler for RequestValidationError
-@app.exception_handler(RequestValidationError)
-def validation_exception_handler(request: Request, exc: RequestValidationError):
-    return JSONResponse(
-        status_code=422,
-        content={"detail": exc.errors()},
-    )
-
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run("server:app", host="0.0.0.0", port=8000, log_level="info")
+# from fastapi import FastAPI
+# from fastapi.exceptions import RequestValidationError
+# from pydantic import BaseModel, HttpUrl
+# from typing import Optional, Dict
+#
+# from starlette.middleware.cors import CORSMiddleware
+# from starlette.requests import Request
+# from starlette.responses import JSONResponse
+#
+# from main import main
+#
+# app = FastAPI()
+#
+# # Define the request model
+# class CreateRequest(BaseModel):
+#     executor_name: str
+#     executor_description: str
+#     input_modality: str
+#     input_doc_field: str
+#     output_modality: str
+#     output_doc_field: str
+#     test_in: str
+#     test_out: str
+#
+# # Define the response model
+# class CreateResponse(BaseModel):
+#     result: Dict[str, str]
+#     success: bool
+#     message: Optional[str]
+#
+# @app.post("/create", response_model=CreateResponse)
+# def create_endpoint(request: CreateRequest):
+#
+#     result = main(
+#         executor_name=request.executor_name,
+#         executor_description=request.executor_description,
+#         input_modality=request.input_modality,
+#         input_doc_field=request.input_doc_field,
+#         output_modality=request.output_modality,
+#         output_doc_field=request.output_doc_field,
+#         test_in=request.test_in,
+#         test_out=request.test_out,
+#         do_validation=False
+#     )
+#     return CreateResponse(result=result, success=True, message=None)
+#
+#
+# app.add_middleware(
+#     CORSMiddleware,
+#     allow_origins=["*"],
+#     allow_credentials=True,
+#     allow_methods=["*"],
+#     allow_headers=["*"],
+# )
+#
+# # Add a custom exception handler for RequestValidationError
+# @app.exception_handler(RequestValidationError)
+# def validation_exception_handler(request: Request, exc: RequestValidationError):
+#     return JSONResponse(
+#         status_code=422,
+#         content={"detail": exc.errors()},
+#     )
+#
+#
+# if __name__ == "__main__":
+#     import uvicorn
+#     uvicorn.run("server:app", host="0.0.0.0", port=8000, log_level="info")
diff --git a/src/gpt.py b/src/gpt.py
index 1391075..9443e70 100644
--- a/src/gpt.py
+++ b/src/gpt.py
@@ -1,36 +1,44 @@
 import os
 from time import sleep
+from typing import Union, List, Tuple
 
 import openai
 from openai.error import RateLimitError, Timeout
 
-from src.utils.io import timeout_generator_wrapper
+from src.prompt_system import system_base_definition
+from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError
 from src.utils.string_tools import print_colored
 
 openai.api_key = os.environ['OPENAI_API_KEY']
 
 
-def get_response(system_definition, user_query):
-    print_colored('system_definition', system_definition, 'magenta')
-    print_colored('user_query', user_query, 'blue')
+
+class Conversation:
+    def __init__(self):
+        self.prompt_list = [('system', system_base_definition)]
+        print_colored('system', system_base_definition, 'magenta')
+
+    def query(self, prompt: str):
+        print_colored('user', prompt, 'blue')
+        self.prompt_list.append(('user', prompt))
+        response = get_response(self.prompt_list)
+        self.prompt_list.append(('assistant', response))
+        return response
+
+
+def get_response(prompt_list: List[Tuple[str, str]]):
     for i in range(10):
        try:
            response_generator = openai.ChatCompletion.create(
                temperature=0,
-                max_tokens=5_000,
+                max_tokens=4_000,
                model="gpt-4",
                stream=True,
                messages=[
                    {
-                        "role": "system",
-                        "content": system_definition
-
-                    },
-                    {
-                        "role": "user",
-                        "content":
-                            user_query
-                    },
-
+                        "role": prompt[0],
+                        "content": prompt[1]
+                    }
+                    for prompt in prompt_list
                ]
            )
            response_generator_with_timeout = timeout_generator_wrapper(response_generator, 5)
@@ -40,12 +48,13 @@ def get_response(system_definition, user_query):
                delta = chunk['choices'][0]['delta']
                if 'content' in delta:
                    content = delta['content']
-                    print_colored('' if complete_string else 'Agent response:', content, 'green', end='')
+                    print_colored('' if complete_string else 'assistant', content, 'green', end='')
                    complete_string += content
+            print('\n')
            return complete_string
-        except (RateLimitError, Timeout, ConnectionError) as e:
+        except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e:
            print(e)
            print('retrying')
            sleep(3)
            continue
-    raise Exception('Failed to get response')
\ No newline at end of file
+    raise Exception('Failed to get response')
diff --git a/src/jina_cloud.py b/src/jina_cloud.py
index 3851d51..cb4a94a 100644
--- a/src/jina_cloud.py
+++ b/src/jina_cloud.py
@@ -9,7 +9,7 @@ from src.constants import FLOW_URL_PLACEHOLDER
 
 
 def push_executor(dir_path):
-    cmd = f'jina hub push {dir_path}/. --verbose'
+    cmd = f'jina hub push {dir_path}/. --verbose --replay'
     os.system(cmd)
 
 def get_user_name():
diff --git a/src/prompt_system.py b/src/prompt_system.py
index 16f999f..3f7bc2a 100644
--- a/src/prompt_system.py
+++ b/src/prompt_system.py
@@ -1,31 +1,34 @@
 from src.constants import FLOW_URL_PLACEHOLDER
 
-executor_example = "Here is an example of how an executor can be defined. It always starts with a comment:"
-'''
+executor_example = '''
+Using the Jina framework, users can define executors.
+Here is an example of how an executor can be defined. It always starts with a comment:
 
-# this executor takes ... as input and returns ... as output
-# it processes each document in the following way: ...
+**executor.py**
+```python
+# this executor takes binary files as input and returns the length of each binary file as output
 from jina import Executor, requests, DocumentArray, Document
 
 class MyInfoExecutor(Executor):
     def __init__(self, **kwargs):
         super().__init__()
 
-    @requests
+    @requests(on='/process') # this decorator is needed for every executor endpoint
     def foo(self, docs: DocumentArray, **kwargs) => DocumentArray:
         for d in docs:
             d.load_uri_to_blob()
             d.tags['my_info'] = {'byte_length': len(d.blob)}
             d.blob = None
         return docs
-'''
-"An executor gets a DocumentArray as input and returns a DocumentArray as output. "
+```
 
-docarray_example = (
-    "A DocumentArray is a python class that can be seen as a list of Documents. "
-    "A Document is a python class that represents a single document. "
-    "Here is the protobuf definition of a Document: "
+An executor gets a DocumentArray as input and returns a DocumentArray as output.
 '''
+
+docarray_example = '''
+A DocumentArray is a python class that can be seen as a list of Documents.
+A Document is a python class that represents a single document.
+Here is the protobuf definition of a Document:
+
 message DocumentProto {
   // A hexdigest that represents a unique document ID
   string id = 1;
@@ -57,9 +60,8 @@ message DocumentProto {
   google.protobuf.Struct tags = 9;
 }
-'''
-    "Here is an example of how a DocumentArray can be defined: "
-'''
+
+Here is an example of how a DocumentArray can be defined:
 
 from jina import DocumentArray, Document
 
@@ -82,25 +84,27 @@ docs = DocumentArray([
 # For instance, d4.load_uri_to_blob() downloads the file from d4.uri and stores it in d4.blob.
 # If d4.uri was something like 'https://website.web/img.jpg', then d4.blob would be something like b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01...
 '''
-)
 
-client_example = (
-"After the executor is deployed, it can be called via Jina Client. "
-"Here is an example of a client file: "
-f'''
+
+client_example = f'''
+After the executor is deployed, it can be called via Jina Client.
+Here is an example of a client file:
+
+**client.py**
+```python
 from jina import Client, Document, DocumentArray
 
 client = Client(host='{FLOW_URL_PLACEHOLDER}')
 d = Document(uri='data/img.png')
 d.load_uri_to_blob()
 response = client.post('/process', inputs=DocumentArray([d]))
 response[0].summary()
-''')
+```
+'''
 
-system_base_definition = (
-    "You are a principal engineer working at Jina - an open source company."
-    "Using the Jina framework, users can define executors. "
-    + executor_example
-    + docarray_example
-    + client_example
-)
\ No newline at end of file
+system_base_definition = f'''
+You are a principal engineer working at Jina - an open source company.
+{executor_example}
+{docarray_example}
+{client_example}
+'''
\ No newline at end of file
diff --git a/src/prompt_tasks.py b/src/prompt_tasks.py
index 86df418..97fb9e9 100644
--- a/src/prompt_tasks.py
+++ b/src/prompt_tasks.py
@@ -11,8 +11,6 @@ def general_guidelines():
         "Then all imports are listed. "
         "It is important to import all modules that could be needed in the executor code. "
         "Always import: "
-        "from typing import Dict, List, Optional, Tuple, Union "
-        "from io import BytesIO "
         "from jina import Executor, DocumentArray, Document, requests "
         "Start from top-level and then fully implement all methods. "
         "\n"
@@ -21,7 +19,7 @@ def general_guidelines():
 
 def _task(task, tag_name, file_name):
     return (
-            task + f"The code will go into {file_name}. Wrap the code is wrapped into:\n"
+            task + f"The code will go into {file_name}. Wrap the code into:\n"
                    f"**{file_name}**\n"
                    f"```{tag_name}\n"
                    f"...code...\n"
@@ -31,34 +29,39 @@ def _task(task, tag_name, file_name):
 
 def executor_file_task(executor_name, executor_description, input_modality, input_doc_field, output_modality,
                        output_doc_field):
-    return _task(
-        f"Write the executor called '{executor_name}'. "
-        f"It matches the following description: '{executor_description}'. "
-        f"It gets a DocumentArray as input where each document has the input modality '{input_modality}' that is stored in document.{input_doc_field}. "
-        f"It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}. "
-        f"Have in mind that d.uri is never a path to a local file. It is always a url.",
-        EXECUTOR_FILE_TAG,
-        EXECUTOR_FILE_NAME
-    )
+    return _task(f'''
+Write the executor called '{executor_name}'.
+It matches the following description: '{executor_description}'.
+It gets a DocumentArray as input where each document has the input modality '{input_modality}' and can be accessed via document.{input_doc_field}.
+It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}.
+Have in mind that d.uri is never a path to a local file. It is always a url.
+The executor is not allowed to use the GPU.
+The executor is not allowed to access external apis.
+''',
+                 EXECUTOR_FILE_TAG,
+                 EXECUTOR_FILE_NAME
+                 )
 
 
 def requirements_file_task():
     return _task(
         "Write the content of the requirements.txt file. "
         "Make sure to include pytest. "
-        "All versions are fixed. ",
+        "Make sure that jina==3.14.1. "
+        "All versions are fixed using ~=, ==, <, >, <=, >=. The package versions should not have conflicts. ",
         REQUIREMENTS_FILE_TAG,
         REQUIREMENTS_FILE_NAME
     )
 
 
-def test_executor_file_task(executor_name, test_in, test_out):
+def test_executor_file_task(executor_name, test_scenario):
     return _task(
         "Write a small unit test for the executor. "
         "Start the test with an extensive comment about the test case. "
-        + ((
-               "Test that the executor converts the input '" + test_in + "' to the output '" + test_out + "'. "
-           ) if test_in and test_out else "")
+        + (
+            f"Write a single test case that tests the following scenario: '{test_scenario}'. "
+            if test_scenario else ""
+        )
         + "Use the following import to import the executor: "
           f"from executor import {executor_name} ",
         TEST_EXECUTOR_FILE_TAG,
@@ -72,6 +75,7 @@ def docker_file_task():
         "The Dockerfile runs the test during the build process. "
         "It is important to make sure that all libs are installed that are required by the python packages. "
         "Usually libraries are installed with apt-get. "
+        "Be aware that the machine the docker container is running on does not have a GPU - only CPU. "
         "Add the config.yml file to the Dockerfile. "
         "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
         'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
@@ -95,3 +99,26 @@ def streamlit_file_task():
         STREAMLIT_FILE_TAG,
         STREAMLIT_FILE_NAME
     )
+
+
+def chain_of_thought_creation():
+    return (
+        "First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. "
+        "For example, there are different libraries you could use. "
+        "Discuss the pros and cons for all of these approaches and then decide for one of the approaches. "
+        "Then write as I told you. "
+    )
+
+
+def chain_of_thought_optimization(tag_name, file_name):
+    return _task(
+        f'First, write down an extensive list of obvious and non-obvious observations about {file_name} that could need an adjustment. Explain why. '
+        f"Think if all the changes are required and finally decide for the changes you want to make, "
+        f"but you are not allowed to disregard the instructions in the previous message. "
+        f"Be very hesitant to change the code. Only make a change if you are sure that it is necessary. "
+
+        f"Output only {file_name} "
+        f"Write the whole content of {file_name} - even if you decided to change only a small thing or even nothing. ",
+        tag_name,
+        file_name
+    )
diff --git a/src/utils/io.py b/src/utils/io.py
index 3963b98..05de15a 100644
--- a/src/utils/io.py
+++ b/src/utils/io.py
@@ -9,6 +9,10 @@ def recreate_folder(folder_path):
         shutil.rmtree(folder_path)
     os.makedirs(folder_path)
 
+def persist_file(file_content, file_name):
+    with open(f'executor/{file_name}', 'w') as f:
+        f.write(file_content)
+
 
 class GenerationTimeoutError(Exception):
     pass