feat: chain of thought

Florian Hönicke
2023-03-22 18:16:57 +01:00
parent d1954317fc
commit 1c8272e706
8 changed files with 456 additions and 305 deletions

main.py

@@ -1,17 +1,17 @@
import importlib
# import importlib
import os
import re
from src import gpt, jina_cloud
from src.constants import FILE_AND_TAG_PAIRS, EXECUTOR_FOLDER_v1, EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME, STREAMLIT_FILE_NAME
from src.jina_cloud import update_client_line_in_file
from src.prompt_system import system_base_definition
from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \
test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task
from src.utils.io import recreate_folder
from src.utils.string_tools import find_differences
#
# from src import gpt, jina_cloud
# from src.constants import FILE_AND_TAG_PAIRS, EXECUTOR_FOLDER_v1, EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME, STREAMLIT_FILE_NAME
# from src.jina_cloud import update_client_line_in_file
# from src.prompt_system import system_base_definition
# from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \
# test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task, chain_of_thought_creation
# from src.utils.io import recreate_folder
# from src.utils.string_tools import find_differences
#
#
def extract_content_from_result(plain_text, file_name):
pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)```"
match = re.search(pattern, plain_text, re.MULTILINE)
@@ -19,16 +19,16 @@ def extract_content_from_result(plain_text, file_name):
return match.group(1).strip()
else:
raise ValueError(f'Could not find {file_name} in result')
def extract_and_write(plain_text, dest_folder):
for file_name, tag in FILE_AND_TAG_PAIRS:
clean = extract_content_from_result(plain_text, file_name)
full_path = os.path.join(dest_folder, file_name)
with open(full_path, 'w') as f:
f.write(clean)
#
#
# def extract_and_write(plain_text, dest_folder):
# for file_name, tag in FILE_AND_TAG_PAIRS:
# clean = extract_content_from_result(plain_text, file_name)
# full_path = os.path.join(dest_folder, file_name)
# with open(full_path, 'w') as f:
# f.write(clean)
#
#
def write_config_yml(executor_name, dest_folder):
config_content = f'''
jtype: {executor_name}
@@ -39,156 +39,157 @@ metas:
'''
with open(os.path.join(dest_folder, 'config.yml'), 'w') as f:
f.write(config_content)
def get_all_executor_files_with_content(folder_path):
file_name_to_content = {}
for filename in os.listdir(folder_path):
file_path = os.path.join(folder_path, filename)
if os.path.isfile(file_path):
with open(file_path, 'r', encoding='utf-8') as file:
content = file.read()
file_name_to_content[filename] = content
return file_name_to_content
def build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
output_doc_field, output_modality, test_in, test_out):
system_definition = (
system_base_definition
+ "The user is asking you to create an executor with all the necessary files "
"and you write the complete code without leaving something out. "
)
user_query = (
general_guidelines()
+ executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
output_modality, output_doc_field)
+ test_executor_file_task(executor_name, test_in, test_out)
+ requirements_file_task()
+ docker_file_task()
+ client_file_task()
+ streamlit_file_task()
+ "First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. "
"For example, there are different libraries you could use. "
"Discuss the pros and cons for all of these approaches and then decide for one of the approaches. "
"Then write as I told you. "
)
plain_text = gpt.get_response(system_definition, user_query)
return plain_text
def build_production_ready_implementation(all_executor_files_string):
system_definition = (
system_base_definition
+ f"The user gives you the code of the executor and all other files needed ({', '.join([e[0] for e in FILE_AND_TAG_PAIRS])}) "
f"The files may contain bugs. Fix all of them. "
)
user_query = (
'Make it production ready. '
"Fix all files and add all missing code. "
"Keep the same format as given to you. "
f"Some files might have only prototype implementations and are not production ready. Add all the missing code. "
f"Some imports might be missing. Make sure to add them. "
f"Some libraries might be missing. Make sure to install them in the requirements.txt and Dockerfile. "
"First write down an extensive list of obvious and non-obvious thoughts about what parts could need an adjustment and why. "
"Think about if all the changes are required and finally decide for the changes you want to make. "
f"Output all the files even the ones that did not change. "
"Here are the files: \n\n"
+ all_executor_files_string
)
all_executor_files_string_improved = gpt.get_response(system_definition, user_query)
print('DIFFERENCES:', find_differences(all_executor_files_string, all_executor_files_string_improved))
return all_executor_files_string_improved
def files_to_string(file_name_to_content):
all_executor_files_string = ''
for file_name, tag in FILE_AND_TAG_PAIRS:
all_executor_files_string += f'**{file_name}**\n'
all_executor_files_string += f'```{tag}\n'
all_executor_files_string += file_name_to_content[file_name]
all_executor_files_string += '\n```\n\n'
return all_executor_files_string
def main(
executor_name,
executor_description,
input_modality,
input_doc_field,
output_modality,
output_doc_field,
test_in,
test_out,
do_validation=True
):
recreate_folder(EXECUTOR_FOLDER_v1)
recreate_folder(EXECUTOR_FOLDER_v2)
recreate_folder('flow')
all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
output_doc_field, output_modality, test_in, test_out)
extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1)
write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1)
all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1)
all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions)
extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2)
write_config_yml(executor_name, EXECUTOR_FOLDER_v2)
jina_cloud.push_executor(EXECUTOR_FOLDER_v2)
host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, CLIENT_FILE_NAME), host)
update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, STREAMLIT_FILE_NAME), host)
update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME), host)
update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, STREAMLIT_FILE_NAME), host)
if do_validation:
importlib.import_module("executor_v1.client")
return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2)
if __name__ == '__main__':
# ######### Level 2 task #########
# main(
# executor_name='My3DTo2DExecutor',
# executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object",
# input_modality='3d',
# input_doc_field='blob',
# output_modality='image',
# output_doc_field='blob',
# test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj',
# test_out='the output should be exactly one image in png format',
# do_validation=False
# )
######## Level 1 task #########
main(
executor_name='MyCoolOcrExecutor',
executor_description="OCR detector",
input_modality='image',
input_doc_field='uri',
output_modality='text',
output_doc_field='text',
test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png',
test_out='> Hello, world!_',
do_validation=False
)
# main(
# executor_name='MySentimentAnalyzer',
# executor_description="Sentiment analysis executor",
# input_modality='text',
# input_doc_field='text',
# output_modality='sentiment',
# output_doc_field='sentiment_label',
# test_in='This is a fantastic product! I love it!',
# test_out='positive',
# do_validation=False
# )
#
#
# def get_all_executor_files_with_content(folder_path):
# file_name_to_content = {}
# for filename in os.listdir(folder_path):
# file_path = os.path.join(folder_path, filename)
#
# if os.path.isfile(file_path):
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# file_name_to_content[filename] = content
#
# return file_name_to_content
#
#
#
#
# def build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
# output_doc_field, output_modality, test_in, test_out):
# system_definition = (
# system_base_definition
# + "The user is asking you to create an executor with all the necessary files "
# "and you write the complete code without leaving something out. "
# )
# user_query = (
# general_guidelines()
# + executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
# output_modality, output_doc_field)
# + test_executor_file_task(executor_name, test_in, test_out)
# + requirements_file_task()
# + docker_file_task()
# + client_file_task()
# + streamlit_file_task()
# + chain_of_thought_creation()
# )
# plain_text = gpt.get_response(system_definition, user_query)
# return plain_text
#
#
# def build_production_ready_implementation(all_executor_files_string):
# system_definition = (
# system_base_definition
# + f"The user gives you the code of the executor and all other files needed ({', '.join([e[0] for e in FILE_AND_TAG_PAIRS])}) "
# f"The files may contain bugs. Fix all of them. "
#
# )
# user_query = (
# 'Make it production ready. '
# "Fix all files and add all missing code. "
# "Keep the same format as given to you. "
# f"Some files might have only prototype implementations and are not production ready. Add all the missing code. "
# f"Some imports might be missing. Make sure to add them. "
# f"Some libraries might be missing from the requirements.txt. Make sure to install them."
# f"Somthing might be wrong in the Dockerfile. For example, some libraries might be missing. Install them."
# f"Or not all files are copied to the right destination in the Dockerfile. Copy them to the correct destination. "
# "First write down an extensive list of obvious and non-obvious observations about the parts that could need an adjustment. Explain why. "
# "Think about if all the changes are required and finally decide for the changes you want to make. "
# f"Output all the files even the ones that did not change. "
# "Here are the files: \n\n"
# + all_executor_files_string
# )
# all_executor_files_string_improved = gpt.get_response(system_definition, user_query)
# print('DIFFERENCES:', find_differences(all_executor_files_string, all_executor_files_string_improved))
# return all_executor_files_string_improved
#
# def files_to_string(file_name_to_content):
# all_executor_files_string = ''
# for file_name, tag in FILE_AND_TAG_PAIRS:
# all_executor_files_string += f'**{file_name}**\n'
# all_executor_files_string += f'```{tag}\n'
# all_executor_files_string += file_name_to_content[file_name]
# all_executor_files_string += '\n```\n\n'
# return all_executor_files_string
#
#
# def main(
# executor_name,
# executor_description,
# input_modality,
# input_doc_field,
# output_modality,
# output_doc_field,
# test_in,
# test_out,
# do_validation=True
# ):
# recreate_folder(EXECUTOR_FOLDER_v1)
# recreate_folder(EXECUTOR_FOLDER_v2)
# recreate_folder('flow')
#
# all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
# output_doc_field, output_modality, test_in, test_out)
# extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1)
# write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
# file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1)
# all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1)
#
# all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions)
# extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2)
# write_config_yml(executor_name, EXECUTOR_FOLDER_v2)
#
# jina_cloud.push_executor(EXECUTOR_FOLDER_v2)
#
# host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
#
# update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, CLIENT_FILE_NAME), host)
# update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, STREAMLIT_FILE_NAME), host)
# update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME), host)
# update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, STREAMLIT_FILE_NAME), host)
#
# if do_validation:
# importlib.import_module("executor_v1.client")
#
# return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2)
#
#
# if __name__ == '__main__':
# # ######### Level 2 task #########
# # main(
# # executor_name='My3DTo2DExecutor',
# # executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object",
# # input_modality='3d',
# # input_doc_field='blob',
# # output_modality='image',
# # output_doc_field='blob',
# # test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj',
# # test_out='the output should be exactly one image in png format',
# # do_validation=False
# # )
#
# ######## Level 1 task #########
# main(
# executor_name='MyCoolOcrExecutor',
# executor_description="OCR detector",
# input_modality='image',
# input_doc_field='uri',
# output_modality='text',
# output_doc_field='text',
# test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png',
# test_out='output should contain the string "Hello, world"',
# do_validation=False
# )
#
# # main(
# # executor_name='MySentimentAnalyzer',
# # executor_description="Sentiment analysis executor",
# # input_modality='text',
# # input_doc_field='text',
# # output_modality='sentiment',
# # output_doc_field='sentiment_label',
# # test_in='This is a fantastic product! I love it!',
# # test_out='positive',
# # do_validation=False
# # )
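
Both main.py and the new micro_chain.py rely on extract_content_from_result to pull a single file out of a GPT response that follows the "**file name**" plus fenced-code convention requested by the prompts. The following is a minimal, self-contained sketch of that behaviour; the response string is invented for illustration and is not a fixture from the repo.

import re

# Reproduction of extract_content_from_result from main.py, as shown in the diff above.
def extract_content_from_result(plain_text, file_name):
    pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)```"
    match = re.search(pattern, plain_text, re.MULTILINE)
    if match:
        return match.group(1).strip()
    raise ValueError(f'Could not find {file_name} in result')

# Hypothetical GPT response in the **<file>** / fenced-code format the prompts ask for.
response = (
    "Some chain-of-thought text that gets discarded...\n"
    "**executor.py**\n"
    "```python\n"
    "print('hello')\n"
    "```\n"
)
print(extract_content_from_result(response, 'executor.py'))  # -> print('hello')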

micro_chain.py (new file)

@@ -0,0 +1,106 @@
import random
from main import extract_content_from_result, write_config_yml
from src import gpt, jina_cloud
from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \
chain_of_thought_optimization, requirements_file_task, docker_file_task
from src.utils.io import recreate_folder, persist_file
from src.utils.string_tools import print_colored
def wrap_content_in_code_block(executor_content, file_name, tag):
return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n'
def main(
executor_description,
input_modality,
# input_doc_field,
output_modality,
# output_doc_field,
test_scenario,
do_validation=True
):
input_doc_field = 'text' if input_modality == 'text' else 'blob'
output_doc_field = 'text' if output_modality == 'text' else 'blob'
# random integer at the end of the executor name to avoid name clashes
executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
recreate_folder('executor')
recreate_folder('flow')
print_colored('', '############# Executor #############', 'red')
user_query = (
general_guidelines()
+ executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
output_modality, output_doc_field)
+ chain_of_thought_creation()
)
conversation = gpt.Conversation()
conversation.query(user_query)
executor_content_raw = conversation.query(chain_of_thought_optimization('python', 'executor.py'))
executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
persist_file(executor_content, 'executor.py')
print_colored('', '############# Test Executor #############', 'red')
user_query = (
general_guidelines()
+ wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+ test_executor_file_task(executor_name, test_scenario)
)
conversation = gpt.Conversation()
conversation.query(user_query)
test_executor_content_raw = conversation.query(
chain_of_thought_optimization('python', 'test_executor.py')
+ "Don't add any additional tests. "
)
test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py')
persist_file(test_executor_content, 'test_executor.py')
print_colored('', '############# Requirements #############', 'red')
user_query = (
general_guidelines()
+ wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+ wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
+ requirements_file_task()
)
conversation = gpt.Conversation()
conversation.query(user_query)
requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt'))
requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
persist_file(requirements_content, 'requirements.txt')
print_colored('', '############# Dockerfile #############', 'red')
user_query = (
general_guidelines()
+ wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+ wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
+ wrap_content_in_code_block(requirements_content, 'requirements.txt', '')
+ docker_file_task()
)
conversation = gpt.Conversation()
conversation.query(user_query)
dockerfile_content_raw = conversation.query(chain_of_thought_optimization('dockerfile', 'Dockerfile'))
dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
persist_file(dockerfile_content, 'Dockerfile')
write_config_yml(executor_name, 'executor')
jina_cloud.push_executor('executor')
host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
# create playground and client.py
if __name__ == '__main__':
######## Level 1 task #########
main(
executor_description="OCR detector",
input_modality='image',
# input_doc_field='blob',
output_modality='text',
# output_doc_field='text',
test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
do_validation=False
)
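
The per-file loop in micro_chain.py always follows the same two-step chain-of-thought pattern: one query that reasons about the task and produces a first draft, then a chain_of_thought_optimization query that reviews and re-emits the whole file, after which the fenced content is extracted and persisted. A condensed sketch of that step is shown below; `conversation` is assumed to expose query(prompt) -> str and to keep message history, as the new gpt.Conversation class in this commit does.

# Condensed sketch of micro_chain.py's generate-then-self-review step.
def generate_file(conversation, generation_prompt, review_prompt, file_name, extract, persist):
    conversation.query(generation_prompt)             # step 1: chain-of-thought reasoning + first draft
    reviewed_raw = conversation.query(review_prompt)  # step 2: review and re-output the whole file
    content = extract(reviewed_raw, file_name)        # keep only the fenced file body
    persist(content, file_name)
    return content

In the diff above this pattern is applied in turn to executor.py, test_executor.py, requirements.txt and the Dockerfile, each time with a fresh gpt.Conversation and with the previously generated files passed back in via wrap_content_in_code_block.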

server.py

@@ -1,67 +1,67 @@
from fastapi import FastAPI
from fastapi.exceptions import RequestValidationError
from pydantic import BaseModel, HttpUrl
from typing import Optional, Dict
from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse
from main import main
app = FastAPI()
# Define the request model
class CreateRequest(BaseModel):
executor_name: str
executor_description: str
input_modality: str
input_doc_field: str
output_modality: str
output_doc_field: str
test_in: str
test_out: str
# Define the response model
class CreateResponse(BaseModel):
result: Dict[str, str]
success: bool
message: Optional[str]
@app.post("/create", response_model=CreateResponse)
def create_endpoint(request: CreateRequest):
result = main(
executor_name=request.executor_name,
executor_description=request.executor_description,
input_modality=request.input_modality,
input_doc_field=request.input_doc_field,
output_modality=request.output_modality,
output_doc_field=request.output_doc_field,
test_in=request.test_in,
test_out=request.test_out,
do_validation=False
)
return CreateResponse(result=result, success=True, message=None)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Add a custom exception handler for RequestValidationError
@app.exception_handler(RequestValidationError)
def validation_exception_handler(request: Request, exc: RequestValidationError):
return JSONResponse(
status_code=422,
content={"detail": exc.errors()},
)
if __name__ == "__main__":
import uvicorn
uvicorn.run("server:app", host="0.0.0.0", port=8000, log_level="info")
# from fastapi import FastAPI
# from fastapi.exceptions import RequestValidationError
# from pydantic import BaseModel, HttpUrl
# from typing import Optional, Dict
#
# from starlette.middleware.cors import CORSMiddleware
# from starlette.requests import Request
# from starlette.responses import JSONResponse
#
# from main import main
#
# app = FastAPI()
#
# # Define the request model
# class CreateRequest(BaseModel):
# executor_name: str
# executor_description: str
# input_modality: str
# input_doc_field: str
# output_modality: str
# output_doc_field: str
# test_in: str
# test_out: str
#
# # Define the response model
# class CreateResponse(BaseModel):
# result: Dict[str, str]
# success: bool
# message: Optional[str]
#
# @app.post("/create", response_model=CreateResponse)
# def create_endpoint(request: CreateRequest):
#
# result = main(
# executor_name=request.executor_name,
# executor_description=request.executor_description,
# input_modality=request.input_modality,
# input_doc_field=request.input_doc_field,
# output_modality=request.output_modality,
# output_doc_field=request.output_doc_field,
# test_in=request.test_in,
# test_out=request.test_out,
# do_validation=False
# )
# return CreateResponse(result=result, success=True, message=None)
#
#
# app.add_middleware(
# CORSMiddleware,
# allow_origins=["*"],
# allow_credentials=True,
# allow_methods=["*"],
# allow_headers=["*"],
# )
#
# # Add a custom exception handler for RequestValidationError
# @app.exception_handler(RequestValidationError)
# def validation_exception_handler(request: Request, exc: RequestValidationError):
# return JSONResponse(
# status_code=422,
# content={"detail": exc.errors()},
# )
#
#
# if __name__ == "__main__":
# import uvicorn
# uvicorn.run("server:app", host="0.0.0.0", port=8000, log_level="info")
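
With the FastAPI server from server.py running locally (as in its __main__ block), the /create endpoint can be exercised with a plain HTTP client. A hedged example using the requests library is shown below; the payload values are taken from the Level 1 task in main.py, and the URL assumes the default uvicorn host and port.

import requests

payload = {
    "executor_name": "MyCoolOcrExecutor",
    "executor_description": "OCR detector",
    "input_modality": "image",
    "input_doc_field": "uri",
    "output_modality": "text",
    "output_doc_field": "text",
    "test_in": "https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png",
    "test_out": "> Hello, world!_",
}

# POST against the local uvicorn instance started by `python server.py`.
response = requests.post("http://localhost:8000/create", json=payload)
response.raise_for_status()
print(response.json())  # -> {'result': {...}, 'success': True, 'message': None}

Note that the endpoint runs the full generation-and-deployment pipeline (executor push and flow deployment), so the call is slow and has side effects.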

src/gpt.py

@@ -1,36 +1,44 @@
import os
from time import sleep
from typing import Union, List, Tuple
import openai
from openai.error import RateLimitError, Timeout
from src.utils.io import timeout_generator_wrapper
from src.prompt_system import system_base_definition
from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError
from src.utils.string_tools import print_colored
openai.api_key = os.environ['OPENAI_API_KEY']
def get_response(system_definition, user_query):
print_colored('system_definition', system_definition, 'magenta')
print_colored('user_query', user_query, 'blue')
class Conversation:
def __init__(self):
self.prompt_list = [('system', system_base_definition)]
print_colored('system', system_base_definition, 'magenta')
def query(self, prompt: str):
print_colored('user', prompt, 'blue')
self.prompt_list.append(('user', prompt))
response = get_response(self.prompt_list)
self.prompt_list.append(('assistant', response))
return response
def get_response(prompt_list: List[Tuple[str, str]]):
for i in range(10):
try:
response_generator = openai.ChatCompletion.create(
temperature=0,
max_tokens=5_000,
max_tokens=4_000,
model="gpt-4",
stream=True,
messages=[
{
"role": "system",
"content": system_definition
},
{
"role": "user",
"content":
user_query
},
"role": prompt[0],
"content": prompt[1]
}
for prompt in prompt_list
]
)
response_generator_with_timeout = timeout_generator_wrapper(response_generator, 5)
@@ -40,10 +48,11 @@ def get_response(system_definition, user_query):
delta = chunk['choices'][0]['delta']
if 'content' in delta:
content = delta['content']
print_colored('' if complete_string else 'Agent response:', content, 'green', end='')
print_colored('' if complete_string else 'assistant', content, 'green', end='')
complete_string += content
print('\n')
return complete_string
except (RateLimitError, Timeout, ConnectionError) as e:
except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e:
print(e)
print('retrying')
sleep(3)
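
The refactor replaces the single (system_definition, user_query) pair with a running prompt_list of (role, content) tuples, which get_response maps onto the OpenAI chat messages format. A small sketch of that mapping, with placeholder prompt text standing in for the real prompts:

# Sketch of how the Conversation history is translated into the `messages`
# argument of openai.ChatCompletion.create in the new get_response.
prompt_list = [
    ('system', 'You are a principal engineer working at Jina ...'),       # system_base_definition
    ('user', 'Write the executor ...'),                                   # first query
    ('assistant', '**executor.py**\n```python\n...\n```'),                # model reply, appended by Conversation
    ('user', 'First, write down an extensive list of observations ...'),  # chain-of-thought optimization query
]
messages = [{"role": role, "content": content} for role, content in prompt_list]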

src/jina_cloud.py

@@ -9,7 +9,7 @@ from src.constants import FLOW_URL_PLACEHOLDER
def push_executor(dir_path):
cmd = f'jina hub push {dir_path}/. --verbose'
cmd = f'jina hub push {dir_path}/. --verbose --replay'
os.system(cmd)
def get_user_name():

src/prompt_system.py

@@ -1,31 +1,34 @@
from src.constants import FLOW_URL_PLACEHOLDER
executor_example = "Here is an example of how an executor can be defined. It always starts with a comment:"
'''
executor_example = '''
Using the Jina framework, users can define executors.
Here is an example of how an executor can be defined. It always starts with a comment:
# this executor takes ... as input and returns ... as output
# it processes each document in the following way: ...
**executor.py**
```python
# this executor takes binary files as input and returns the length of each binary file as output
from jina import Executor, requests, DocumentArray, Document
class MyInfoExecutor(Executor):
def __init__(self, **kwargs):
super().__init__()
@requests
@requests(on='/process') # this decorator is needed for every executor endpoint
def foo(self, docs: DocumentArray, **kwargs) -> DocumentArray:
for d in docs:
d.load_uri_to_blob()
d.tags['my_info'] = {'byte_length': len(d.blob)}
d.blob = None
return docs
'''
"An executor gets a DocumentArray as input and returns a DocumentArray as output. "
```
docarray_example = (
"A DocumentArray is a python class that can be seen as a list of Documents. "
"A Document is a python class that represents a single document. "
"Here is the protobuf definition of a Document: "
An executor gets a DocumentArray as input and returns a DocumentArray as output.
'''
docarray_example = '''
A DocumentArray is a python class that can be seen as a list of Documents.
A Document is a python class that represents a single document.
Here is the protobuf definition of a Document:
message DocumentProto {
// A hexdigest that represents a unique document ID
string id = 1;
@@ -57,9 +60,8 @@ message DocumentProto {
google.protobuf.Struct tags = 9;
}
'''
"Here is an example of how a DocumentArray can be defined: "
'''
Here is an example of how a DocumentArray can be defined:
from jina import DocumentArray, Document
@@ -82,25 +84,27 @@ docs = DocumentArray([
# For instance, d4.load_uri_to_blob() downloads the file from d4.uri and stores it in d4.blob.
# If d4.uri was something like 'https://website.web/img.jpg', then d4.blob would be something like b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01...
'''
)
client_example = (
"After the executor is deployed, it can be called via Jina Client. "
"Here is an example of a client file: "
f'''
client_example = f'''
After the executor is deployed, it can be called via Jina Client.
Here is an example of a client file:
**client.py**
```python
from jina import Client, Document, DocumentArray
client = Client(host='{FLOW_URL_PLACEHOLDER}')
d = Document(uri='data/img.png')
d.load_uri_to_blob()
response = client.post('/process', inputs=DocumentArray([d]))
response[0].summary()
''')
```
'''
system_base_definition = (
"You are a principal engineer working at Jina - an open source company."
"Using the Jina framework, users can define executors. "
+ executor_example
+ docarray_example
+ client_example
)
system_base_definition = f'''
You are a principal engineer working at Jina - an open source company.
{executor_example}
{docarray_example}
{client_example}
'''

src/prompt_tasks.py

@@ -11,8 +11,6 @@ def general_guidelines():
"Then all imports are listed. "
"It is important to import all modules that could be needed in the executor code. "
"Always import: "
"from typing import Dict, List, Optional, Tuple, Union "
"from io import BytesIO "
"from jina import Executor, DocumentArray, Document, requests "
"Start from top-level and then fully implement all methods. "
"\n"
@@ -21,7 +19,7 @@ def general_guidelines():
def _task(task, tag_name, file_name):
return (
task + f"The code will go into {file_name}. Wrap the code is wrapped into:\n"
task + f"The code will go into {file_name}. Wrap the code into:\n"
f"**{file_name}**\n"
f"```{tag_name}\n"
f"...code...\n"
@@ -31,12 +29,15 @@ def _task(task, tag_name, file_name):
def executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
output_modality, output_doc_field):
return _task(
f"Write the executor called '{executor_name}'. "
f"It matches the following description: '{executor_description}'. "
f"It gets a DocumentArray as input where each document has the input modality '{input_modality}' that is stored in document.{input_doc_field}. "
f"It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}. "
f"Have in mind that d.uri is never a path to a local file. It is always a url.",
return _task(f'''
Write the executor called '{executor_name}'.
It matches the following description: '{executor_description}'.
It gets a DocumentArray as input where each document has the input modality '{input_modality}' and can be accessed via document.{input_doc_field}.
It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}.
Have in mind that d.uri is never a path to a local file. It is always a url.
The executor is not allowed to use the GPU.
The executor is not allowed to access external apis.
''',
EXECUTOR_FILE_TAG,
EXECUTOR_FILE_NAME
)
@@ -46,19 +47,21 @@ def requirements_file_task():
return _task(
"Write the content of the requirements.txt file. "
"Make sure to include pytest. "
"All versions are fixed. ",
"Make sure that jina==3.14.1. "
"All versions are fixed using ~=, ==, <, >, <=, >=. The package versions should not have conflicts. ",
REQUIREMENTS_FILE_TAG,
REQUIREMENTS_FILE_NAME
)
def test_executor_file_task(executor_name, test_in, test_out):
def test_executor_file_task(executor_name, test_scenario):
return _task(
"Write a small unit test for the executor. "
"Start the test with an extensive comment about the test case. "
+ ((
"Test that the executor converts the input '" + test_in + "' to the output '" + test_out + "'. "
) if test_in and test_out else "")
+ (
f"Write a single test case that tests the following scenario: '{test_scenario}'. "
if test_scenario else ""
)
+ "Use the following import to import the executor: "
f"from executor import {executor_name} ",
TEST_EXECUTOR_FILE_TAG,
@@ -72,6 +75,7 @@ def docker_file_task():
"The Dockerfile runs the test during the build process. "
"It is important to make sure that all libs are installed that are required by the python packages. "
"Usually libraries are installed with apt-get. "
"Be aware that the machine the docker container is running on does not have a GPU - only CPU. "
"Add the config.yml file to the Dockerfile. "
"The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
@@ -95,3 +99,26 @@ def streamlit_file_task():
STREAMLIT_FILE_TAG,
STREAMLIT_FILE_NAME
)
def chain_of_thought_creation():
return (
"First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. "
"For example, there are different libraries you could use. "
"Discuss the pros and cons for all of these approaches and then decide for one of the approaches. "
"Then write as I told you. "
)
def chain_of_thought_optimization(tag_name, file_name):
return _task(
f'First, write down an extensive list of obvious and non-obvious observations about {file_name} that could need an adjustment. Explain why. '
f"Think if all the changes are required and finally decide for the changes you want to make, "
f"but you are not allowed disregard the instructions in the previous message. "
f"Be very hesitant to change the code. Only make a change if you are sure that it is necessary. "
f"Output only {file_name} "
f"Write the whole content of {file_name} - even if you decided to change only a small thing or even nothing. ",
tag_name,
file_name
)
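
Every task helper is funnelled through _task, which appends the "**<file name>**" plus fenced-code output contract that extract_content_from_result later parses. A short illustration of the prompt text this produces; the closing fence is assumed, since the hunk above is cut off after the "...code..." line, and the task string is shortened for readability.

# Sketch of the prompt produced by _task.
def _task(task, tag_name, file_name):
    return (
        task + f"The code will go into {file_name}. Wrap the code into:\n"
        f"**{file_name}**\n"
        f"```{tag_name}\n"
        f"...code...\n"
        f"```\n\n"
    )

print(_task("Write the content of the requirements.txt file. ", "", "requirements.txt"))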

src/utils/io.py

@@ -9,6 +9,10 @@ def recreate_folder(folder_path):
shutil.rmtree(folder_path)
os.makedirs(folder_path)
def persist_file(file_content, file_name):
with open(f'executor/{file_name}', 'w') as f:
f.write(file_content)
class GenerationTimeoutError(Exception):
pass
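
gpt.py imports timeout_generator_wrapper alongside the new GenerationTimeoutError, but the wrapper itself is not part of this diff. One plausible shape for it is sketched below, assuming it should raise GenerationTimeoutError whenever the streamed response stalls for longer than the given number of seconds; this is a sketch, not the repository's implementation.

import queue
import threading

def timeout_generator_wrapper(generator, timeout):
    # Pull items from `generator` on a background thread and re-yield them,
    # raising GenerationTimeoutError if no item arrives within `timeout` seconds.
    q = queue.Queue()
    sentinel = object()

    def producer():
        for item in generator:
            q.put(item)
        q.put(sentinel)

    threading.Thread(target=producer, daemon=True).start()
    while True:
        try:
            item = q.get(timeout=timeout)
        except queue.Empty:
            raise GenerationTimeoutError(f'no chunk received for {timeout} seconds')
        if item is sentinel:
            return
        yield item

In get_response, GenerationTimeoutError is caught together with RateLimitError, Timeout and ConnectionError, so a stalled stream simply triggers another retry after the three-second sleep.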