feat: chain of thought

This commit is contained in:
Florian Hönicke
2023-03-21 16:43:56 +01:00
parent 42305fbdb7
commit d1954317fc
8 changed files with 105 additions and 57 deletions

90
main.py
View File

@@ -1,3 +1,4 @@
import importlib
import os import os
import re import re
@@ -8,7 +9,7 @@ from src.prompt_system import system_base_definition
from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \ from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \
test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task
from src.utils.io import recreate_folder from src.utils.io import recreate_folder
from src.utils.string import find_differences from src.utils.string_tools import find_differences
def extract_content_from_result(plain_text, file_name): def extract_content_from_result(plain_text, file_name):
@@ -17,7 +18,7 @@ def extract_content_from_result(plain_text, file_name):
if match: if match:
return match.group(1).strip() return match.group(1).strip()
else: else:
return None raise ValueError(f'Could not find {file_name} in result')
def extract_and_write(plain_text, dest_folder): def extract_and_write(plain_text, dest_folder):
@@ -28,7 +29,7 @@ def extract_and_write(plain_text, dest_folder):
f.write(clean) f.write(clean)
def write_config_yml(executor_name): def write_config_yml(executor_name, dest_folder):
config_content = f''' config_content = f'''
jtype: {executor_name} jtype: {executor_name}
py_modules: py_modules:
@@ -36,7 +37,7 @@ py_modules:
metas: metas:
name: {executor_name} name: {executor_name}
''' '''
with open('executor/config.yml', 'w') as f: with open(os.path.join(dest_folder, 'config.yml'), 'w') as f:
f.write(config_content) f.write(config_content)
@@ -69,7 +70,9 @@ def build_prototype_implementation(executor_description, executor_name, input_do
+ docker_file_task() + docker_file_task()
+ client_file_task() + client_file_task()
+ streamlit_file_task() + streamlit_file_task()
+ "First, write down some non-obvious thoughts about the challenges of the task and how you handle them. " + "First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. "
"For example, there are different libraries you could use. "
"Discuss the pros and cons for all of these approaches and then decide for one of the approaches. "
"Then write as I told you. " "Then write as I told you. "
) )
plain_text = gpt.get_response(system_definition, user_query) plain_text = gpt.get_response(system_definition, user_query)
@@ -81,14 +84,18 @@ def build_production_ready_implementation(all_executor_files_string):
system_base_definition system_base_definition
+ f"The user gives you the code of the executor and all other files needed ({', '.join([e[0] for e in FILE_AND_TAG_PAIRS])}) " + f"The user gives you the code of the executor and all other files needed ({', '.join([e[0] for e in FILE_AND_TAG_PAIRS])}) "
f"The files may contain bugs. Fix all of them. " f"The files may contain bugs. Fix all of them. "
f"Some files might have only prototype implementations and are not production ready. Add all the missing code. "
f"Some imports might be missing. Make sure to add them. Output all the files in the same format like given to you. "
) )
user_query = ( user_query = (
"Fix all files, add all missing code and imports. Make it production ready. " 'Make it production ready. '
"Fix all files and add all missing code. "
"Keep the same format as given to you. " "Keep the same format as given to you. "
"First write down some non-obvious thoughts about what parts could need an adjustment and why. " f"Some files might have only prototype implementations and are not production ready. Add all the missing code. "
"Then write as I told you. " f"Some imports might be missing. Make sure to add them. "
f"Some libraries might be missing. Make sure to install them in the requirements.txt and Dockerfile. "
"First write down an extensive list of obvious and non-obvious thoughts about what parts could need an adjustment and why. "
"Think about if all the changes are required and finally decide for the changes you want to make. "
f"Output all the files even the ones that did not change. "
"Here are the files: \n\n" "Here are the files: \n\n"
+ all_executor_files_string + all_executor_files_string
) )
@@ -119,60 +126,61 @@ def main(
): ):
recreate_folder(EXECUTOR_FOLDER_v1) recreate_folder(EXECUTOR_FOLDER_v1)
recreate_folder(EXECUTOR_FOLDER_v2) recreate_folder(EXECUTOR_FOLDER_v2)
recreate_folder('flow')
all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality, all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
output_doc_field, output_modality, test_in, test_out) output_doc_field, output_modality, test_in, test_out)
extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1) extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1)
write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1) file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1)
all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1) all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1)
all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions) all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions)
extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2) extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2)
write_config_yml(executor_name, EXECUTOR_FOLDER_v2)
write_config_yml(executor_name) jina_cloud.push_executor(EXECUTOR_FOLDER_v2)
jina_cloud.push_executor() host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
host = jina_cloud.deploy_flow(executor_name, do_validation) update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, CLIENT_FILE_NAME), host)
update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, STREAMLIT_FILE_NAME), host)
update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME), host)
update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, STREAMLIT_FILE_NAME), host)
update_client_line_in_file(f'executor/{CLIENT_FILE_NAME}', host)
update_client_line_in_file(f'executor/{STREAMLIT_FILE_NAME}', host)
if do_validation: if do_validation:
pass importlib.import_module("executor_v1.client")
return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2) return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2)
if __name__ == '__main__': if __name__ == '__main__':
######### Level 2 task ######### # ######### Level 2 task #########
main(
executor_name='My3DTo2DExecutor',
executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object",
input_modality='3d',
input_doc_field='blob',
output_modality='image',
output_doc_field='blob',
test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj',
test_out='the output should be exactly one image in png format',
do_validation=False
)
######### Level 1 task #########
# main( # main(
# executor_name='MyCoolOcrExecutor', # executor_name='My3DTo2DExecutor',
# executor_description="OCR detector", # executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object",
# input_modality='image', # input_modality='3d',
# input_doc_field='uri', # input_doc_field='blob',
# output_modality='text', # output_modality='image',
# output_doc_field='text', # output_doc_field='blob',
# test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png', # test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj',
# test_out='> Hello, world!_', # test_out='the output should be exactly one image in png format',
# do_validation=False # do_validation=False
# ) # )
######## Level 1 task #########
main(
executor_name='MyCoolOcrExecutor',
executor_description="OCR detector",
input_modality='image',
input_doc_field='uri',
output_modality='text',
output_doc_field='text',
test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png',
test_out='> Hello, world!_',
do_validation=False
)
# main( # main(
# executor_name='MySentimentAnalyzer', # executor_name='MySentimentAnalyzer',
# executor_description="Sentiment analysis executor", # executor_description="Sentiment analysis executor",

View File

@@ -29,9 +29,9 @@ class CreateResponse(BaseModel):
message: Optional[str] message: Optional[str]
@app.post("/create", response_model=CreateResponse) @app.post("/create", response_model=CreateResponse)
async def create_endpoint(request: CreateRequest): def create_endpoint(request: CreateRequest):
result = await main( result = main(
executor_name=request.executor_name, executor_name=request.executor_name,
executor_description=request.executor_description, executor_description=request.executor_description,
input_modality=request.input_modality, input_modality=request.input_modality,
@@ -55,7 +55,7 @@ app.add_middleware(
# Add a custom exception handler for RequestValidationError # Add a custom exception handler for RequestValidationError
@app.exception_handler(RequestValidationError) @app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError): def validation_exception_handler(request: Request, exc: RequestValidationError):
return JSONResponse( return JSONResponse(
status_code=422, status_code=422,
content={"detail": exc.errors()}, content={"detail": exc.errors()},

View File

@@ -4,7 +4,8 @@ from time import sleep
import openai import openai
from openai.error import RateLimitError, Timeout from openai.error import RateLimitError, Timeout
from src.utils.string import print_colored from src.utils.io import timeout_generator_wrapper
from src.utils.string_tools import print_colored
openai.api_key = os.environ['OPENAI_API_KEY'] openai.api_key = os.environ['OPENAI_API_KEY']
@@ -13,7 +14,7 @@ def get_response(system_definition, user_query):
print_colored('user_query', user_query, 'blue') print_colored('user_query', user_query, 'blue')
for i in range(10): for i in range(10):
try: try:
response = openai.ChatCompletion.create( response_generator = openai.ChatCompletion.create(
temperature=0, temperature=0,
max_tokens=5_000, max_tokens=5_000,
model="gpt-4", model="gpt-4",
@@ -32,15 +33,17 @@ def get_response(system_definition, user_query):
] ]
) )
response_generator_with_timeout = timeout_generator_wrapper(response_generator, 5)
complete_string = '' complete_string = ''
for chunk in response: for chunk in response_generator_with_timeout:
delta = chunk['choices'][0]['delta'] delta = chunk['choices'][0]['delta']
if 'content' in delta: if 'content' in delta:
content = delta['content'] content = delta['content']
print_colored('' if complete_string else 'Agent response:', content, 'green', end='') print_colored('' if complete_string else 'Agent response:', content, 'green', end='')
complete_string += content complete_string += content
return complete_string return complete_string
except (RateLimitError, Timeout) as e: except (RateLimitError, Timeout, ConnectionError) as e:
print(e) print(e)
print('retrying') print('retrying')
sleep(3) sleep(3)

View File

@@ -1,4 +1,3 @@
import asyncio
import os import os
from multiprocessing.connection import Client from multiprocessing.connection import Client
@@ -9,8 +8,8 @@ from jina import Flow
from src.constants import FLOW_URL_PLACEHOLDER from src.constants import FLOW_URL_PLACEHOLDER
def push_executor(): def push_executor(dir_path):
cmd = 'jina hub push executor/. --verbose' cmd = f'jina hub push {dir_path}/. --verbose'
os.system(cmd) os.system(cmd)
def get_user_name(): def get_user_name():
@@ -25,7 +24,7 @@ def deploy_on_jcloud(flow_yaml):
def deploy_flow(executor_name, do_validation): def deploy_flow(executor_name, do_validation, dest_folder):
flow = f''' flow = f'''
jtype: Flow jtype: Flow
with: with:
@@ -47,7 +46,8 @@ executors:
instance: C4 instance: C4
capacity: spot capacity: spot
''' '''
full_flow_path = os.path.join('executor', 'flow.yml') full_flow_path = os.path.join(dest_folder,
'flow.yml')
with open(full_flow_path, 'w') as f: with open(full_flow_path, 'w') as f:
f.write(flow) f.write(flow)

View File

@@ -6,14 +6,16 @@ executor_example = "Here is an example of how an executor can be defined. It alw
# this executor takes ... as input and returns ... as output # this executor takes ... as input and returns ... as output
# it processes each document in the following way: ... # it processes each document in the following way: ...
from jina import Executor, requests, DocumentArray, Document from jina import Executor, requests, DocumentArray, Document
class MyExecutor(Executor): class MyInfoExecutor(Executor):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super().__init__() super().__init__()
@requests @requests
def foo(self, docs: DocumentArray, **kwargs) => DocumentArray: def foo(self, docs: DocumentArray, **kwargs) => DocumentArray:
for d in docs: for d in docs:
d.text = 'hello world'" d.load_uri_to_blob()
d.tags['my_info'] = {'byte_length': len(d.blob)}
d.blob = None
return docs return docs
''' '''
"An executor gets a DocumentArray as input and returns a DocumentArray as output. " "An executor gets a DocumentArray as input and returns a DocumentArray as output. "

View File

@@ -73,7 +73,7 @@ def docker_file_task():
"It is important to make sure that all libs are installed that are required by the python packages. " "It is important to make sure that all libs are installed that are required by the python packages. "
"Usually libraries are installed with apt-get. " "Usually libraries are installed with apt-get. "
"Add the config.yml file to the Dockerfile. " "Add the config.yml file to the Dockerfile. "
"The base image of the Dockerfile is FROM jinaai/jina:3.14.2-dev18-py310-standard. " "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] ' 'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
"The Dockerfile runs the test during the build process. ", "The Dockerfile runs the test during the build process. ",
DOCKER_FILE_TAG, DOCKER_FILE_TAG,

View File

@@ -1,8 +1,43 @@
import os import os
import shutil import shutil
import concurrent.futures
import concurrent.futures
from typing import Generator
def recreate_folder(folder_path): def recreate_folder(folder_path):
if os.path.exists(folder_path) and os.path.isdir(folder_path): if os.path.exists(folder_path) and os.path.isdir(folder_path):
shutil.rmtree(folder_path) shutil.rmtree(folder_path)
os.makedirs(folder_path) os.makedirs(folder_path)
class GenerationTimeoutError(Exception):
pass
def timeout_generator_wrapper(generator, timeout):
def generator_func():
for item in generator:
yield item
def wrapper() -> Generator:
gen = generator_func()
while True:
try:
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(next, gen)
yield future.result(timeout=timeout)
except StopIteration:
break
except concurrent.futures.TimeoutError:
raise GenerationTimeoutError(f"Generation took longer than {timeout} seconds")
return wrapper()
# def my_generator():
# for i in range(10):
# sleep(3)
# yield 1
#
#
# my_generator_with_timeout = timeout_generator_wrapper(my_generator, 2.9)
# for item in my_generator_with_timeout():
# print(item)