feat: chain of thought

This commit is contained in:
Florian Hönicke
2023-03-21 01:56:36 +01:00
parent 1163ab50f7
commit ab4c7bc090
7 changed files with 254 additions and 109 deletions

174
main.py
View File

@@ -1,25 +1,29 @@
import os import os
import re
from src import gpt, jina_cloud from src import gpt, jina_cloud
from src.constants import TAG_TO_FILE_NAME, EXECUTOR_FOLDER, CLIENT_FILE_NAME from src.constants import FILE_AND_TAG_PAIRS, EXECUTOR_FOLDER_v1, EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME, STREAMLIT_FILE_NAME
from src.jina_cloud import run_client_file from src.jina_cloud import update_client_line_in_file
from src.prompt_examples import executor_example, docarray_example, client_example from src.prompt_system import system_base_definition
from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \ from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \
test_executor_file_task, docker_file_task, client_file_task test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task
from src.utils.io import recreate_folder from src.utils.io import recreate_folder
from src.utils.string import find_between, clean_content from src.utils.string import find_differences
def extract_content_from_result(plain_text, tag): def extract_content_from_result(plain_text, file_name):
content = find_between(plain_text, f'$$$start_{tag}$$$', f'$$$end_{tag}$$$') pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)```"
clean = clean_content(content) match = re.search(pattern, plain_text, re.MULTILINE)
return clean if match:
return match.group(1).strip()
else:
return None
def extract_and_write(plain_text): def extract_and_write(plain_text, dest_folder):
for tag, file_name in TAG_TO_FILE_NAME.items(): for file_name, tag in FILE_AND_TAG_PAIRS:
clean = extract_content_from_result(plain_text, tag) clean = extract_content_from_result(plain_text, file_name)
full_path = os.path.join(EXECUTOR_FOLDER, file_name) full_path = os.path.join(dest_folder, file_name)
with open(full_path, 'w') as f: with open(full_path, 'w') as f:
f.write(clean) f.write(clean)
@@ -35,8 +39,8 @@ metas:
with open('executor/config.yml', 'w') as f: with open('executor/config.yml', 'w') as f:
f.write(config_content) f.write(config_content)
def get_all_executor_files_with_content():
folder_path = 'executor' def get_all_executor_files_with_content(folder_path):
file_name_to_content = {} file_name_to_content = {}
for filename in os.listdir(folder_path): for filename in os.listdir(folder_path):
file_path = os.path.join(folder_path, filename) file_path = os.path.join(folder_path, filename)
@@ -48,7 +52,61 @@ def get_all_executor_files_with_content():
return file_name_to_content return file_name_to_content
async def main(
def build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
                                   output_doc_field, output_modality, test_in, test_out):
    """Request a first draft of all executor files from the model.

    The system prompt is the shared base definition followed by the
    instruction to write complete code. The user prompt is the general
    guidelines, then one task description per file (executor, test,
    requirements, Dockerfile, client, streamlit), then a closing
    instruction asking the model to write down its thoughts first.

    Returns:
        The value returned by ``gpt.get_response`` for these two prompts.
    """
    completeness_instruction = (
        "The user is asking you to create an executor with all the necessary files "
        "and you write the complete code without leaving something out. "
    )
    system_definition = system_base_definition + completeness_instruction

    # The parts are evaluated and concatenated in exactly this order.
    prompt_parts = [
        general_guidelines(),
        executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
                           output_modality, output_doc_field),
        test_executor_file_task(executor_name, test_in, test_out),
        requirements_file_task(),
        docker_file_task(),
        client_file_task(),
        streamlit_file_task(),
        "First, write down some non-obvious thoughts about the challenges of the task and how you handle them. "
        "Then write as I told you. ",
    ]
    user_query = ''.join(prompt_parts)

    return gpt.get_response(system_definition, user_query)
def build_production_ready_implementation(all_executor_files_string):
    """Ask the model to fix and complete a set of already generated files.

    Args:
        all_executor_files_string: All files rendered into one string; it is
            appended verbatim to the end of the user prompt.

    Returns:
        The value returned by ``gpt.get_response``. Before returning, the
        result of ``find_differences`` between the input string and that
        response is printed.
    """
    # Names of the expected files, listed inside the system prompt.
    expected_file_names = ', '.join([pair[0] for pair in FILE_AND_TAG_PAIRS])
    fix_instructions = (
        f"The user gives you the code of the executor and all other files needed ({expected_file_names}) "
        "The files may contain bugs. Fix all of them. "
        "Some files might have only prototype implementations and are not production ready. Add all the missing code. "
        "Some imports might be missing. Make sure to add them. Output all the files in the same format like given to you. "
    )
    system_definition = system_base_definition + fix_instructions

    request_header = (
        "Fix all files, add all missing code and imports. Make it production ready. "
        "Keep the same format as given to you. "
        "First write down some non-obvious thoughts about what parts could need an adjustment and why. "
        "Then write as I told you. "
        "Here are the files: \n\n"
    )
    user_query = request_header + all_executor_files_string

    improved_files_string = gpt.get_response(system_definition, user_query)
    changes = find_differences(all_executor_files_string, improved_files_string)
    print('DIFFERENCES:', changes)
    return improved_files_string
def files_to_string(file_name_to_content):
    """Render the known files as one string of named, fenced code sections.

    For every ``(file_name, tag)`` pair in ``FILE_AND_TAG_PAIRS`` a section is
    emitted: the bold file name, an opening fence carrying the tag, the file
    content, and a closing fence followed by a blank line.

    Args:
        file_name_to_content: Mapping from file name to file content. It must
            contain every file name listed in ``FILE_AND_TAG_PAIRS``.

    Returns:
        The concatenation of all sections, in ``FILE_AND_TAG_PAIRS`` order.
    """
    pieces = []
    for file_name, tag in FILE_AND_TAG_PAIRS:
        pieces.extend((
            f'**{file_name}**\n',
            f'```{tag}\n',
            file_name_to_content[file_name],
            '\n```\n\n',
        ))
    return ''.join(pieces)
def main(
executor_name, executor_name,
executor_description, executor_description,
input_modality, input_modality,
@@ -59,50 +117,70 @@ async def main(
test_out, test_out,
do_validation=True do_validation=True
): ):
recreate_folder(EXECUTOR_FOLDER) recreate_folder(EXECUTOR_FOLDER_v1)
system_definition = ( recreate_folder(EXECUTOR_FOLDER_v2)
"You are a principal engineer working at Jina - an open source company."
"Using the Jina framework, users can define executors. "
+ executor_example
+ docarray_example
+ client_example
+ "The user is asking you to create an executor with all the necessary files "
"and you write the complete code without leaving something out. "
)
user_query = ( all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
general_guidelines() output_doc_field, output_modality, test_in, test_out)
+ executor_file_task(executor_name, executor_description, input_modality, input_doc_field, extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1)
output_modality, output_doc_field)
+ test_executor_file_task(executor_name, test_in, test_out)
+ requirements_file_task()
+ docker_file_task()
+ client_file_task()
)
plain_text = gpt.get_response(system_definition, user_query) file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1)
extract_and_write(plain_text) all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1)
all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions)
extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2)
write_config_yml(executor_name) write_config_yml(executor_name)
jina_cloud.push_executor() jina_cloud.push_executor()
host = await jina_cloud.deploy_flow(executor_name, do_validation) host = jina_cloud.deploy_flow(executor_name, do_validation)
run_client_file(f'executor/{CLIENT_FILE_NAME}', host, do_validation) update_client_line_in_file(f'executor/{CLIENT_FILE_NAME}', host)
update_client_line_in_file(f'executor/{STREAMLIT_FILE_NAME}', host)
if do_validation:
pass
return get_all_executor_files_with_content() return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2)
if __name__ == '__main__': if __name__ == '__main__':
######### Level 2 task #########
main( main(
executor_name='MyCoolOcrExecutor', executor_name='My3DTo2DExecutor',
executor_description="OCR detector", executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object",
input_modality='image', input_modality='3d',
input_doc_field='uri', input_doc_field='blob',
output_modality='text', output_modality='image',
output_doc_field='text', output_doc_field='blob',
test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png', test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj',
test_out='> Hello, world!_', test_out='the output should be exactly one image in png format',
do_validation=False
) )
######### Level 1 task #########
# main(
# executor_name='MyCoolOcrExecutor',
# executor_description="OCR detector",
# input_modality='image',
# input_doc_field='uri',
# output_modality='text',
# output_doc_field='text',
# test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png',
# test_out='> Hello, world!_',
# do_validation=False
# )
# main(
# executor_name='MySentimentAnalyzer',
# executor_description="Sentiment analysis executor",
# input_modality='text',
# input_doc_field='text',
# output_modality='sentiment',
# output_doc_field='sentiment_label',
# test_in='This is a fantastic product! I love it!',
# test_out='positive',
# do_validation=False
# )

View File

@@ -3,20 +3,25 @@ TEST_EXECUTOR_FILE_NAME = 'test_executor.py'
REQUIREMENTS_FILE_NAME = 'requirements.txt' REQUIREMENTS_FILE_NAME = 'requirements.txt'
DOCKER_FILE_NAME = 'Dockerfile' DOCKER_FILE_NAME = 'Dockerfile'
CLIENT_FILE_NAME = 'client.py' CLIENT_FILE_NAME = 'client.py'
STREAMLIT_FILE_NAME = 'streamlit.py'
EXECUTOR_FILE_TAG = 'executor' EXECUTOR_FILE_TAG = 'python'
TEST_EXECUTOR_FILE_TAG = 'test_executor' TEST_EXECUTOR_FILE_TAG = 'python'
REQUIREMENTS_FILE_TAG = 'requirements' REQUIREMENTS_FILE_TAG = ''
DOCKER_FILE_TAG = 'dockerfile' DOCKER_FILE_TAG = 'dockerfile'
CLIENT_FILE_TAG = 'client' CLIENT_FILE_TAG = 'python'
STREAMLIT_FILE_TAG = 'python'
TAG_TO_FILE_NAME = { FILE_AND_TAG_PAIRS = [
EXECUTOR_FILE_TAG: EXECUTOR_FILE_NAME, (EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG),
TEST_EXECUTOR_FILE_TAG: TEST_EXECUTOR_FILE_NAME, (TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG),
REQUIREMENTS_FILE_TAG: REQUIREMENTS_FILE_NAME, (REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG),
DOCKER_FILE_TAG: DOCKER_FILE_NAME, (DOCKER_FILE_NAME, DOCKER_FILE_TAG),
CLIENT_FILE_TAG: CLIENT_FILE_NAME (CLIENT_FILE_NAME, CLIENT_FILE_TAG),
} (STREAMLIT_FILE_NAME, STREAMLIT_FILE_TAG)
]
EXECUTOR_FOLDER_v1 = 'executor_v1'
EXECUTOR_FOLDER_v2 = 'executor_v2'
EXECUTOR_FOLDER = 'executor'
FLOW_URL_PLACEHOLDER = 'jcloud.jina.ai' FLOW_URL_PLACEHOLDER = 'jcloud.jina.ai'

View File

@@ -1,6 +1,8 @@
import os import os
from time import sleep
import openai import openai
from openai.error import RateLimitError, Timeout
from src.utils.string import print_colored from src.utils.string import print_colored
@@ -9,9 +11,13 @@ openai.api_key = os.environ['OPENAI_API_KEY']
def get_response(system_definition, user_query): def get_response(system_definition, user_query):
print_colored('system_definition', system_definition, 'magenta') print_colored('system_definition', system_definition, 'magenta')
print_colored('user_query', user_query, 'blue') print_colored('user_query', user_query, 'blue')
for i in range(10):
try:
response = openai.ChatCompletion.create( response = openai.ChatCompletion.create(
temperature=0, temperature=0,
max_tokens=5_000,
model="gpt-4", model="gpt-4",
stream=True,
messages=[ messages=[
{ {
"role": "system", "role": "system",
@@ -26,6 +32,17 @@ def get_response(system_definition, user_query):
] ]
) )
content = response['choices'][0]['message']['content'] complete_string = ''
print_colored('agent response', content, 'green') for chunk in response:
return content delta = chunk['choices'][0]['delta']
if 'content' in delta:
content = delta['content']
print_colored('' if complete_string else 'Agent response:', content, 'green', end='')
complete_string += content
return complete_string
except (RateLimitError, Timeout) as e:
print(e)
print('retrying')
sleep(3)
continue
raise Exception('Failed to get response')

View File

@@ -19,15 +19,13 @@ def get_user_name():
return response['data']['name'] return response['data']['name']
async def deploy_on_jcloud(flow_yaml): def deploy_on_jcloud(flow_yaml):
cloud_flow = CloudFlow(path=flow_yaml) cloud_flow = CloudFlow(path=flow_yaml)
await cloud_flow.__aenter__() return cloud_flow.__enter__().endpoints['gateway']
return cloud_flow.endpoints['gateway']
def deploy_flow(executor_name, do_validation):
async def deploy_flow(executor_name, do_validation):
flow = f''' flow = f'''
jtype: Flow jtype: Flow
with: with:
@@ -59,7 +57,7 @@ executors:
with flow: with flow:
pass pass
print('deploy flow on jcloud') print('deploy flow on jcloud')
return await deploy_on_jcloud(flow_yaml=full_flow_path) return deploy_on_jcloud(flow_yaml=full_flow_path)
def replace_client_line(file_content: str, replacement: str) -> str: def replace_client_line(file_content: str, replacement: str) -> str:
@@ -70,7 +68,7 @@ def replace_client_line(file_content: str, replacement: str) -> str:
break break
return '\n'.join(lines) return '\n'.join(lines)
def run_client_file(file_path, host, do_validation): def update_client_line_in_file(file_path, host):
with open(file_path, 'r') as file: with open(file_path, 'r') as file:
content = file.read() content = file.read()
@@ -80,5 +78,4 @@ def run_client_file(file_path, host, do_validation):
with open(file_path, 'w') as file: with open(file_path, 'w') as file:
file.write(replaced_content) file.write(replaced_content)
if do_validation:
import executor.client # runs the client script for validation

View File

@@ -93,3 +93,12 @@ d.load_uri_to_blob()
response = client.post('/process', inputs=DocumentArray([d])) response = client.post('/process', inputs=DocumentArray([d]))
response[0].summary() response[0].summary()
''') ''')
# Shared opening of every system prompt: the role description followed by the
# executor, docarray and client examples defined earlier in this module.
system_base_definition = (
    # Trailing space added so the prompt does not read "company.Using".
    "You are a principal engineer working at Jina - an open source company. "
    "Using the Jina framework, users can define executors. "
    + executor_example
    + docarray_example
    + client_example
)

View File

@@ -1,10 +1,10 @@
from src.constants import EXECUTOR_FILE_NAME, REQUIREMENTS_FILE_NAME, TEST_EXECUTOR_FILE_NAME, DOCKER_FILE_NAME, \ from src.constants import EXECUTOR_FILE_NAME, REQUIREMENTS_FILE_NAME, TEST_EXECUTOR_FILE_NAME, DOCKER_FILE_NAME, \
DOCKER_FILE_TAG, CLIENT_FILE_TAG, CLIENT_FILE_NAME DOCKER_FILE_TAG, CLIENT_FILE_TAG, CLIENT_FILE_NAME, STREAMLIT_FILE_TAG, STREAMLIT_FILE_NAME, EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_TAG, TEST_EXECUTOR_FILE_TAG
def general_guidelines(): def general_guidelines():
return ( return (
"General guidelines: "
"The code you write is production ready. " "The code you write is production ready. "
"Every file starts with comments describing what the code is doing before the first import. " "Every file starts with comments describing what the code is doing before the first import. "
"Comments can only be written between tags. " "Comments can only be written between tags. "
@@ -20,7 +20,13 @@ def general_guidelines():
def _task(task, tag_name, file_name): def _task(task, tag_name, file_name):
return task + f"The code will go into {file_name}. Wrap the code in the string $$$start_{tag_name}$$$...$$$end_{tag_name}$$$ \n\n" return (
task + f"The code will go into {file_name}. Wrap the code is wrapped into:\n"
f"**{file_name}**\n"
f"```{tag_name}\n"
f"...code...\n"
f"```\n\n"
)
def executor_file_task(executor_name, executor_description, input_modality, input_doc_field, def executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
@@ -31,28 +37,31 @@ def executor_file_task(executor_name, executor_description, input_modality, inpu
f"It gets a DocumentArray as input where each document has the input modality '{input_modality}' that is stored in document.{input_doc_field}. " f"It gets a DocumentArray as input where each document has the input modality '{input_modality}' that is stored in document.{input_doc_field}. "
f"It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}. " f"It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}. "
f"Have in mind that d.uri is never a path to a local file. It is always a url.", f"Have in mind that d.uri is never a path to a local file. It is always a url.",
'executor', EXECUTOR_FILE_TAG,
EXECUTOR_FILE_NAME EXECUTOR_FILE_NAME
) )
def requirements_file_task(): def requirements_file_task():
return _task("Write the content of the requirements.txt file. " return _task(
"Write the content of the requirements.txt file. "
"Make sure to include pytest. " "Make sure to include pytest. "
"All versions are fixed. ", 'requirements', "All versions are fixed. ",
REQUIREMENTS_FILE_NAME) REQUIREMENTS_FILE_TAG,
REQUIREMENTS_FILE_NAME
)
def test_executor_file_task(executor_name, test_in, test_out): def test_executor_file_task(executor_name, test_in, test_out):
return _task( return _task(
"Write a small unit test for the executor. " "Write a small unit test for the executor. "
"Start the test with an extensive comment about the test case. " "Start the test with an extensive comment about the test case. "
+ ( + ((
"Test that the executor converts the input '" + test_in + "' to the output '" + test_out + "'. " "Test that the executor converts the input '" + test_in + "' to the output '" + test_out + "'. "
) if test_in and test_out else "" ) if test_in and test_out else "")
"Use the following import to import the executor: " + "Use the following import to import the executor: "
f"from executor import {executor_name} ", f"from executor import {executor_name} ",
'test_executor', TEST_EXECUTOR_FILE_TAG,
TEST_EXECUTOR_FILE_NAME TEST_EXECUTOR_FILE_NAME
) )
@@ -66,12 +75,23 @@ def docker_file_task():
"Add the config.yml file to the Dockerfile. " "Add the config.yml file to the Dockerfile. "
"The base image of the Dockerfile is FROM jinaai/jina:3.14.2-dev18-py310-standard. " "The base image of the Dockerfile is FROM jinaai/jina:3.14.2-dev18-py310-standard. "
'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] ' 'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
"The Dockerfile runs the test during the build process. " "The Dockerfile runs the test during the build process. ",
, DOCKER_FILE_TAG, DOCKER_FILE_NAME) DOCKER_FILE_TAG,
DOCKER_FILE_NAME
)
def client_file_task(): def client_file_task():
return _task( return _task(
"Write the client file. " "Write the client file. ",
, CLIENT_FILE_TAG, CLIENT_FILE_NAME CLIENT_FILE_TAG,
CLIENT_FILE_NAME
)
def streamlit_file_task():
return _task(
"Write the streamlit file allowing to make requests . ",
STREAMLIT_FILE_TAG,
STREAMLIT_FILE_NAME
) )

View File

@@ -1,3 +1,6 @@
import difflib
def find_between(input_string, start, end): def find_between(input_string, start, end):
try: try:
start_index = input_string.index(start) + len(start) start_index = input_string.index(start) + len(start)
@@ -10,7 +13,7 @@ def find_between(input_string, start, end):
def clean_content(content): def clean_content(content):
return content.replace('```', '').strip() return content.replace('```', '').strip()
def print_colored(headline, text, color_code): def print_colored(headline, text, color_code, end='\n'):
if color_code == 'black': if color_code == 'black':
color_code = '30' color_code = '30'
elif color_code == 'red': elif color_code == 'red':
@@ -30,5 +33,21 @@ def print_colored(headline, text, color_code):
color_start = f"\033[{color_code}m" color_start = f"\033[{color_code}m"
reset = "\033[0m" reset = "\033[0m"
bold_start = "\033[1m" bold_start = "\033[1m"
if headline:
print(f"{bold_start}{color_start}{headline}{reset}") print(f"{bold_start}{color_start}{headline}{reset}")
print(f"{color_start}{text}{reset}") print(f"{color_start}{text}{reset}", end=end)
def find_differences(a, b):
    """Collect the spans that were replaced when going from ``a`` to ``b``.

    Only ``'replace'`` opcodes from ``difflib.SequenceMatcher`` are
    considered; pure insertions and deletions are not reported.

    Args:
        a: The original sequence.
        b: The changed sequence.

    Returns:
        A set of ``(old_span, new_span)`` tuples. A pair is left out when its
        mirror image ``(new_span, old_span)`` has already been collected.
    """
    opcodes = difflib.SequenceMatcher(None, a, b).get_opcodes()
    replaced = set()
    for operation, a_start, a_end, b_start, b_end in opcodes:
        if operation != 'replace':
            continue
        old_span = a[a_start:a_end]
        new_span = b[b_start:b_end]
        # Skip a replacement whose exact reverse was already recorded.
        if (new_span, old_span) in replaced:
            continue
        replaced.add((old_span, new_span))
    return replaced