feat: chain of thought

This commit is contained in:
Florian Hönicke
2023-03-22 18:16:57 +01:00
parent d1954317fc
commit 1c8272e706
8 changed files with 456 additions and 305 deletions

View File

@@ -1,36 +1,44 @@
import os
from time import sleep
from typing import Union, List, Tuple
import openai
from openai.error import RateLimitError, Timeout
from src.utils.io import timeout_generator_wrapper
from src.prompt_system import system_base_definition
from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError
from src.utils.string_tools import print_colored
openai.api_key = os.environ['OPENAI_API_KEY']
def get_response(system_definition, user_query):
print_colored('system_definition', system_definition, 'magenta')
print_colored('user_query', user_query, 'blue')
class Conversation:
    """Running chat transcript that is re-sent in full on every query.

    ``prompt_list`` holds ``(role, content)`` tuples. It starts with the
    ``system`` entry carrying ``system_base_definition`` and grows by one
    ``user`` and one ``assistant`` entry per call to :meth:`query`.
    """

    def __init__(self):
        # Echo the system prompt once, then seed the transcript with it.
        print_colored('system', system_base_definition, 'magenta')
        self.prompt_list = [('system', system_base_definition)]

    def query(self, prompt: str):
        """Send ``prompt`` as the next user turn and return the reply.

        The user turn is recorded before ``get_response`` is called, and the
        reply is recorded as an ``assistant`` turn before it is returned.
        """
        print_colored('user', prompt, 'blue')
        user_turn = ('user', prompt)
        self.prompt_list.append(user_turn)
        answer = get_response(self.prompt_list)
        assistant_turn = ('assistant', answer)
        self.prompt_list.append(assistant_turn)
        return answer
def get_response(prompt_list: List[Tuple[str, str]]):
for i in range(10):
try:
response_generator = openai.ChatCompletion.create(
temperature=0,
max_tokens=5_000,
max_tokens=4_000,
model="gpt-4",
stream=True,
messages=[
{
"role": "system",
"content": system_definition
},
{
"role": "user",
"content":
user_query
},
"role": prompt[0],
"content": prompt[1]
}
for prompt in prompt_list
]
)
response_generator_with_timeout = timeout_generator_wrapper(response_generator, 5)
@@ -40,12 +48,13 @@ def get_response(system_definition, user_query):
delta = chunk['choices'][0]['delta']
if 'content' in delta:
content = delta['content']
print_colored('' if complete_string else 'Agent response:', content, 'green', end='')
print_colored('' if complete_string else 'assistent', content, 'green', end='')
complete_string += content
print('\n')
return complete_string
except (RateLimitError, Timeout, ConnectionError) as e:
except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e:
print(e)
print('retrying')
sleep(3)
continue
raise Exception('Failed to get response')
raise Exception('Failed to get response')

View File

@@ -9,7 +9,7 @@ from src.constants import FLOW_URL_PLACEHOLDER
def push_executor(dir_path):
cmd = f'jina hub push {dir_path}/. --verbose'
cmd = f'jina hub push {dir_path}/. --verbose --replay'
os.system(cmd)
def get_user_name():

View File

@@ -1,31 +1,34 @@
from src.constants import FLOW_URL_PLACEHOLDER
executor_example = "Here is an example of how an executor can be defined. It always starts with a comment:"
'''
executor_example = '''
Using the Jina framework, users can define executors.
Here is an example of how an executor can be defined. It always starts with a comment:
# this executor takes ... as input and returns ... as output
# it processes each document in the following way: ...
**executor.py**
```python
# this executor takes binary files as input and returns the length of each binary file as output
from jina import Executor, requests, DocumentArray, Document
class MyInfoExecutor(Executor):
def __init__(self, **kwargs):
super().__init__()
@requests
@requests(on='/process') # this decorator is needed for every executor endpoint
def foo(self, docs: DocumentArray, **kwargs) -> DocumentArray:
for d in docs:
d.load_uri_to_blob()
d.tags['my_info'] = {'byte_length': len(d.blob)}
d.blob = None
return docs
'''
"An executor gets a DocumentArray as input and returns a DocumentArray as output. "
```
docarray_example = (
"A DocumentArray is a python class that can be seen as a list of Documents. "
"A Document is a python class that represents a single document. "
"Here is the protobuf definition of a Document: "
An executor gets a DocumentArray as input and returns a DocumentArray as output.
'''
docarray_example = '''
A DocumentArray is a python class that can be seen as a list of Documents.
A Document is a python class that represents a single document.
Here is the protobuf definition of a Document:
message DocumentProto {
// A hexdigest that represents a unique document ID
string id = 1;
@@ -57,9 +60,8 @@ message DocumentProto {
google.protobuf.Struct tags = 9;
}
'''
"Here is an example of how a DocumentArray can be defined: "
'''
Here is an example of how a DocumentArray can be defined:
from jina import DocumentArray, Document
@@ -82,25 +84,27 @@ docs = DocumentArray([
# For instance, d4.load_uri_to_blob() downloads the file from d4.uri and stores it in d4.blob.
# If d4.uri was something like 'https://website.web/img.jpg', then d4.blob would be something like b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01...
'''
)
client_example = (
"After the executor is deployed, it can be called via Jina Client. "
"Here is an example of a client file: "
f'''
client_example = f'''
After the executor is deployed, it can be called via Jina Client.
Here is an example of a client file:
**client.py**
```python
from jina import Client, Document, DocumentArray
client = Client(host='{FLOW_URL_PLACEHOLDER}')
d = Document(uri='data/img.png')
d.load_uri_to_blob()
response = client.post('/process', inputs=DocumentArray([d]))
response[0].summary()
''')
```
'''
system_base_definition = (
"You are a principal engineer working at Jina - an open source company."
"Using the Jina framework, users can define executors. "
+ executor_example
+ docarray_example
+ client_example
)
# Base system prompt: the persona sentence followed by the executor,
# DocumentArray and client primers defined earlier in this module.
# The persona sentence previously ended with a stray double quote (left over
# from the old quoted-string form), which leaked into the prompt text.
system_base_definition = f'''
You are a principal engineer working at Jina - an open source company.
{executor_example}
{docarray_example}
{client_example}
'''

View File

@@ -11,8 +11,6 @@ def general_guidelines():
"Then all imports are listed. "
"It is important to import all modules that could be needed in the executor code. "
"Always import: "
"from typing import Dict, List, Optional, Tuple, Union "
"from io import BytesIO "
"from jina import Executor, DocumentArray, Document, requests "
"Start from top-level and then fully implement all methods. "
"\n"
@@ -21,7 +19,7 @@ def general_guidelines():
def _task(task, tag_name, file_name):
return (
task + f"The code will go into {file_name}. Wrap the code is wrapped into:\n"
task + f"The code will go into {file_name}. Wrap the code into:\n"
f"**{file_name}**\n"
f"```{tag_name}\n"
f"...code...\n"
@@ -31,34 +29,39 @@ def _task(task, tag_name, file_name):
def executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
output_modality, output_doc_field):
return _task(
f"Write the executor called '{executor_name}'. "
f"It matches the following description: '{executor_description}'. "
f"It gets a DocumentArray as input where each document has the input modality '{input_modality}' that is stored in document.{input_doc_field}. "
f"It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}. "
f"Have in mind that d.uri is never a path to a local file. It is always a url.",
EXECUTOR_FILE_TAG,
EXECUTOR_FILE_NAME
)
return _task(f'''
Write the executor called '{executor_name}'.
It matches the following description: '{executor_description}'.
It gets a DocumentArray as input where each document has the input modality '{input_modality}' and can be accessed via document.{input_doc_field}.
It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}.
Have in mind that d.uri is never a path to a local file. It is always a url.
The executor is not allowed to use the GPU.
The executor is not allowed to access external apis.
''',
EXECUTOR_FILE_TAG,
EXECUTOR_FILE_NAME
)
def requirements_file_task():
return _task(
"Write the content of the requirements.txt file. "
"Make sure to include pytest. "
"All versions are fixed. ",
"Make sure that jina==3.14.1. "
"All versions are fixed using ~=, ==, <, >, <=, >=. The package versions should not have conflicts. ",
REQUIREMENTS_FILE_TAG,
REQUIREMENTS_FILE_NAME
)
def test_executor_file_task(executor_name, test_in, test_out):
def test_executor_file_task(executor_name, test_scenario):
return _task(
"Write a small unit test for the executor. "
"Start the test with an extensive comment about the test case. "
+ ((
"Test that the executor converts the input '" + test_in + "' to the output '" + test_out + "'. "
) if test_in and test_out else "")
+ (
f"Write a single test case that tests the following scenario: '{test_scenario}'. "
if test_scenario else ""
)
+ "Use the following import to import the executor: "
f"from executor import {executor_name} ",
TEST_EXECUTOR_FILE_TAG,
@@ -72,6 +75,7 @@ def docker_file_task():
"The Dockerfile runs the test during the build process. "
"It is important to make sure that all libs are installed that are required by the python packages. "
"Usually libraries are installed with apt-get. "
"Be aware that the machine the docker container is running on does not have a GPU - only CPU. "
"Add the config.yml file to the Dockerfile. "
"The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
@@ -95,3 +99,26 @@ def streamlit_file_task():
STREAMLIT_FILE_TAG,
STREAMLIT_FILE_NAME
)
def chain_of_thought_creation():
    """Return the instruction that asks for reasoning before the answer.

    The text tells the model to list challenges and alternative approaches,
    weigh them, pick one, and only then produce what was requested.

    Returns:
        The instruction as a single string; every sentence ends with a
        trailing space so it can be concatenated with further prompt text.
    """
    sentences = (
        "First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. ",
        "For example, there are different libraries you could use. ",
        "Discuss the pros and cons for all of these approaches and then decide for one of the approaches. ",
        "Then write as I told you. ",
    )
    return ''.join(sentences)
def chain_of_thought_optimization(tag_name, file_name):
    """Build the prompt that asks the model to review and re-emit one file.

    The instruction text is handed to ``_task`` together with ``tag_name`` and
    ``file_name``; the value ``_task`` returns is returned unchanged.

    Args:
        tag_name: Passed through to ``_task`` as its second argument.
        file_name: Name of the file under review. It is interpolated into the
            instruction text and passed to ``_task`` as its third argument.

    Returns:
        The result of ``_task`` for the assembled instruction text.
    """
    # Prompt wording fixes: "not allowed disregard" -> "not allowed to
    # disregard", and a full stop after "Output only {file_name}" so it does
    # not run into the following sentence.
    return _task(
        f'First, write down an extensive list of obvious and non-obvious observations about {file_name} that could need an adjustment. Explain why. '
        f"Think if all the changes are required and finally decide for the changes you want to make, "
        f"but you are not allowed to disregard the instructions in the previous message. "
        f"Be very hesitant to change the code. Only make a change if you are sure that it is necessary. "
        f"Output only {file_name}. "
        f"Write the whole content of {file_name} - even if you decided to change only a small thing or even nothing. ",
        tag_name,
        file_name
    )

View File

@@ -9,6 +9,10 @@ def recreate_folder(folder_path):
shutil.rmtree(folder_path)
os.makedirs(folder_path)
def persist_file(file_content, file_name):
    """Write ``file_content`` to ``executor/<file_name>``, replacing any existing file.

    The path is relative to the current working directory, and the
    ``executor`` directory must already exist.

    Args:
        file_content: Text to write.
        file_name: File name (relative to ``executor/``) to write to.

    Raises:
        OSError: If the target cannot be opened for writing, e.g. because the
            ``executor`` directory is missing.
    """
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # non-ASCII characters in the content.
    with open(f'executor/{file_name}', 'w', encoding='utf-8') as f:
        f.write(file_content)
class GenerationTimeoutError(Exception):
    """Dedicated exception type for generation timeouts; adds no state or behaviour."""