feat: playground

Florian Hönicke
2023-03-23 17:41:44 +01:00
parent 970ddd1ebf
commit 50f47e91b2
6 changed files with 157 additions and 75 deletions

View File

@@ -6,7 +6,7 @@ from src import gpt, jina_cloud
 from src.constants import FILE_AND_TAG_PAIRS
 from src.jina_cloud import build_docker
 from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \
-    chain_of_thought_optimization, requirements_file_task, docker_file_task
+    chain_of_thought_optimization, requirements_file_task, docker_file_task, not_allowed
 from src.utils.io import recreate_folder, persist_file
 from src.utils.string_tools import print_colored
@@ -15,17 +15,19 @@ def wrap_content_in_code_block(executor_content, file_name, tag):
     return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n'


-def main(
+def create_executor(
         executor_description,
         input_modality,
         output_modality,
         test_scenario,
-        do_validation=True
+        executor_name
 ):
     input_doc_field = 'text' if input_modality == 'text' else 'blob'
     output_doc_field = 'text' if output_modality == 'text' else 'blob'
     # random integer at the end of the executor name to avoid name clashes
-    executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'

     recreate_folder('executor')
     EXECUTOR_FOLDER_v1 = 'executor/v1'
     recreate_folder(EXECUTOR_FOLDER_v1)
@@ -40,9 +42,9 @@ def main(
     )
     conversation = gpt.Conversation()
     conversation.query(user_query)
-    executor_content_raw = conversation.query(chain_of_thought_optimization('python', 'executor.py'))
+    executor_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py'))
     executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
-    persist_file(executor_content, 'executor.py')
+    persist_file(executor_content, EXECUTOR_FOLDER_v1 + '/executor.py')

     print_colored('', '############# Test Executor #############', 'red')
     user_query = (
@@ -53,11 +55,12 @@ def main(
     conversation = gpt.Conversation()
     conversation.query(user_query)
     test_executor_content_raw = conversation.query(
+        f"General rules: " + not_allowed() +
         chain_of_thought_optimization('python', 'test_executor.py')
         + "Don't add any additional tests. "
     )
     test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py')
-    persist_file(test_executor_content, 'test_executor.py')
+    persist_file(test_executor_content, EXECUTOR_FOLDER_v1 + '/test_executor.py')

     print_colored('', '############# Requirements #############', 'red')
     user_query = (
@@ -71,7 +74,7 @@ def main(
     requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ")
     requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
-    persist_file(requirements_content, 'requirements.txt')
+    persist_file(requirements_content, EXECUTOR_FOLDER_v1 + '/requirements.txt')

     print_colored('', '############# Dockerfile #############', 'red')
     user_query = (
@@ -83,24 +86,50 @@ def main(
     )
     conversation = gpt.Conversation()
     conversation.query(user_query)
-    dockerfile_content_raw = conversation.query(chain_of_thought_optimization('dockerfile', 'Dockerfile'))
+    dockerfile_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile'))
     dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
-    persist_file(dockerfile_content, 'Dockerfile')
+    persist_file(dockerfile_content, EXECUTOR_FOLDER_v1 + '/Dockerfile')

     write_config_yml(executor_name, EXECUTOR_FOLDER_v1)

-    for i in range(1, 20):
-        conversation = gpt.Conversation()
+
+def create_playground(executor_name, executor_path, host):
+    print_colored('', '############# Playground #############', 'red')
+    file_name_to_content = get_all_executor_files_with_content(executor_path)
+    user_query = (
+            general_guidelines()
+            + wrap_content_in_code_block(file_name_to_content['executor.py'], 'executor.py', 'python')
+            + wrap_content_in_code_block(file_name_to_content['test_executor.py'], 'test_executor.py', 'python')
+            + f'''
+Create a playground for the executor {executor_name} using streamlit.
+The executor is hosted on {host}.
+This is an example how you can connect to the executor assuming the document (d) is already defined:
+from jina import Client, Document, DocumentArray
+client = Client(host='{host}')
+response = client.post('/process', inputs=DocumentArray([d]))
+print(response[0].text) # can also be blob in case of image/audio..., this should be visualized in the streamlit app
+'''
+    )
+    conversation = gpt.Conversation()
+    conversation.query(user_query)
+    playground_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'playground.py'))
+    playground_content = extract_content_from_result(playground_content_raw, 'playground.py')
+    persist_file(playground_content, f'{executor_path}/playground.py')
+
+
+def debug_executor():
+    for i in range(1, 20):
         error = build_docker(f'executor/v{i}')
         if error:
             recreate_folder(f'executor/v{i + 1}')
             file_name_to_content = get_all_executor_files_with_content(f'executor/v{i}')
             all_files_string = files_to_string(file_name_to_content)
             user_query = (
-                'Here are all the files I use:\n'
+                f"General rules: " + not_allowed()
+                + 'Here are all the files I use:\n'
                 + all_files_string
                 + 'I got the following error:\n'
-                + error
+                + error + '\n'
                 + 'Think quickly about possible reasons. '
                 'Then output the files that need change. '
                 "Don't output files that don't need change. "
@@ -111,6 +140,7 @@ def main(
                 f"...code...\n"
                 f"```\n\n"
             )
+            conversation = gpt.Conversation()
             returned_files_raw = conversation.query(user_query)
             for file_name, tag in FILE_AND_TAG_PAIRS:
                 updated_file = extract_content_from_result(returned_files_raw, file_name)
@@ -121,33 +151,64 @@ def main(
                     persist_file(content, f'executor/v{i + 1}/{file_name}')
         else:
             break
+    return f'executor/v{i}'

-    jina_cloud.push_executor('executor')
-    host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
-
-    # create playgorund and client.py
+
+def main(
+        executor_description,
+        input_modality,
+        output_modality,
+        test_scenario,
+):
+    executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
+    create_executor(executor_description, input_modality, output_modality, test_scenario, executor_name)
+    executor_path = debug_executor()
+    print('Executor can be built locally, now we will push it to the cloud.')
+    jina_cloud.push_executor(executor_path)
+    print('Deploy a jina flow')
+    host = jina_cloud.deploy_flow(executor_name, 'flow')
+    print(f'Flow is deployed create the playground for {host}')
+    executor_name = 'MicroChainExecutor48442'
+    executor_path = 'executor/v2'
+    host = 'grpcs://mybelovedocrflow-24a412bc63.wolf.jina.ai'
+    create_playground(executor_name, executor_path, host)


 if __name__ == '__main__':
     # ######## Level 1 task #########
+    main(
+        executor_description="The executor takes a pdf file as input, parses it and returns the text.",
+        input_modality='pdf',
+        output_modality='text',
+        test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long',
+    )
+    # money prompt: $0.56
+    # money generation: $0.22
+    # total money: $0.78
+
+    # ######## Level 2 task #########
     # main(
     #     executor_description="OCR detector",
     #     input_modality='image',
-    #     # input_doc_field='blob',
     #     output_modality='text',
-    #     # output_doc_field='text',
     #     test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
-    #     do_validation=False
     # )

-    ######### Level 2 task #########
-    main(
-        executor_description="The executor takes 3D objects in obj format as input "
-                             "and outputs a 2D image projection of that object where the full object is shown. ",
-        input_modality='3d',
-        output_modality='image',
-        test_scenario='Test that 3d object from https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj '
-                      'is put in and out comes a 2d rendering of it',
-        do_validation=False
-    )
+    # ######## Level 3 task #########
+    # main(
+    #     executor_description="The executor takes an mp3 file as input and returns bpm and pitch in the tags.",
+    #     input_modality='audio',
+    #     output_modality='tags',
+    #     test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
+    # )
+
+    ######### Level 4 task #########
+    # main(
+    #     executor_description="The executor takes 3D objects in obj format as input "
+    #                          "and outputs a 2D image projection of that object where the full object is shown. ",
+    #     input_modality='3d',
+    #     output_modality='image',
+    #     test_scenario='Test that 3d object from https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj '
+    #                   'is put in and out comes a 2d rendering of it',
+    # )
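
For context on what the new `create_playground` step asks GPT to produce: below is a purely illustrative sketch of what a generated `playground.py` for a text-output executor might look like, assuming the Jina `Client` pattern quoted in the prompt above and a plain streamlit UI. It is not part of the commit and not actual generated output; `HOST` is a placeholder.

```python
# Illustrative sketch only - not part of this commit and not actual generated output.
# Assumes the Client pattern from the prompt above and a standard streamlit setup.
import streamlit as st
from jina import Client, Document, DocumentArray

HOST = 'grpcs://<your-flow-host>.wolf.jina.ai'  # placeholder, returned by deploy_flow()

st.title('Executor Playground')
text = st.text_area('Input text')

if st.button('Process') and text:
    client = Client(host=HOST)
    response = client.post('/process', inputs=DocumentArray([Document(text=text)]))
    st.write(response[0].text)  # for image/audio executors this would be a blob instead
```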

View File

@@ -1,5 +1,6 @@
-jina[perf]==3.14.2.dev18
-openai
-ptest
-jcloud
-uvicorn
+jina==3.14.1
+pyrender~=0.1.45
+trimesh~=3.10.0
+numpy~=1.22.3
+Pillow~=9.0.1
+requests~=2.27.1

View File

@@ -11,6 +11,8 @@ from src.utils.string_tools import print_colored

 openai.api_key = os.environ['OPENAI_API_KEY']

+total_chars_prompt = 0
+total_chars_generation = 0

 class Conversation:
     def __init__(self):
@@ -26,6 +28,7 @@ class Conversation:

 def get_response(prompt_list: List[Tuple[str, str]]):
+    global total_chars_prompt, total_chars_generation
     for i in range(10):
         try:
             response_generator = openai.ChatCompletion.create(
@@ -42,7 +45,7 @@ def get_response(prompt_list: List[Tuple[str, str]]):
                 ]
             )
             response_generator_with_timeout = timeout_generator_wrapper(response_generator, 5)
+            total_chars_prompt += sum(len(prompt[1]) for prompt in prompt_list)
             complete_string = ''
             for chunk in response_generator_with_timeout:
                 delta = chunk['choices'][0]['delta']
@@ -50,6 +53,13 @@ def get_response(prompt_list: List[Tuple[str, str]]):
                     content = delta['content']
                     print_colored('' if complete_string else 'assistent', content, 'green', end='')
                     complete_string += content
+                    total_chars_generation += len(content)
+            print('\n')
+            money_prompt = round(total_chars_prompt / 3.4 * 0.03 / 1000, 2)
+            money_generation = round(total_chars_generation / 3.4 * 0.06 / 1000, 2)
+            print('money prompt:', f'${money_prompt}')
+            print('money generation:', f'${money_generation}')
+            print('total money:', f'${money_prompt + money_generation}')
             print('\n')
             return complete_string
         except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e:
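
The new accounting lines estimate spend from character counts. Here is a small sketch of that arithmetic, assuming roughly 3.4 characters per token and GPT-4 pricing of $0.03/1K prompt tokens and $0.06/1K completion tokens; the character counts in the example are made up, chosen only to reproduce the figures noted as comments in main.py.

```python
# Sketch of the cost estimate introduced above (hypothetical helper, not part of the commit).
# Assumes ~3.4 characters per token and GPT-4 pricing: $0.03/1K prompt, $0.06/1K completion tokens.
def estimate_cost(total_chars_prompt: int, total_chars_generation: int):
    money_prompt = round(total_chars_prompt / 3.4 * 0.03 / 1000, 2)
    money_generation = round(total_chars_generation / 3.4 * 0.06 / 1000, 2)
    return money_prompt, money_generation, money_prompt + money_generation

# Made-up example: ~63.5k prompt chars and ~12.5k generated chars
# give $0.56 + $0.22 = $0.78, matching the comment block in main.py.
print(estimate_cost(63_500, 12_500))
```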

View File

@@ -1,5 +1,4 @@
 import os
-from multiprocessing.connection import Client
 import subprocess
 import re
@@ -7,7 +6,6 @@ import hubble
 from jcloud.flow import CloudFlow
 from jina import Flow

-from src.constants import FLOW_URL_PLACEHOLDER


 def push_executor(dir_path):
@@ -26,7 +24,7 @@ def deploy_on_jcloud(flow_yaml):

-def deploy_flow(executor_name, do_validation, dest_folder):
+def deploy_flow(executor_name, dest_folder):
     flow = f'''
jtype: Flow
with:
@@ -53,7 +51,6 @@ executors:
     with open(full_flow_path, 'w') as f:
         f.write(flow)

-    if do_validation:
     print('try local execution')
     flow = Flow.load_config(full_flow_path)
     with flow:
@@ -86,7 +83,7 @@ def build_docker(path):
     lines = error_message.split('\n')
     relevant_lines = []

-    pattern = re.compile(r"^#\d+ \[\d+/\d+\]")  # Pattern to match lines like "#11 [7/8]"
+    pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]")  # Pattern to match lines like "#11 [7/8]"
     last_matching_line_index = None

     for index, line in enumerate(lines):
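
A quick, hypothetical check of the relaxed regex above: the `[ \d]` character class presumably lets the pattern also match BuildKit step markers whose counters are padded with spaces. The sample build line is made up.

```python
import re

old = re.compile(r"^#\d+ \[\d+/\d+\]")
new = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]")

line = "#11 [ 7/12] RUN pip install -r requirements.txt"  # made-up docker build output
print(bool(old.match(line)))  # False - the space after '[' breaks the old pattern
print(bool(new.match(line)))  # True
```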

View File

@@ -66,7 +66,13 @@ Here is an example of how a DocumentArray can be defined:
 from jina import DocumentArray, Document
 d1 = Document(text='hello')
-d2 = Document(blob=b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIHDR\\x00\\x00\\x03L\\x00\\x00\\x01\\x18\\x08\\x06\\x00\\x00\\x00o...')
+# you can load binary data into a document
+url = 'https://...'
+response = requests.get(url)
+obj_data = response.content
+d2 = Document(blob=obj_data) # blob is bytes like b'\\x89PNG\\r\\n\\x1a\\n
 d3 = Document(tensor=numpy.array([1, 2, 3]), chunks=[Document(uri=/local/path/to/file)]
 d4 = Document(
     uri='https://docs.docarray.org/img/logo.png',
@@ -76,13 +82,13 @@ d5 = Document()
 d5.tensor = np.ones((2,4))
 d5.uri = 'https://audio.com/audio.mp3'
 d6 = Document()
-d6.blob = b'RIFF\\x00\\x00\\x00\\x00WAVEfmt \\x10\\x00...'
+d6.blob # like b'RIFF\\x00\\x00\\x00\\x00WAVEfmt \\x10\\x00...'
 docs = DocumentArray([
     d1, d2, d3, d4
 ])
 # the document has a helper function load_uri_to_blob:
 # For instance, d4.load_uri_to_blob() downloads the file from d4.uri and stores it in d4.blob.
-# If d4.uri was something like 'https://website.web/img.jpg', then d4.blob would be something like b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01...
+# If d4.uri was something like 'https://website.web/img.jpg', then d4.blob would be something like b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01...
 '''
@@ -94,10 +100,10 @@ Here is an example of a client file:
 ```python
 from jina import Client, Document, DocumentArray
 client = Client(host='{FLOW_URL_PLACEHOLDER}')
-d = Document(uri='data/img.png')
+d = Document(uri='...')
 d.load_uri_to_blob()
 response = client.post('/process', inputs=DocumentArray([d]))
-response[0].summary()
+print(response[0].text)
 ```
 '''
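
The prompt above leans on `Document.load_uri_to_blob()`. A minimal example of that helper, going only by what the prompt text itself describes; the URL is the one already used in the example above.

```python
from jina import Document

# load_uri_to_blob() downloads the file behind d.uri and stores the raw bytes in d.blob,
# as the prompt text above describes.
d = Document(uri='https://docs.docarray.org/img/logo.png')
d.load_uri_to_blob()
print(type(d.blob), d.blob[:8])  # bytes; a PNG starts with b'\x89PNG\r\n\x1a\n'
```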

View File

@@ -35,27 +35,12 @@ It matches the following description: '{executor_description}'.
 It gets a DocumentArray as input where each document has the input modality '{input_modality}' and can be accessed via document.{input_doc_field}.
 It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}.
 Have in mind that d.uri is never a path to a local file. It is always a url.
-The executor is not allowed to use the GPU.
-The executor is not allowed to access a database.
-The executor is not allowed to access a display.
-The executor is not allowed to access external apis.
-''',
+''' + not_allowed(),
         EXECUTOR_FILE_TAG,
         EXECUTOR_FILE_NAME
     )


-def requirements_file_task():
-    return _task(
-        "Write the content of the requirements.txt file. "
-        "Make sure to include pytest. "
-        "Make sure that jina==3.14.1. "
-        "All versions are fixed using ~=, ==, <, >, <=, >=. The package versions should not have conflicts. ",
-        REQUIREMENTS_FILE_TAG,
-        REQUIREMENTS_FILE_NAME
-    )


 def test_executor_file_task(executor_name, test_scenario):
     return _task(
         "Write a small unit test for the executor. "
@@ -65,11 +50,23 @@ def test_executor_file_task(executor_name, test_scenario):
             if test_scenario else ""
         )
         + "Use the following import to import the executor: "
-        f"from executor import {executor_name} ",
+        f"from executor import {executor_name} "
+        + not_allowed()
+        + "The test is not allowed to open local files. ",
         TEST_EXECUTOR_FILE_TAG,
         TEST_EXECUTOR_FILE_NAME
     )


+def requirements_file_task():
+    return _task(
+        "Write the content of the requirements.txt file. "
+        "Make sure to include pytest. "
+        "Make sure that jina==3.14.1. "
+        "All versions are fixed using ~=, ==, <, >, <=, >=. The package versions should not have conflicts. ",
+        REQUIREMENTS_FILE_TAG,
+        REQUIREMENTS_FILE_NAME
+    )
+
+
 def docker_file_task():
     return _task(
@@ -80,8 +77,9 @@ def docker_file_task():
         "Be aware that the machine the docker container is running on does not have a GPU - only CPU. "
         "Add the config.yml file to the Dockerfile. "
         "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
-        'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
-        "The Dockerfile runs the test during the build process. ",
+        'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. '
+        'Make sure the all files are in the /workdir. '
+        "The Dockerfile runs the test during the build process. " + not_allowed(),
         DOCKER_FILE_TAG,
         DOCKER_FILE_NAME
     )
@@ -106,8 +104,9 @@ def streamlit_file_task():
 def chain_of_thought_creation():
     return (
         "First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. "
-        "For example, there are different libraries you could use. "
-        "Discuss the pros and cons for all of these approaches and then decide for one of the approaches. "
+        "For example, there are different libraries you could use and not all of them obay the rules: "
+        + not_allowed()
+        + "Discuss the pros and cons for all of these approaches and then decide for one of the approaches. "
         "Then write as I told you. "
     )
@@ -124,3 +123,11 @@ def chain_of_thought_optimization(tag_name, file_name):
         tag_name,
         file_name
     )
+
+
+def not_allowed():
+    return '''
+The executor is not allowed to use the GPU.
+The executor is not allowed to access a database.
+The executor is not allowed to access a display.
+The executor is not allowed to access external apis.
+'''