feat: stable

This commit is contained in:
Florian Hönicke
2023-03-28 14:53:05 +02:00
parent 50f47e91b2
commit 11dbc8b162
3 changed files with 166 additions and 82 deletions

View File

@@ -1,10 +1,9 @@
import random import random
from main import extract_content_from_result, write_config_yml, get_all_executor_files_with_content, files_to_string from main import extract_content_from_result, write_config_yml, get_all_executor_files_with_content, files_to_string
from src import gpt, jina_cloud from src import gpt, jina_cloud
from src.constants import FILE_AND_TAG_PAIRS from src.constants import FILE_AND_TAG_PAIRS
from src.jina_cloud import build_docker from src.jina_cloud import push_executor, process_error_message
from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \ from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \
chain_of_thought_optimization, requirements_file_task, docker_file_task, not_allowed chain_of_thought_optimization, requirements_file_task, docker_file_task, not_allowed
from src.utils.io import recreate_folder, persist_file from src.utils.io import recreate_folder, persist_file
@@ -15,35 +14,33 @@ def wrap_content_in_code_block(executor_content, file_name, tag):
return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n' return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n'
def create_executor( def create_executor(
executor_description, executor_description,
input_modality,
output_modality,
test_scenario, test_scenario,
executor_name executor_name,
is_chain_of_thought=False,
): ):
input_doc_field = 'text' if input_modality == 'text' else 'blob'
output_doc_field = 'text' if output_modality == 'text' else 'blob'
# random integer at the end of the executor name to avoid name clashes
recreate_folder('executor') recreate_folder('executor')
EXECUTOR_FOLDER_v1 = 'executor/v1' EXECUTOR_FOLDER_v1 = 'executor/v1'
recreate_folder(EXECUTOR_FOLDER_v1) recreate_folder(EXECUTOR_FOLDER_v1)
recreate_folder('flow') recreate_folder('flow')
print_colored('', '############# Executor #############', 'red') print_colored('', '############# Executor #############', 'red')
user_query = ( user_query = (
general_guidelines() general_guidelines()
+ executor_file_task(executor_name, executor_description, input_modality, input_doc_field, + executor_file_task(executor_name, executor_description, test_scenario)
output_modality, output_doc_field)
+ chain_of_thought_creation() + chain_of_thought_creation()
) )
conversation = gpt.Conversation() conversation = gpt.Conversation()
conversation.query(user_query) executor_content_raw = conversation.query(user_query)
executor_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py')) if is_chain_of_thought:
executor_content_raw = conversation.query(
f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py'))
executor_content = extract_content_from_result(executor_content_raw, 'executor.py') executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
persist_file(executor_content, EXECUTOR_FOLDER_v1 + '/executor.py') persist_file(executor_content, EXECUTOR_FOLDER_v1 + '/executor.py')
print_colored('', '############# Test Executor #############', 'red') print_colored('', '############# Test Executor #############', 'red')
@@ -53,7 +50,8 @@ def create_executor(
+ test_executor_file_task(executor_name, test_scenario) + test_executor_file_task(executor_name, test_scenario)
) )
conversation = gpt.Conversation() conversation = gpt.Conversation()
conversation.query(user_query) test_executor_content_raw = conversation.query(user_query)
if is_chain_of_thought:
test_executor_content_raw = conversation.query( test_executor_content_raw = conversation.query(
f"General rules: " + not_allowed() + f"General rules: " + not_allowed() +
chain_of_thought_optimization('python', 'test_executor.py') chain_of_thought_optimization('python', 'test_executor.py')
@@ -70,8 +68,10 @@ def create_executor(
+ requirements_file_task() + requirements_file_task()
) )
conversation = gpt.Conversation() conversation = gpt.Conversation()
conversation.query(user_query) requirements_content_raw = conversation.query(user_query)
requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ") if is_chain_of_thought:
requirements_content_raw = conversation.query(
chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ")
requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt') requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
persist_file(requirements_content, EXECUTOR_FOLDER_v1 + '/requirements.txt') persist_file(requirements_content, EXECUTOR_FOLDER_v1 + '/requirements.txt')
@@ -85,13 +85,16 @@ def create_executor(
+ docker_file_task() + docker_file_task()
) )
conversation = gpt.Conversation() conversation = gpt.Conversation()
conversation.query(user_query) dockerfile_content_raw = conversation.query(user_query)
dockerfile_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile')) if is_chain_of_thought:
dockerfile_content_raw = conversation.query(
f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile'))
dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile') dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
persist_file(dockerfile_content, EXECUTOR_FOLDER_v1 + '/Dockerfile') persist_file(dockerfile_content, EXECUTOR_FOLDER_v1 + '/Dockerfile')
write_config_yml(executor_name, EXECUTOR_FOLDER_v1) write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
def create_playground(executor_name, executor_path, host): def create_playground(executor_name, executor_path, host):
print_colored('', '############# Playground #############', 'red') print_colored('', '############# Playground #############', 'red')
@@ -112,14 +115,19 @@ print(response[0].text) # can also be blob in case of image/audio..., this shoul
) )
conversation = gpt.Conversation() conversation = gpt.Conversation()
conversation.query(user_query) conversation.query(user_query)
playground_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'playground.py')) playground_content_raw = conversation.query(
f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'playground.py'))
playground_content = extract_content_from_result(playground_content_raw, 'playground.py') playground_content = extract_content_from_result(playground_content_raw, 'playground.py')
persist_file(playground_content, f'{executor_path}/playground.py') persist_file(playground_content, f'{executor_path}/playground.py')
def debug_executor():
for i in range(1, 20):
error = build_docker(f'executor/v{i}') def debug_executor():
MAX_DEBUGGING_ITERATIONS = 20
error_before = ''
for i in range(1, MAX_DEBUGGING_ITERATIONS):
# error_docker = build_docker(f'executor/v{i}')
log_hubble = push_executor(f'executor/v{i}')
error = process_error_message(log_hubble)
if error: if error:
recreate_folder(f'executor/v{i + 1}') recreate_folder(f'executor/v{i + 1}')
file_name_to_content = get_all_executor_files_with_content(f'executor/v{i}') file_name_to_content = get_all_executor_files_with_content(f'executor/v{i}')
@@ -128,7 +136,9 @@ def debug_executor():
f"General rules: " + not_allowed() f"General rules: " + not_allowed()
+ 'Here are all the files I use:\n' + 'Here are all the files I use:\n'
+ all_files_string + all_files_string
+ 'I got the following error:\n' + (('This is an error that is already fixed before:\n'
+ error_before) if error_before else '')
+ '\n\nNow, I get the following error:\n'
+ error + '\n' + error + '\n'
+ 'Think quickly about possible reasons. ' + 'Think quickly about possible reasons. '
'Then output the files that need change. ' 'Then output the files that need change. '
@@ -149,8 +159,12 @@ def debug_executor():
for file_name, content in file_name_to_content.items(): for file_name, content in file_name_to_content.items():
persist_file(content, f'executor/v{i + 1}/{file_name}') persist_file(content, f'executor/v{i + 1}/{file_name}')
error_before = error
else: else:
break break
if i == MAX_DEBUGGING_ITERATIONS - 1:
raise Exception('Could not debug the executor.')
return f'executor/v{i}' return f'executor/v{i}'
@@ -161,31 +175,48 @@ def main(
test_scenario, test_scenario,
): ):
executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}' executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
create_executor(executor_description, input_modality, output_modality, test_scenario, executor_name) create_executor(executor_description, test_scenario, executor_name)
# executor_name = 'MicroChainExecutor790050'
executor_path = debug_executor() executor_path = debug_executor()
print('Executor can be built locally, now we will push it to the cloud.') # print('Executor can be built locally, now we will push it to the cloud.')
jina_cloud.push_executor(executor_path) # jina_cloud.push_executor(executor_path)
print('Deploy a jina flow') print('Deploy a jina flow')
host = jina_cloud.deploy_flow(executor_name, 'flow') host = jina_cloud.deploy_flow(executor_name, 'flow')
print(f'Flow is deployed create the playground for {host}') print(f'Flow is deployed create the playground for {host}')
executor_name = 'MicroChainExecutor48442'
executor_path = 'executor/v2'
host = 'grpcs://mybelovedocrflow-24a412bc63.wolf.jina.ai'
create_playground(executor_name, executor_path, host) create_playground(executor_name, executor_path, host)
print(
'Executor name:', executor_name, '\n',
'Executor path:', executor_path, '\n',
'Host:', host, '\n',
'Playground:', f'streamlit run {executor_path}/playground.py', '\n',
)
if __name__ == '__main__': if __name__ == '__main__':
# ######## Level 1 task ######### # ######## Level 1 task #########
main( # main(
executor_description="The executor takes a pdf file as input, parses it and returns the text.", # executor_description="The executor takes a pdf file as input, parses it and returns the text.",
input_modality='pdf', # input_modality='pdf',
output_modality='text', # output_modality='text',
test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long', # test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long',
) # )
# money prompt: $0.56
# money generation: $0.22
# total money: $0.78
main(
executor_description="The executor takes a url of a website as input and returns the logo of the website as an image.",
input_modality='url',
output_modality='image',
test_scenario='Takes https://jina.ai/ as input and returns an svg image of the logo.',
)
# # # ######## Level 1 task #########
# main(
# executor_description="The executor takes a pdf file as input, parses it and returns the text.",
# input_modality='pdf',
# output_modality='text',
# test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long',
# )
# ######## Level 2 task ######### # ######## Level 2 task #########
# main( # main(
# executor_description="OCR detector", # executor_description="OCR detector",
@@ -194,13 +225,12 @@ if __name__ == '__main__':
# test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"', # test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
# ) # )
# ######## Level 3 task ######### # ######## Level 3 task #########
# main( # main(
# executor_description="The executor takes an mp3 file as input and returns bpm and pitch in the tags.", # executor_description="The executor takes an mp3 file as input and returns bpm and pitch in a json.",
# input_modality='audio', # input_modality='audio',
# output_modality='tags', # output_modality='json',
# test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"', # test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a json with bpm and pitch',
# ) # )
######### Level 4 task ######### ######### Level 4 task #########
@@ -212,3 +242,11 @@ if __name__ == '__main__':
# test_scenario='Test that 3d object from https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj ' # test_scenario='Test that 3d object from https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj '
# 'is put in and out comes a 2d rendering of it', # 'is put in and out comes a 2d rendering of it',
# ) # )
# ######## Level 8 task #########
# main(
# executor_description="The executor takes an image as input and returns a list of bounding boxes of all animals in the image.",
# input_modality='blob',
# output_modality='json',
# test_scenario='Take the image from https://thumbs.dreamstime.com/b/dog-professor-red-bow-tie-glasses-white-background-isolated-dog-professor-glasses-197036807.jpg as input and assert that the list contains at least one bounding box. ',
# )

View File

@@ -1,16 +1,59 @@
import hashlib
import json
import os import os
import subprocess import subprocess
import re import re
from argparse import Namespace
from pathlib import Path
import hubble import hubble
from hubble.executor.helper import upload_file, archive_package, get_request_header
from jcloud.flow import CloudFlow from jcloud.flow import CloudFlow
from jina import Flow from jina import Flow
def push_executor(dir_path):
    """Push the executor stored in ``dir_path`` and return its failure log.

    The directory is packed with ``archive_package`` and uploaded to the
    ``executor.push`` endpoint together with the MD5 digest of the archive.

    Args:
        dir_path: Path (``str`` or ``Path``) of the executor directory.

    Returns:
        ``''`` when the response does not report a non-zero exit, otherwise
        the newline-joined log text collected from the response.
    """
    dir_path = Path(dir_path)

    content = archive_package(dir_path).getvalue()
    form_data = {
        'public': 'True',
        'private': 'False',
        'verbose': 'True',
        'md5sum': hashlib.md5(content).hexdigest(),
    }
    resp = upload_file(
        'https://api.hubble.jina.ai/v2/rpc/executor.push',
        'filename',
        content,
        dict_data=form_data,
        headers=get_request_header(),
        stream=False,
        method='post',
    )
    return _extract_push_failure_log(resp.content.decode('utf-8'))


def _extract_push_failure_log(json_lines_str):
    """Return the log text of a failed push, or ``''`` if it did not fail.

    ``json_lines_str`` is the decoded response body, one JSON value per line.
    Lines are read until the first one containing ``'exit code:'``.
    """
    if 'exited on non-zero code' not in json_lines_str:
        return ''

    responses = []
    for json_line in json_lines_str.splitlines():
        if 'exit code:' in json_line:
            break
        if not json_line.strip():
            # Blank separator lines are not JSON; json.loads would raise.
            continue
        try:
            d = json.loads(json_line)
        except json.JSONDecodeError:
            # Keep undecodable output as raw text instead of aborting the
            # whole debugging run on a single malformed line.
            responses.append(json_line)
            continue

        # Check the decoded type first: `'payload' in d` on a str is a
        # substring test and on a number raises TypeError.
        if isinstance(d, dict):
            payload = d.get('payload')
            if isinstance(payload, str):
                responses.append(payload)
        elif isinstance(d, str):
            responses.append(d)
    return '\n'.join(responses)
def get_user_name(): def get_user_name():
client = hubble.Client(max_retries=None, jsonify=True) client = hubble.Client(max_retries=None, jsonify=True)
@@ -51,10 +94,10 @@ executors:
with open(full_flow_path, 'w') as f: with open(full_flow_path, 'w') as f:
f.write(flow) f.write(flow)
print('try local execution') # print('try local execution')
flow = Flow.load_config(full_flow_path) # flow = Flow.load_config(full_flow_path)
with flow: # with flow:
pass # pass
print('deploy flow on jcloud') print('deploy flow on jcloud')
return deploy_on_jcloud(flow_yaml=full_flow_path) return deploy_on_jcloud(flow_yaml=full_flow_path)
@@ -78,8 +121,7 @@ def update_client_line_in_file(file_path, host):
file.write(replaced_content) file.write(replaced_content)
def build_docker(path): def process_error_message(error_message):
def process_error_message(error_message):
lines = error_message.split('\n') lines = error_message.split('\n')
relevant_lines = [] relevant_lines = []
@@ -93,7 +135,10 @@ def build_docker(path):
if last_matching_line_index is not None: if last_matching_line_index is not None:
relevant_lines = lines[last_matching_line_index:] relevant_lines = lines[last_matching_line_index:]
return '\n'.join(relevant_lines) return '\n'.join(relevant_lines[-25:])
def build_docker(path):
# The command to build the Docker image # The command to build the Docker image
cmd = f"docker build -t micromagic {path}" cmd = f"docker build -t micromagic {path}"

View File

@@ -27,11 +27,12 @@ def _task(task, tag_name, file_name):
) )
def executor_file_task(executor_name, executor_description, input_modality, input_doc_field, def executor_file_task(executor_name, executor_description, test_scenario, input_modality, input_doc_field,
output_modality, output_doc_field): output_modality, output_doc_field):
return _task(f''' return _task(f'''
Write the executor called '{executor_name}'. Write the executor called '{executor_name}'.
It matches the following description: '{executor_description}'. It matches the following description: '{executor_description}'.
It will be tested with the following scenario: '{test_scenario}'.
It gets a DocumentArray as input where each document has the input modality '{input_modality}' and can be accessed via document.{input_doc_field}. It gets a DocumentArray as input where each document has the input modality '{input_modality}' and can be accessed via document.{input_doc_field}.
It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}. It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}.
Have in mind that d.uri is never a path to a local file. It is always a url. Have in mind that d.uri is never a path to a local file. It is always a url.