From 11dbc8b162f905cb8bc5c99d62a64e227f8732ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= Date: Tue, 28 Mar 2023 14:53:05 +0200 Subject: [PATCH] feat: stable --- micro_chain.py | 160 +++++++++++++++++++++++++++----------------- src/jina_cloud.py | 85 +++++++++++++++++------ src/prompt_tasks.py | 3 +- 3 files changed, 166 insertions(+), 82 deletions(-) diff --git a/micro_chain.py b/micro_chain.py index 14eb995..cda3897 100644 --- a/micro_chain.py +++ b/micro_chain.py @@ -1,10 +1,9 @@ import random from main import extract_content_from_result, write_config_yml, get_all_executor_files_with_content, files_to_string - from src import gpt, jina_cloud from src.constants import FILE_AND_TAG_PAIRS -from src.jina_cloud import build_docker +from src.jina_cloud import push_executor, process_error_message from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \ chain_of_thought_optimization, requirements_file_task, docker_file_task, not_allowed from src.utils.io import recreate_folder, persist_file @@ -15,35 +14,33 @@ def wrap_content_in_code_block(executor_content, file_name, tag): return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n' - - def create_executor( executor_description, - input_modality, - output_modality, test_scenario, - executor_name + executor_name, + is_chain_of_thought=False, ): - input_doc_field = 'text' if input_modality == 'text' else 'blob' - output_doc_field = 'text' if output_modality == 'text' else 'blob' - # random integer at the end of the executor name to avoid name clashes recreate_folder('executor') EXECUTOR_FOLDER_v1 = 'executor/v1' recreate_folder(EXECUTOR_FOLDER_v1) recreate_folder('flow') + + print_colored('', '############# Executor #############', 'red') user_query = ( general_guidelines() - + executor_file_task(executor_name, executor_description, input_modality, input_doc_field, - output_modality, output_doc_field) + + executor_file_task(executor_name, executor_description, test_scenario) + chain_of_thought_creation() ) conversation = gpt.Conversation() - conversation.query(user_query) - executor_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py')) + executor_content_raw = conversation.query(user_query) + if is_chain_of_thought: + executor_content_raw = conversation.query( + f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py')) executor_content = extract_content_from_result(executor_content_raw, 'executor.py') + persist_file(executor_content, EXECUTOR_FOLDER_v1 + '/executor.py') print_colored('', '############# Test Executor #############', 'red') @@ -53,12 +50,13 @@ def create_executor( + test_executor_file_task(executor_name, test_scenario) ) conversation = gpt.Conversation() - conversation.query(user_query) - test_executor_content_raw = conversation.query( - f"General rules: " + not_allowed() + - chain_of_thought_optimization('python', 'test_executor.py') - + "Don't add any additional tests. " - ) + test_executor_content_raw = conversation.query(user_query) + if is_chain_of_thought: + test_executor_content_raw = conversation.query( + f"General rules: " + not_allowed() + + chain_of_thought_optimization('python', 'test_executor.py') + + "Don't add any additional tests. " + ) test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py') persist_file(test_executor_content, EXECUTOR_FOLDER_v1 + '/test_executor.py') @@ -70,8 +68,10 @@ def create_executor( + requirements_file_task() ) conversation = gpt.Conversation() - conversation.query(user_query) - requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ") + requirements_content_raw = conversation.query(user_query) + if is_chain_of_thought: + requirements_content_raw = conversation.query( + chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ") requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt') persist_file(requirements_content, EXECUTOR_FOLDER_v1 + '/requirements.txt') @@ -85,13 +85,16 @@ def create_executor( + docker_file_task() ) conversation = gpt.Conversation() - conversation.query(user_query) - dockerfile_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile')) + dockerfile_content_raw = conversation.query(user_query) + if is_chain_of_thought: + dockerfile_content_raw = conversation.query( + f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile')) dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile') persist_file(dockerfile_content, EXECUTOR_FOLDER_v1 + '/Dockerfile') write_config_yml(executor_name, EXECUTOR_FOLDER_v1) + def create_playground(executor_name, executor_path, host): print_colored('', '############# Playground #############', 'red') @@ -112,33 +115,40 @@ print(response[0].text) # can also be blob in case of image/audio..., this shoul ) conversation = gpt.Conversation() conversation.query(user_query) - playground_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'playground.py')) + playground_content_raw = conversation.query( + f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'playground.py')) playground_content = extract_content_from_result(playground_content_raw, 'playground.py') persist_file(playground_content, f'{executor_path}/playground.py') -def debug_executor(): - for i in range(1, 20): - error = build_docker(f'executor/v{i}') +def debug_executor(): + MAX_DEBUGGING_ITERATIONS = 20 + error_before = '' + for i in range(1, MAX_DEBUGGING_ITERATIONS): + # error_docker = build_docker(f'executor/v{i}') + log_hubble = push_executor(f'executor/v{i}') + error = process_error_message(log_hubble) if error: recreate_folder(f'executor/v{i + 1}') file_name_to_content = get_all_executor_files_with_content(f'executor/v{i}') all_files_string = files_to_string(file_name_to_content) user_query = ( - f"General rules: " + not_allowed() - + 'Here are all the files I use:\n' - + all_files_string - + 'I got the following error:\n' - + error + '\n' - + 'Think quickly about possible reasons. ' - 'Then output the files that need change. ' - "Don't output files that don't need change. " - "If you output a file, then write the complete file. " - "Use the exact same syntax to wrap the code:\n" - f"**...**\n" - f"```...\n" - f"...code...\n" - f"```\n\n" + f"General rules: " + not_allowed() + + 'Here are all the files I use:\n' + + all_files_string + + (('This is an error that is already fixed before:\n' + + error_before) if error_before else '') + + '\n\nNow, I get the following error:\n' + + error + '\n' + + 'Think quickly about possible reasons. ' + 'Then output the files that need change. ' + "Don't output files that don't need change. " + "If you output a file, then write the complete file. " + "Use the exact same syntax to wrap the code:\n" + f"**...**\n" + f"```...\n" + f"...code...\n" + f"```\n\n" ) conversation = gpt.Conversation() returned_files_raw = conversation.query(user_query) @@ -149,8 +159,12 @@ def debug_executor(): for file_name, content in file_name_to_content.items(): persist_file(content, f'executor/v{i + 1}/{file_name}') + error_before = error + else: break + if i == MAX_DEBUGGING_ITERATIONS - 1: + raise Exception('Could not debug the executor.') return f'executor/v{i}' @@ -161,31 +175,48 @@ def main( test_scenario, ): executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}' - create_executor(executor_description, input_modality, output_modality, test_scenario, executor_name) + create_executor(executor_description, test_scenario, executor_name) + # executor_name = 'MicroChainExecutor790050' executor_path = debug_executor() - print('Executor can be built locally, now we will push it to the cloud.') - jina_cloud.push_executor(executor_path) + # print('Executor can be built locally, now we will push it to the cloud.') + # jina_cloud.push_executor(executor_path) print('Deploy a jina flow') host = jina_cloud.deploy_flow(executor_name, 'flow') print(f'Flow is deployed create the playground for {host}') - executor_name = 'MicroChainExecutor48442' - executor_path = 'executor/v2' - host = 'grpcs://mybelovedocrflow-24a412bc63.wolf.jina.ai' create_playground(executor_name, executor_path, host) + print( + 'Executor name:', executor_name, '\n', + 'Executor path:', executor_path, '\n', + 'Host:', host, '\n', + 'Playground:', f'streamlit run {executor_path}/playground.py', '\n', + ) + if __name__ == '__main__': # ######## Level 1 task ######### - main( - executor_description="The executor takes a pdf file as input, parses it and returns the text.", - input_modality='pdf', - output_modality='text', - test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long', - ) - # money prompt: $0.56 - # money generation: $0.22 - # total money: $0.78 + # main( + # executor_description="The executor takes a pdf file as input, parses it and returns the text.", + # input_modality='pdf', + # output_modality='text', + # test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long', + # ) + main( + executor_description="The executor takes a url of a website as input and returns the logo of the website as an image.", + input_modality='url', + output_modality='image', + test_scenario='Takes https://jina.ai/ as input and returns an svg image of the logo.', + ) + + # # # ######## Level 1 task ######### + # main( + # executor_description="The executor takes a pdf file as input, parses it and returns the text.", + # input_modality='pdf', + # output_modality='text', + # test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long', + # ) + # ######## Level 2 task ######### # main( # executor_description="OCR detector", @@ -194,13 +225,12 @@ if __name__ == '__main__': # test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"', # ) - # ######## Level 3 task ######### # main( - # executor_description="The executor takes an mp3 file as input and returns bpm and pitch in the tags.", + # executor_description="The executor takes an mp3 file as input and returns bpm and pitch in a json.", # input_modality='audio', - # output_modality='tags', - # test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"', + # output_modality='json', + # test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a json with bpm and pitch', # ) ######### Level 4 task ######### @@ -212,3 +242,11 @@ if __name__ == '__main__': # test_scenario='Test that 3d object from https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj ' # 'is put in and out comes a 2d rendering of it', # ) + + # ######## Level 8 task ######### + # main( + # executor_description="The executor takes an image as input and returns a list of bounding boxes of all animals in the image.", + # input_modality='blob', + # output_modality='json', + # test_scenario='Take the image from https://thumbs.dreamstime.com/b/dog-professor-red-bow-tie-glasses-white-background-isolated-dog-professor-glasses-197036807.jpg as input and assert that the list contains at least one bounding box. ', + # ) diff --git a/src/jina_cloud.py b/src/jina_cloud.py index 1ea1fa5..fc67939 100644 --- a/src/jina_cloud.py +++ b/src/jina_cloud.py @@ -1,16 +1,59 @@ +import hashlib +import json import os import subprocess import re +from argparse import Namespace +from pathlib import Path import hubble +from hubble.executor.helper import upload_file, archive_package, get_request_header from jcloud.flow import CloudFlow from jina import Flow def push_executor(dir_path): - cmd = f'jina hub push {dir_path}/. --verbose --replay' - os.system(cmd) + dir_path = Path(dir_path) + + md5_hash = hashlib.md5() + bytesio = archive_package(dir_path) + content = bytesio.getvalue() + md5_hash.update(content) + md5_digest = md5_hash.hexdigest() + + form_data = { + 'public': 'True', + 'private': 'False', + 'verbose': 'True', + 'md5sum': md5_digest, + } + req_header = get_request_header() + resp = upload_file( + 'https://api.hubble.jina.ai/v2/rpc/executor.push', + 'filename', + content, + dict_data=form_data, + headers=req_header, + stream=False, + method='post', + ) + json_lines_str = resp.content.decode('utf-8') + if 'exited on non-zero code' not in json_lines_str: + return '' + responses = [] + for json_line in json_lines_str.splitlines(): + if 'exit code:' in json_line: + break + + d = json.loads(json_line) + + if 'payload' in d and type(d['payload']) == str: + responses.append(d['payload']) + elif type(d) == str: + responses.append(d) + return '\n'.join(responses) + def get_user_name(): client = hubble.Client(max_retries=None, jsonify=True) @@ -51,10 +94,10 @@ executors: with open(full_flow_path, 'w') as f: f.write(flow) - print('try local execution') - flow = Flow.load_config(full_flow_path) - with flow: - pass + # print('try local execution') + # flow = Flow.load_config(full_flow_path) + # with flow: + # pass print('deploy flow on jcloud') return deploy_on_jcloud(flow_yaml=full_flow_path) @@ -78,22 +121,24 @@ def update_client_line_in_file(file_path, host): file.write(replaced_content) +def process_error_message(error_message): + lines = error_message.split('\n') + relevant_lines = [] + + pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]") # Pattern to match lines like "#11 [7/8]" + last_matching_line_index = None + + for index, line in enumerate(lines): + if pattern.match(line): + last_matching_line_index = index + + if last_matching_line_index is not None: + relevant_lines = lines[last_matching_line_index:] + + return '\n'.join(relevant_lines[-25:]) + def build_docker(path): - def process_error_message(error_message): - lines = error_message.split('\n') - relevant_lines = [] - pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]") # Pattern to match lines like "#11 [7/8]" - last_matching_line_index = None - - for index, line in enumerate(lines): - if pattern.match(line): - last_matching_line_index = index - - if last_matching_line_index is not None: - relevant_lines = lines[last_matching_line_index:] - - return '\n'.join(relevant_lines) # The command to build the Docker image cmd = f"docker build -t micromagic {path}" diff --git a/src/prompt_tasks.py b/src/prompt_tasks.py index e4d093b..70c93a2 100644 --- a/src/prompt_tasks.py +++ b/src/prompt_tasks.py @@ -27,11 +27,12 @@ def _task(task, tag_name, file_name): ) -def executor_file_task(executor_name, executor_description, input_modality, input_doc_field, +def executor_file_task(executor_name, executor_description, test_scenario, input_modality, input_doc_field, output_modality, output_doc_field): return _task(f''' Write the executor called '{executor_name}'. It matches the following description: '{executor_description}'. +It will be tested with the following scenario: '{test_scenario}'. It gets a DocumentArray as input where each document has the input modality '{input_modality}' and can be accessed via document.{input_doc_field}. It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}. Have in mind that d.uri is never a path to a local file. It is always a url.