diff --git a/.gitignore b/.gitignore
index ba6b1af..80e54f9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1 @@
-/executor/
+/executor_level2/
diff --git a/main.py b/main.py
index 9ca4601..77de1d1 100644
--- a/main.py
+++ b/main.py
@@ -12,13 +12,16 @@ import re
 # from src.utils.string_tools import find_differences
 #
 #
+from src.constants import FILE_AND_TAG_PAIRS
+
+
 def extract_content_from_result(plain_text, file_name):
     pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)```"
     match = re.search(pattern, plain_text, re.MULTILINE)
     if match:
         return match.group(1).strip()
     else:
-        raise ValueError(f'Could not find {file_name} in result')
+        return ''
 #
 #
 # def extract_and_write(plain_text, dest_folder):
@@ -41,17 +44,17 @@ metas:
         f.write(config_content)
 #
 #
-# def get_all_executor_files_with_content(folder_path):
-#     file_name_to_content = {}
-#     for filename in os.listdir(folder_path):
-#         file_path = os.path.join(folder_path, filename)
-#
-#         if os.path.isfile(file_path):
-#             with open(file_path, 'r', encoding='utf-8') as file:
-#                 content = file.read()
-#                 file_name_to_content[filename] = content
-#
-#     return file_name_to_content
+def get_all_executor_files_with_content(folder_path):
+    file_name_to_content = {}
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+
+        if os.path.isfile(file_path):
+            with open(file_path, 'r', encoding='utf-8') as file:
+                content = file.read()
+                file_name_to_content[filename] = content
+
+    return file_name_to_content
 #
 #
 #
@@ -104,14 +107,15 @@ metas:
 #     print('DIFFERENCES:', find_differences(all_executor_files_string, all_executor_files_string_improved))
 #     return all_executor_files_string_improved
 #
-# def files_to_string(file_name_to_content):
-#     all_executor_files_string = ''
-#     for file_name, tag in FILE_AND_TAG_PAIRS:
-#         all_executor_files_string += f'**{file_name}**\n'
-#         all_executor_files_string += f'```{tag}\n'
-#         all_executor_files_string += file_name_to_content[file_name]
-#         all_executor_files_string += '\n```\n\n'
-#     return all_executor_files_string
+def files_to_string(file_name_to_content):
+    all_executor_files_string = ''
+    for file_name, tag in FILE_AND_TAG_PAIRS:
+        if file_name in file_name_to_content:
+            all_executor_files_string += f'**{file_name}**\n'
+            all_executor_files_string += f'```{tag}\n'
+            all_executor_files_string += file_name_to_content[file_name]
+            all_executor_files_string += '\n```\n\n'
+    return all_executor_files_string
 #
 #
 # def main(
diff --git a/micro_chain.py b/micro_chain.py
index 4176a23..288dc7b 100644
--- a/micro_chain.py
+++ b/micro_chain.py
@@ -1,7 +1,10 @@
 import random
 
-from main import extract_content_from_result, write_config_yml
+from main import extract_content_from_result, write_config_yml, get_all_executor_files_with_content, files_to_string
+
 from src import gpt, jina_cloud
+from src.constants import FILE_AND_TAG_PAIRS
+from src.jina_cloud import build_docker
 from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \
     chain_of_thought_optimization, requirements_file_task, docker_file_task
 from src.utils.io import recreate_folder, persist_file
@@ -21,86 +24,132 @@ def main(
         test_scenario,
         do_validation=True
 ):
-    input_doc_field = 'text' if input_modality == 'text' else 'blob'
-    output_doc_field = 'text' if output_modality == 'text' else 'blob'
-    # random integer at the end of the executor name to avoid name clashes
-    executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
-    recreate_folder('executor')
-    recreate_folder('flow')
+    # input_doc_field = 'text' if input_modality == 'text' else 'blob'
+    # output_doc_field = 'text' if output_modality == 'text' else 'blob'
+    # # random integer at the end of the executor name to avoid name clashes
+    # executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
+    # recreate_folder('executor')
+    # EXECUTOR_FOLDER_v1 = 'executor/v1'
+    # recreate_folder(EXECUTOR_FOLDER_v1)
+    # recreate_folder('flow')
+    #
+    # print_colored('', '############# Executor #############', 'red')
+    # user_query = (
+    #         general_guidelines()
+    #         + executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
+    #                              output_modality, output_doc_field)
+    #         + chain_of_thought_creation()
+    # )
+    # conversation = gpt.Conversation()
+    # conversation.query(user_query)
+    # executor_content_raw = conversation.query(chain_of_thought_optimization('python', 'executor.py'))
+    # executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
+    # persist_file(executor_content, 'executor.py')
+    #
+    # print_colored('', '############# Test Executor #############', 'red')
+    # user_query = (
+    #         general_guidelines()
+    #         + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+    #         + test_executor_file_task(executor_name, test_scenario)
+    # )
+    # conversation = gpt.Conversation()
+    # conversation.query(user_query)
+    # test_executor_content_raw = conversation.query(
+    #     chain_of_thought_optimization('python', 'test_executor.py')
+    #     + "Don't add any additional tests. "
+    # )
+    # test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py')
+    # persist_file(test_executor_content, 'test_executor.py')
+    #
+    # print_colored('', '############# Requirements #############', 'red')
+    # user_query = (
+    #         general_guidelines()
+    #         + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+    #         + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
+    #         + requirements_file_task()
+    # )
+    # conversation = gpt.Conversation()
+    # conversation.query(user_query)
+    # requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ")
+    #
+    # requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
+    # persist_file(requirements_content, 'requirements.txt')
+    #
+    # print_colored('', '############# Dockerfile #############', 'red')
+    # user_query = (
+    #         general_guidelines()
+    #         + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+    #         + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
+    #         + wrap_content_in_code_block(requirements_content, 'requirements.txt', '')
+    #         + docker_file_task()
+    # )
+    # conversation = gpt.Conversation()
+    # conversation.query(user_query)
+    # dockerfile_content_raw = conversation.query(chain_of_thought_optimization('dockerfile', 'Dockerfile'))
+    # dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
+    # persist_file(dockerfile_content, 'Dockerfile')
+    #
+    # write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
 
-    print_colored('', '############# Executor #############', 'red')
-    user_query = (
-            general_guidelines()
-            + executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
-                                 output_modality, output_doc_field)
-            + chain_of_thought_creation()
-    )
-    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    executor_content_raw = conversation.query(chain_of_thought_optimization('python', 'executor.py'))
-    executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
-    persist_file(executor_content, 'executor.py')
+    for i in range(1, 20):
+        conversation = gpt.Conversation()
+        error = build_docker(f'executor_level2/v{i}')
+        if error:
+            recreate_folder(f'executor_level2/v{i + 1}')
+            file_name_to_content = get_all_executor_files_with_content(f'executor_level2/v{i}')
+            all_files_string = files_to_string(file_name_to_content)
+            user_query = (
+                    'Here are all the files I use:\n'
+                    + all_files_string
+                    + 'I got the following error:\n'
+                    + error
+                    + 'Think quickly about possible reasons. '
+                      'Then output the files that need change. '
+                      "Don't output files that don't need change. "
+                      "If you output a file, then write the complete file. "
+                      "Use the exact same syntax to wrap the code:\n"
+                      f"**...**\n"
+                      f"```...\n"
+                      f"...code...\n"
+                      f"```\n\n"
+            )
+            returned_files_raw = conversation.query(user_query)
+            for file_name, tag in FILE_AND_TAG_PAIRS:
+                updated_file = extract_content_from_result(returned_files_raw, file_name)
+                if updated_file:
+                    file_name_to_content[file_name] = updated_file
 
-    print_colored('', '############# Test Executor #############', 'red')
-    user_query = (
-            general_guidelines()
-            + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
-            + test_executor_file_task(executor_name, test_scenario)
-    )
-    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    test_executor_content_raw = conversation.query(
-        chain_of_thought_optimization('python', 'test_executor.py')
-        + "Don't add any additional tests. "
-    )
-    test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py')
-    persist_file(test_executor_content, 'test_executor.py')
+            for file_name, content in file_name_to_content.items():
+                persist_file(content, f'executor_level2/v{i + 1}/{file_name}')
+        else:
+            break
 
-    print_colored('', '############# Requirements #############', 'red')
-    user_query = (
-            general_guidelines()
-            + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
-            + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
-            + requirements_file_task()
-    )
-    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt'))
-
-    requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
-    persist_file(requirements_content, 'requirements.txt')
-
-    print_colored('', '############# Dockerfile #############', 'red')
-    user_query = (
-            general_guidelines()
-            + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
-            + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
-            + wrap_content_in_code_block(requirements_content, 'requirements.txt', '')
-            + docker_file_task()
-    )
-    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    dockerfile_content_raw = conversation.query(chain_of_thought_optimization('dockerfile', 'Dockerfile'))
-    dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
-    persist_file(dockerfile_content, 'Dockerfile')
-
-    write_config_yml(executor_name, 'executor')
-
-    jina_cloud.push_executor('executor')
+    error = jina_cloud.push_executor('executor_level2')
 
     host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
     # create playground and client.py
 
 
 if __name__ == '__main__':
-    ######## Level 1 task #########
+    # ######## Level 1 task #########
+    # main(
+    #     executor_description="OCR detector",
+    #     input_modality='image',
+    #     # input_doc_field='blob',
+    #     output_modality='text',
+    #     # output_doc_field='text',
+    #     test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
+    #     do_validation=False
+    # )
+
+    ######### Level 2 task #########
     main(
-        executor_description="OCR detector",
-        input_modality='image',
-        # input_doc_field='blob',
-        output_modality='text',
-        # output_doc_field='text',
-        test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
+        executor_description="The executor takes 3D objects in obj format as input "
+                             "and outputs a 2D image projection of that object where the full object is shown. ",
+        input_modality='3d',
+        output_modality='image',
+        test_scenario='Test that 3d object from https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj '
+                      'is put in and out comes a 2d rendering of it',
         do_validation=False
     )
diff --git a/src/gpt.py b/src/gpt.py
index 9443e70..38ba4c3 100644
--- a/src/gpt.py
+++ b/src/gpt.py
@@ -30,7 +30,7 @@ def get_response(prompt_list: List[Tuple[str, str]]):
     try:
         response_generator = openai.ChatCompletion.create(
             temperature=0,
-            max_tokens=4_000,
+            max_tokens=2_000,
             model="gpt-4",
             stream=True,
             messages=[
diff --git a/src/jina_cloud.py b/src/jina_cloud.py
index cb4a94a..12d5995 100644
--- a/src/jina_cloud.py
+++ b/src/jina_cloud.py
@@ -1,5 +1,7 @@
 import os
 from multiprocessing.connection import Client
+import subprocess
+import re
 
 import hubble
 from jcloud.flow import CloudFlow
@@ -79,3 +81,36 @@ def update_client_line_in_file(file_path, host):
         file.write(replaced_content)
 
 
+def build_docker(path):
+    def process_error_message(error_message):
+        lines = error_message.split('\n')
+        relevant_lines = []
+
+        pattern = re.compile(r"^#\d+ \[\d+/\d+\]")  # Pattern to match lines like "#11 [7/8]"
+        last_matching_line_index = None
+
+        for index, line in enumerate(lines):
+            if pattern.match(line):
+                last_matching_line_index = index
+
+        if last_matching_line_index is not None:
+            relevant_lines = lines[last_matching_line_index:]
+
+        return '\n'.join(relevant_lines)
+
+    # The command to build the Docker image
+    cmd = f"docker build -t micromagic {path}"
+
+    # Run the command and capture the output
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+    stdout, stderr = process.communicate()
+
+    # Check if there was an error
+    if process.returncode != 0:
+        error_message = stderr.decode("utf-8")
+        relevant_error_message = process_error_message(error_message)
+        return relevant_error_message
+    else:
+        print("Docker build completed successfully.")
+        return ''
+
diff --git a/src/prompt_tasks.py b/src/prompt_tasks.py
index 97fb9e9..45ecbcc 100644
--- a/src/prompt_tasks.py
+++ b/src/prompt_tasks.py
@@ -36,6 +36,8 @@ It gets a DocumentArray as input where each document has the input modality '{in
 It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}.
 Have in mind that d.uri is never a path to a local file. It is always a url.
 The executor is not allowed to use the GPU.
+The executor is not allowed to access a database.
+The executor is not allowed to access a display.
 The executor is not allowed to access external apis.
 ''',
         EXECUTOR_FILE_TAG,
diff --git a/src/utils/io.py b/src/utils/io.py
index 05de15a..2f5e04e 100644
--- a/src/utils/io.py
+++ b/src/utils/io.py
@@ -10,7 +10,7 @@ def recreate_folder(folder_path):
     os.makedirs(folder_path)
 
 
 def persist_file(file_content, file_name):
-    with open(f'executor/{file_name}', 'w') as f:
+    with open(f'{file_name}', 'w') as f:
         f.write(file_content)