mirror of https://github.com/aljazceru/dev-gpt.git (synced 2025-12-20 23:24:20 +01:00)

feat: error feedback
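
Executor generation can now recover from failed Docker builds. main() loops over versioned folders: each round builds executor_level2/v{i} with the new build_docker() helper in src/jina_cloud.py, and if the build fails, the tail of the build log plus all current files (serialized by files_to_string()) is sent to GPT-4 with instructions to return only the files that need to change. The returned files are extracted and written to executor_level2/v{i + 1} for the next attempt, for up to 19 rounds. In support of this, extract_content_from_result() returns '' instead of raising when a file is missing from the answer, files_to_string() skips files that are not present, persist_file() no longer hard-codes the executor/ folder, the executor prompt forbids database, display, and external API access, and max_tokens drops from 4,000 to 2,000.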

.gitignore (vendored, 2 changes)

@@ -1 +1 @@
-/executor/
+/executor_level2/

main.py (44 changes)

@@ -12,13 +12,16 @@ import re
 # from src.utils.string_tools import find_differences
 #
 #
+from src.constants import FILE_AND_TAG_PAIRS
+
+
 def extract_content_from_result(plain_text, file_name):
     pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)```"
     match = re.search(pattern, plain_text, re.MULTILINE)
     if match:
         return match.group(1).strip()
     else:
-        raise ValueError(f'Could not find {file_name} in result')
+        return ''
 #
 #
 # def extract_and_write(plain_text, dest_folder):
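
For reference, the regex above expects model answers in the wrapped-file format that the prompts request. A minimal illustration of the extraction (the sample answer is made up; a fence variable stands in for literal triple backticks to keep this snippet readable):

```python
import re

fence = '`' * 3  # stands in for a literal triple backtick
file_name = 'executor.py'

# Hypothetical model answer in the expected **<file>** + fenced-code format.
plain_text = f"**{file_name}**\n{fence}python\nprint('hello')\n{fence}"

pattern = fr"^\*\*{file_name}\*\*\n{fence}(?:\w+\n)?([\s\S]*?){fence}"
match = re.search(pattern, plain_text, re.MULTILINE)
print(match.group(1).strip())  # -> print('hello')
```

With the change, a missing file yields '' instead of a ValueError, so callers can treat "file not in the answer" as "file unchanged".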
@@ -41,17 +44,17 @@ metas:
         f.write(config_content)
 #
 #
-# def get_all_executor_files_with_content(folder_path):
-#     file_name_to_content = {}
-#     for filename in os.listdir(folder_path):
-#         file_path = os.path.join(folder_path, filename)
-#
-#         if os.path.isfile(file_path):
-#             with open(file_path, 'r', encoding='utf-8') as file:
-#                 content = file.read()
-#             file_name_to_content[filename] = content
-#
-#     return file_name_to_content
+def get_all_executor_files_with_content(folder_path):
+    file_name_to_content = {}
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+
+        if os.path.isfile(file_path):
+            with open(file_path, 'r', encoding='utf-8') as file:
+                content = file.read()
+            file_name_to_content[filename] = content
+
+    return file_name_to_content
 #
 #
 #

@@ -104,14 +107,15 @@ metas:
 # print('DIFFERENCES:', find_differences(all_executor_files_string, all_executor_files_string_improved))
 # return all_executor_files_string_improved
 #
-# def files_to_string(file_name_to_content):
-#     all_executor_files_string = ''
-#     for file_name, tag in FILE_AND_TAG_PAIRS:
-#         all_executor_files_string += f'**{file_name}**\n'
-#         all_executor_files_string += f'```{tag}\n'
-#         all_executor_files_string += file_name_to_content[file_name]
-#         all_executor_files_string += '\n```\n\n'
-#     return all_executor_files_string
+def files_to_string(file_name_to_content):
+    all_executor_files_string = ''
+    for file_name, tag in FILE_AND_TAG_PAIRS:
+        if file_name in file_name_to_content:
+            all_executor_files_string += f'**{file_name}**\n'
+            all_executor_files_string += f'```{tag}\n'
+            all_executor_files_string += file_name_to_content[file_name]
+            all_executor_files_string += '\n```\n\n'
+    return all_executor_files_string
 #
 #
 # def main(
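
The practical effect of the new `if file_name in file_name_to_content` guard: files missing from the dict are skipped instead of raising a KeyError. A small self-contained sketch (the FILE_AND_TAG_PAIRS values are assumed here; the real ones live in src/constants.py and are not part of this diff):

```python
FILE_AND_TAG_PAIRS = [('executor.py', 'python'), ('Dockerfile', 'dockerfile')]  # assumed values

fence = '`' * 3  # stands in for a literal triple backtick

def files_to_string(file_name_to_content):
    all_executor_files_string = ''
    for file_name, tag in FILE_AND_TAG_PAIRS:
        if file_name in file_name_to_content:  # the new guard
            all_executor_files_string += f'**{file_name}**\n{fence}{tag}\n'
            all_executor_files_string += file_name_to_content[file_name] + f'\n{fence}\n\n'
    return all_executor_files_string

# 'Dockerfile' is absent, so it is silently omitted rather than crashing the loop.
print(files_to_string({'executor.py': "print('hi')"}))
```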

micro_chain.py (185 changes)

@@ -1,7 +1,10 @@
 import random
 
-from main import extract_content_from_result, write_config_yml
+from main import extract_content_from_result, write_config_yml, get_all_executor_files_with_content, files_to_string
+
 from src import gpt, jina_cloud
+from src.constants import FILE_AND_TAG_PAIRS
+from src.jina_cloud import build_docker
 from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \
     chain_of_thought_optimization, requirements_file_task, docker_file_task
 from src.utils.io import recreate_folder, persist_file
@@ -21,86 +24,132 @@ def main(
         test_scenario,
         do_validation=True
 ):
-    input_doc_field = 'text' if input_modality == 'text' else 'blob'
-    output_doc_field = 'text' if output_modality == 'text' else 'blob'
-    # random integer at the end of the executor name to avoid name clashes
-    executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
-    recreate_folder('executor')
-    recreate_folder('flow')
-
-    print_colored('', '############# Executor #############', 'red')
-    user_query = (
-            general_guidelines()
-            + executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
-                                 output_modality, output_doc_field)
-            + chain_of_thought_creation()
-    )
-    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    executor_content_raw = conversation.query(chain_of_thought_optimization('python', 'executor.py'))
-    executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
-    persist_file(executor_content, 'executor.py')
-
-    print_colored('', '############# Test Executor #############', 'red')
-    user_query = (
-            general_guidelines()
-            + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
-            + test_executor_file_task(executor_name, test_scenario)
-    )
-    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    test_executor_content_raw = conversation.query(
-        chain_of_thought_optimization('python', 'test_executor.py')
-        + "Don't add any additional tests. "
-    )
-    test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py')
-    persist_file(test_executor_content, 'test_executor.py')
-
-    print_colored('', '############# Requirements #############', 'red')
-    user_query = (
-            general_guidelines()
-            + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
-            + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
-            + requirements_file_task()
-    )
-    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt'))
-    requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
-    persist_file(requirements_content, 'requirements.txt')
-
-    print_colored('', '############# Dockerfile #############', 'red')
-    user_query = (
-            general_guidelines()
-            + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
-            + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
-            + wrap_content_in_code_block(requirements_content, 'requirements.txt', '')
-            + docker_file_task()
-    )
-    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    dockerfile_content_raw = conversation.query(chain_of_thought_optimization('dockerfile', 'Dockerfile'))
-    dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
-    persist_file(dockerfile_content, 'Dockerfile')
-
-    write_config_yml(executor_name, 'executor')
-
-    jina_cloud.push_executor('executor')
+    # input_doc_field = 'text' if input_modality == 'text' else 'blob'
+    # output_doc_field = 'text' if output_modality == 'text' else 'blob'
+    # # random integer at the end of the executor name to avoid name clashes
+    # executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
+    # recreate_folder('executor')
+    # EXECUTOR_FOLDER_v1 = 'executor/v1'
+    # recreate_folder(EXECUTOR_FOLDER_v1)
+    # recreate_folder('flow')
+    #
+    # print_colored('', '############# Executor #############', 'red')
+    # user_query = (
+    #         general_guidelines()
+    #         + executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
+    #                              output_modality, output_doc_field)
+    #         + chain_of_thought_creation()
+    # )
+    # conversation = gpt.Conversation()
+    # conversation.query(user_query)
+    # executor_content_raw = conversation.query(chain_of_thought_optimization('python', 'executor.py'))
+    # executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
+    # persist_file(executor_content, 'executor.py')
+    #
+    # print_colored('', '############# Test Executor #############', 'red')
+    # user_query = (
+    #         general_guidelines()
+    #         + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+    #         + test_executor_file_task(executor_name, test_scenario)
+    # )
+    # conversation = gpt.Conversation()
+    # conversation.query(user_query)
+    # test_executor_content_raw = conversation.query(
+    #     chain_of_thought_optimization('python', 'test_executor.py')
+    #     + "Don't add any additional tests. "
+    # )
+    # test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py')
+    # persist_file(test_executor_content, 'test_executor.py')
+    #
+    # print_colored('', '############# Requirements #############', 'red')
+    # user_query = (
+    #         general_guidelines()
+    #         + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+    #         + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
+    #         + requirements_file_task()
+    # )
+    # conversation = gpt.Conversation()
+    # conversation.query(user_query)
+    # requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ")
+    #
+    # requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
+    # persist_file(requirements_content, 'requirements.txt')
+    #
+    # print_colored('', '############# Dockerfile #############', 'red')
+    # user_query = (
+    #         general_guidelines()
+    #         + wrap_content_in_code_block(executor_content, 'executor.py', 'python')
+    #         + wrap_content_in_code_block(test_executor_content, 'test_executor.py', 'python')
+    #         + wrap_content_in_code_block(requirements_content, 'requirements.txt', '')
+    #         + docker_file_task()
+    # )
+    # conversation = gpt.Conversation()
+    # conversation.query(user_query)
+    # dockerfile_content_raw = conversation.query(chain_of_thought_optimization('dockerfile', 'Dockerfile'))
+    # dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
+    # persist_file(dockerfile_content, 'Dockerfile')
+    #
+    # write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
+
+    for i in range(1, 20):
+        error = build_docker(f'executor_level2/v{i}')
+        if error:
+            recreate_folder(f'executor_level2/v{i + 1}')
+            file_name_to_content = get_all_executor_files_with_content(f'executor_level2/v{i}')
+            all_files_string = files_to_string(file_name_to_content)
+            user_query = (
+                    'Here are all the files I use:\n'
+                    + all_files_string
+                    + 'I got the following error:\n'
+                    + error
+                    + 'Think quickly about possible reasons. '
+                      'Then output the files that need change. '
+                      "Don't output files that don't need change. "
+                      "If you output a file, then write the complete file. "
+                      "Use the exact same syntax to wrap the code:\n"
+                      f"**...**\n"
+                      f"```...\n"
+                      f"...code...\n"
+                      f"```\n\n"
+            )
+            conversation = gpt.Conversation()
+            returned_files_raw = conversation.query(user_query)
+            for file_name, tag in FILE_AND_TAG_PAIRS:
+                updated_file = extract_content_from_result(returned_files_raw, file_name)
+                if updated_file:
+                    file_name_to_content[file_name] = updated_file
+            for file_name, content in file_name_to_content.items():
+                persist_file(content, f'executor_level2/v{i + 1}/{file_name}')
+        else:
+            break
+
+    error = jina_cloud.push_executor('executor_level2')
     host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')
 
     # create playgorund and client.py
 
 
 if __name__ == '__main__':
-    ######## Level 1 task #########
-    main(
-        executor_description="OCR detector",
-        input_modality='image',
-        # input_doc_field='blob',
-        output_modality='text',
-        # output_doc_field='text',
-        test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
-        do_validation=False
-    )
+    # ######## Level 1 task #########
+    # main(
+    #     executor_description="OCR detector",
+    #     input_modality='image',
+    #     # input_doc_field='blob',
+    #     output_modality='text',
+    #     # output_doc_field='text',
+    #     test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
+    #     do_validation=False
+    # )
+
+    ######### Level 2 task #########
+    main(
+        executor_description="The executor takes 3D objects in obj format as input "
+                             "and outputs a 2D image projection of that object where the full object is shown. ",
+        input_modality='3d',
+        output_modality='image',
+        test_scenario='Test that 3d object from https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj '
+                      'is put in and out comes a 2d rendering of it',
+        do_validation=False
+    )
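
Condensed, the new control flow in main() looks like this (a simplified sketch of the diff above, not the verbatim file; the imports mirror the new import lines in this commit):

```python
from main import extract_content_from_result, get_all_executor_files_with_content, files_to_string
from src import gpt
from src.constants import FILE_AND_TAG_PAIRS
from src.jina_cloud import build_docker
from src.utils.io import recreate_folder, persist_file

for i in range(1, 20):
    error = build_docker(f'executor_level2/v{i}')  # '' means the build succeeded
    if not error:
        break
    recreate_folder(f'executor_level2/v{i + 1}')
    file_name_to_content = get_all_executor_files_with_content(f'executor_level2/v{i}')
    # Ask GPT-4 to repair only the files implicated by the build error.
    user_query = (
        'Here are all the files I use:\n' + files_to_string(file_name_to_content)
        + 'I got the following error:\n' + error
        + "Think quickly about possible reasons. Then output the files that need change."
    )
    returned_files_raw = gpt.Conversation().query(user_query)
    for file_name, tag in FILE_AND_TAG_PAIRS:
        updated_file = extract_content_from_result(returned_files_raw, file_name)
        if updated_file:  # '' means the model left this file untouched
            file_name_to_content[file_name] = updated_file
    for file_name, content in file_name_to_content.items():
        persist_file(content, f'executor_level2/v{i + 1}/{file_name}')
```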

src/gpt.py

@@ -30,7 +30,7 @@ def get_response(prompt_list: List[Tuple[str, str]]):
     try:
         response_generator = openai.ChatCompletion.create(
             temperature=0,
-            max_tokens=4_000,
+            max_tokens=2_000,
             model="gpt-4",
             stream=True,
             messages=[
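
The halved token budget applies to a streamed completion. With the pre-1.0 openai-python API used here, such a stream is presumably consumed roughly like this (an assumption; the consuming loop is outside this hunk):

```python
# Typical consumption of a streamed ChatCompletion with openai<1.0 (assumed, not shown in this diff).
text = ''
for chunk in response_generator:
    delta = chunk['choices'][0].get('delta', {})
    text += delta.get('content', '')
```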

src/jina_cloud.py

@@ -1,5 +1,7 @@
 import os
 from multiprocessing.connection import Client
+import subprocess
+import re
 
 import hubble
 from jcloud.flow import CloudFlow
@@ -79,3 +81,36 @@ def update_client_line_in_file(file_path, host):
         file.write(replaced_content)
 
 
+def build_docker(path):
+    def process_error_message(error_message):
+        lines = error_message.split('\n')
+        relevant_lines = []
+
+        pattern = re.compile(r"^#\d+ \[\d+/\d+\]")  # Pattern to match lines like "#11 [7/8]"
+        last_matching_line_index = None
+
+        for index, line in enumerate(lines):
+            if pattern.match(line):
+                last_matching_line_index = index
+
+        if last_matching_line_index is not None:
+            relevant_lines = lines[last_matching_line_index:]
+
+        return '\n'.join(relevant_lines)
+
+    # The command to build the Docker image
+    cmd = f"docker build -t micromagic {path}"
+
+    # Run the command and capture the output
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+    stdout, stderr = process.communicate()
+
+    # Check if there was an error
+    if process.returncode != 0:
+        error_message = stderr.decode("utf-8")
+        relevant_error_message = process_error_message(error_message)
+        return relevant_error_message
+    else:
+        print("Docker build completed successfully.")
+        return ''
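
The helper keeps only the log tail from the last BuildKit step marker onward, so the model sees the failing step rather than the whole build log. A quick check with a made-up log (the sample text and package pin are fabricated for illustration):

```python
import re

def process_error_message(error_message):
    # Keep everything from the last "#<n> [<i>/<k>]" step marker onward.
    lines = error_message.split('\n')
    pattern = re.compile(r"^#\d+ \[\d+/\d+\]")
    last = None
    for index, line in enumerate(lines):
        if pattern.match(line):
            last = index
    return '\n'.join(lines[last:]) if last is not None else ''

log = ("#10 [6/8] COPY . .\n#10 DONE\n"
       "#11 [7/8] RUN pip install -r requirements.txt\n"
       "#11 ERROR: No matching distribution found for jina==9.9.9")
print(process_error_message(log))  # keeps only the last step and its error
```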

src/prompt_tasks.py

@@ -36,6 +36,8 @@ It gets a DocumentArray as input where each document has the input modality '{input_modality}' that is stored in document.{input_doc_field}.
 It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}.
 Have in mind that d.uri is never a path to a local file. It is always a url.
 The executor is not allowed to use the GPU.
 The executor is not allowed to access a database.
+The executor is not allowed to access a display.
+The executor is not allowed to access external apis.
 ''',
 EXECUTOR_FILE_TAG,

src/utils/io.py

@@ -10,7 +10,7 @@ def recreate_folder(folder_path):
     os.makedirs(folder_path)
 
 
 def persist_file(file_content, file_name):
-    with open(f'executor/{file_name}', 'w') as f:
+    with open(f'{file_name}', 'w') as f:
         f.write(file_content)
 
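
With the hard-coded executor/ prefix removed, persist_file() writes to whatever path the caller supplies, which is what lets the feedback loop above keep one folder per repair round, e.g. persist_file(content, f'executor_level2/v{i + 1}/{file_name}').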