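feat: stable executor generation (debug via hub push instead of local docker build, optional chain-of-thought pass, test scenario in executor prompt)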

2025-12-23 16:44:20 +01:00 · 2023-03-28 14:53:05 +02:00
parent 50f47e91b2
commit 11dbc8b162
3 changed files with 166 additions and 82 deletions
--- a/micro_chain.py
+++ b/micro_chain.py
@@ -1,10 +1,9 @@
 import random

 from main import extract_content_from_result, write_config_yml, get_all_executor_files_with_content, files_to_string
-
 from src import gpt, jina_cloud
 from src.constants import FILE_AND_TAG_PAIRS
-from src.jina_cloud import build_docker
+from src.jina_cloud import push_executor, process_error_message
 from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \
    chain_of_thought_optimization, requirements_file_task, docker_file_task, not_allowed
 from src.utils.io import recreate_folder, persist_file
@@ -15,35 +14,33 @@ def wrap_content_in_code_block(executor_content, file_name, tag):
    return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n'


-
-
 def create_executor(
        executor_description,
-        input_modality,
-        output_modality,
        test_scenario,
-        executor_name
+        executor_name,
+        is_chain_of_thought=False,
 ):
-    input_doc_field = 'text' if input_modality == 'text' else 'blob'
-    output_doc_field = 'text' if output_modality == 'text' else 'blob'
-    # random integer at the end of the executor name to avoid name clashes

    recreate_folder('executor')
    EXECUTOR_FOLDER_v1 = 'executor/v1'
    recreate_folder(EXECUTOR_FOLDER_v1)
    recreate_folder('flow')

+
+
    print_colored('', '############# Executor #############', 'red')
    user_query = (
            general_guidelines()
-            + executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
-                                 output_modality, output_doc_field)
+            + executor_file_task(executor_name, executor_description, test_scenario)
            + chain_of_thought_creation()
    )
    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    executor_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py'))
+    executor_content_raw = conversation.query(user_query)
+    if is_chain_of_thought:
+        executor_content_raw = conversation.query(
+            f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py'))
    executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
+
    persist_file(executor_content, EXECUTOR_FOLDER_v1 + '/executor.py')

    print_colored('', '############# Test Executor #############', 'red')
@@ -53,12 +50,13 @@ def create_executor(
            + test_executor_file_task(executor_name, test_scenario)
    )
    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    test_executor_content_raw = conversation.query(
-        f"General rules: " + not_allowed() +
-        chain_of_thought_optimization('python', 'test_executor.py')
-        + "Don't add any additional tests. "
-    )
+    test_executor_content_raw = conversation.query(user_query)
+    if is_chain_of_thought:
+        test_executor_content_raw = conversation.query(
+            f"General rules: " + not_allowed() +
+            chain_of_thought_optimization('python', 'test_executor.py')
+            + "Don't add any additional tests. "
+        )
    test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py')
    persist_file(test_executor_content, EXECUTOR_FOLDER_v1 + '/test_executor.py')

@@ -70,8 +68,10 @@ def create_executor(
            + requirements_file_task()
    )
    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ")
+    requirements_content_raw = conversation.query(user_query)
+    if is_chain_of_thought:
+        requirements_content_raw = conversation.query(
+            chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ")

    requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
    persist_file(requirements_content, EXECUTOR_FOLDER_v1 + '/requirements.txt')
@@ -85,13 +85,16 @@ def create_executor(
            + docker_file_task()
    )
    conversation = gpt.Conversation()
-    conversation.query(user_query)
-    dockerfile_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile'))
+    dockerfile_content_raw = conversation.query(user_query)
+    if is_chain_of_thought:
+        dockerfile_content_raw = conversation.query(
+            f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile'))
    dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
    persist_file(dockerfile_content, EXECUTOR_FOLDER_v1 + '/Dockerfile')

    write_config_yml(executor_name, EXECUTOR_FOLDER_v1)

+
 def create_playground(executor_name, executor_path, host):
    print_colored('', '############# Playground #############', 'red')

@@ -112,33 +115,40 @@ print(response[0].text) # can also be blob in case of image/audio..., this shoul
    )
    conversation = gpt.Conversation()
    conversation.query(user_query)
-    playground_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'playground.py'))
+    playground_content_raw = conversation.query(
+        f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'playground.py'))
    playground_content = extract_content_from_result(playground_content_raw, 'playground.py')
    persist_file(playground_content, f'{executor_path}/playground.py')

-def debug_executor():
-    for i in range(1, 20):

-        error = build_docker(f'executor/v{i}')
+def debug_executor():
+    MAX_DEBUGGING_ITERATIONS = 20
+    error_before = ''
+    for i in range(1, MAX_DEBUGGING_ITERATIONS):
+        # error_docker = build_docker(f'executor/v{i}')
+        log_hubble = push_executor(f'executor/v{i}')
+        error = process_error_message(log_hubble)
        if error:
            recreate_folder(f'executor/v{i + 1}')
            file_name_to_content = get_all_executor_files_with_content(f'executor/v{i}')
            all_files_string = files_to_string(file_name_to_content)
            user_query = (
-                f"General rules: " + not_allowed()
-                + 'Here are all the files I use:\n'
-                + all_files_string
-                + 'I got the following error:\n'
-                + error + '\n'
-                + 'Think quickly about possible reasons. '
-                  'Then output the files that need change. '
-                  "Don't output files that don't need change. "
-                  "If you output a file, then write the complete file. "
-                  "Use the exact same syntax to wrap the code:\n"
-                   f"**...**\n"
-                   f"```...\n"
-                   f"...code...\n"
-                   f"```\n\n"
+                    f"General rules: " + not_allowed()
+                    + 'Here are all the files I use:\n'
+                    + all_files_string
+                    + (('This is an error that is already fixed before:\n'
+                        + error_before) if error_before else '')
+                    + '\n\nNow, I get the following error:\n'
+                    + error + '\n'
+                    + 'Think quickly about possible reasons. '
+                      'Then output the files that need change. '
+                      "Don't output files that don't need change. "
+                      "If you output a file, then write the complete file. "
+                      "Use the exact same syntax to wrap the code:\n"
+                      f"**...**\n"
+                      f"```...\n"
+                      f"...code...\n"
+                      f"```\n\n"
            )
            conversation = gpt.Conversation()
            returned_files_raw = conversation.query(user_query)
@@ -149,8 +159,12 @@ def debug_executor():

            for file_name, content in file_name_to_content.items():
                persist_file(content, f'executor/v{i + 1}/{file_name}')
+            error_before = error
+
        else:
            break
+        if i == MAX_DEBUGGING_ITERATIONS - 1:
+            raise Exception('Could not debug the executor.')
    return f'executor/v{i}'


@@ -161,31 +175,48 @@ def main(
        test_scenario,
 ):
    executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
-    create_executor(executor_description, input_modality, output_modality, test_scenario, executor_name)
+    create_executor(executor_description, test_scenario, executor_name)
+    # executor_name = 'MicroChainExecutor790050'
    executor_path = debug_executor()
-    print('Executor can be built locally, now we will push it to the cloud.')
-    jina_cloud.push_executor(executor_path)
+    # print('Executor can be built locally, now we will push it to the cloud.')
+    # jina_cloud.push_executor(executor_path)
    print('Deploy a jina flow')
    host = jina_cloud.deploy_flow(executor_name, 'flow')
    print(f'Flow is deployed create the playground for {host}')
-    executor_name = 'MicroChainExecutor48442'
-    executor_path = 'executor/v2'
-    host = 'grpcs://mybelovedocrflow-24a412bc63.wolf.jina.ai'
    create_playground(executor_name, executor_path, host)
+    print(
+        'Executor name:', executor_name, '\n',
+        'Executor path:', executor_path, '\n',
+        'Host:', host, '\n',
+        'Playground:', f'streamlit run {executor_path}/playground.py', '\n',
+    )
+

 if __name__ == '__main__':
    # ######## Level 1 task #########
-    main(
-        executor_description="The executor takes a pdf file as input, parses it and returns the text.",
-        input_modality='pdf',
-        output_modality='text',
-        test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long',
-    )
-    # money prompt: $0.56
-    # money generation: $0.22
-    # total money: $0.78
+    # main(
+    #     executor_description="The executor takes a pdf file as input, parses it and returns the text.",
+    #     input_modality='pdf',
+    #     output_modality='text',
+    #     test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long',
+    # )


+    main(
+        executor_description="The executor takes a url of a website as input and returns the logo of the website as an image.",
+        input_modality='url',
+        output_modality='image',
+        test_scenario='Takes https://jina.ai/ as input  and returns an svg image of the logo.',
+    )
+
+    # # # ######## Level 1 task #########
+    # main(
+    #     executor_description="The executor takes a pdf file as input, parses it and returns the text.",
+    #     input_modality='pdf',
+    #     output_modality='text',
+    #     test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long',
+    # )
+
    # ######## Level 2 task #########
    # main(
    #     executor_description="OCR detector",
@@ -194,13 +225,12 @@ if __name__ == '__main__':
    #     test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
    # )

-
    # ######## Level 3 task #########
    # main(
-    #     executor_description="The executor takes an mp3 file as input and returns bpm and pitch in the tags.",
+    #     executor_description="The executor takes an mp3 file as input and returns bpm and pitch in a json.",
    #     input_modality='audio',
-    #     output_modality='tags',
-    #     test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
+    #     output_modality='json',
+    #     test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a json with bpm and pitch',
    # )

    ######### Level 4 task #########
@@ -212,3 +242,11 @@ if __name__ == '__main__':
    #     test_scenario='Test that 3d object from https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj '
    #                   'is put in and out comes a 2d rendering of it',
    # )
+
+    # ######## Level 8 task #########
+    # main(
+    #     executor_description="The executor takes an image as input and returns a list of bounding boxes of all animals in the image.",
+    #     input_modality='blob',
+    #     output_modality='json',
+    #     test_scenario='Take the image from https://thumbs.dreamstime.com/b/dog-professor-red-bow-tie-glasses-white-background-isolated-dog-professor-glasses-197036807.jpg as input and assert that the list contains at least one bounding box. ',
+    # )
--- a/src/jina_cloud.py
+++ b/src/jina_cloud.py
@@ -1,16 +1,59 @@
+import hashlib
+import json
 import os
 import subprocess
 import re
+from argparse import Namespace
+from pathlib import Path

 import hubble
+from hubble.executor.helper import upload_file, archive_package, get_request_header
 from jcloud.flow import CloudFlow
 from jina import Flow



 def push_executor(dir_path):
-    cmd = f'jina hub push {dir_path}/. --verbose --replay'
-    os.system(cmd)
+    dir_path = Path(dir_path)
+
+    md5_hash = hashlib.md5()
+    bytesio = archive_package(dir_path)
+    content = bytesio.getvalue()
+    md5_hash.update(content)
+    md5_digest = md5_hash.hexdigest()
+
+    form_data = {
+        'public': 'True',
+        'private': 'False',
+        'verbose': 'True',
+        'md5sum': md5_digest,
+    }
+    req_header = get_request_header()
+    resp = upload_file(
+        'https://api.hubble.jina.ai/v2/rpc/executor.push',
+        'filename',
+        content,
+        dict_data=form_data,
+        headers=req_header,
+        stream=False,
+        method='post',
+    )
+    json_lines_str = resp.content.decode('utf-8')
+    if 'exited on non-zero code' not in json_lines_str:
+        return ''
+    responses = []
+    for json_line in json_lines_str.splitlines():
+        if 'exit code:' in json_line:
+            break
+
+        d = json.loads(json_line)
+
+        if 'payload' in d and type(d['payload']) == str:
+            responses.append(d['payload'])
+        elif type(d) == str:
+            responses.append(d)
+    return '\n'.join(responses)
+

 def get_user_name():
    client = hubble.Client(max_retries=None, jsonify=True)
@@ -51,10 +94,10 @@ executors:
    with open(full_flow_path, 'w') as f:
        f.write(flow)

-    print('try local execution')
-    flow = Flow.load_config(full_flow_path)
-    with flow:
-        pass
+    # print('try local execution')
+    # flow = Flow.load_config(full_flow_path)
+    # with flow:
+    #     pass
    print('deploy flow on jcloud')
    return deploy_on_jcloud(flow_yaml=full_flow_path)

@@ -78,22 +121,24 @@ def update_client_line_in_file(file_path, host):
        file.write(replaced_content)


+def process_error_message(error_message):
+    lines = error_message.split('\n')
+    relevant_lines = []
+
+    pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]")  # Pattern to match lines like "#11 [7/8]"
+    last_matching_line_index = None
+
+    for index, line in enumerate(lines):
+        if pattern.match(line):
+            last_matching_line_index = index
+
+    if last_matching_line_index is not None:
+        relevant_lines = lines[last_matching_line_index:]
+
+    return '\n'.join(relevant_lines[-25:])
+
 def build_docker(path):
-    def process_error_message(error_message):
-        lines = error_message.split('\n')
-        relevant_lines = []

-        pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]")  # Pattern to match lines like "#11 [7/8]"
-        last_matching_line_index = None
-
-        for index, line in enumerate(lines):
-            if pattern.match(line):
-                last_matching_line_index = index
-
-        if last_matching_line_index is not None:
-            relevant_lines = lines[last_matching_line_index:]
-
-        return '\n'.join(relevant_lines)

    # The command to build the Docker image
    cmd = f"docker build -t micromagic {path}"
--- a/src/prompt_tasks.py
+++ b/src/prompt_tasks.py
@@ -27,11 +27,12 @@ def _task(task, tag_name, file_name):
    )


-def executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
+def executor_file_task(executor_name, executor_description, test_scenario, input_modality, input_doc_field,
                       output_modality, output_doc_field):
    return _task(f'''
 Write the executor called '{executor_name}'.
 It matches the following description: '{executor_description}'.
+It will be tested with the following scenario: '{test_scenario}'.
 It gets a DocumentArray as input where each document has the input modality '{input_modality}' and can be accessed via document.{input_doc_field}.
 It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}.
 Have in mind that d.uri is never a path to a local file. It is always a url.