feat: chain of thought

2025-12-20 15:14:20 +01:00 · 2023-03-21 16:43:56 +01:00
parent 42305fbdb7
commit d1954317fc
8 changed files with 105 additions and 57 deletions
--- a/main.py
+++ b/main.py
@@ -1,3 +1,4 @@
+import importlib
 import os
 import re

@@ -8,7 +9,7 @@ from src.prompt_system import system_base_definition
 from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \
    test_executor_file_task, docker_file_task, client_file_task, streamlit_file_task
 from src.utils.io import recreate_folder
-from src.utils.string import find_differences
+from src.utils.string_tools import find_differences


 def extract_content_from_result(plain_text, file_name):
@@ -17,7 +18,7 @@ def extract_content_from_result(plain_text, file_name):
    if match:
        return match.group(1).strip()
    else:
-        return None
+        raise ValueError(f'Could not find {file_name} in result')


 def extract_and_write(plain_text, dest_folder):
@@ -28,7 +29,7 @@ def extract_and_write(plain_text, dest_folder):
            f.write(clean)


-def write_config_yml(executor_name):
+def write_config_yml(executor_name, dest_folder):
    config_content = f'''
 jtype: {executor_name}
 py_modules:
@@ -36,7 +37,7 @@ py_modules:
 metas:
  name: {executor_name}
    '''
-    with open('executor/config.yml', 'w') as f:
+    with open(os.path.join(dest_folder, 'config.yml'), 'w') as f:
        f.write(config_content)


@@ -69,7 +70,9 @@ def build_prototype_implementation(executor_description, executor_name, input_do
            + docker_file_task()
            + client_file_task()
            + streamlit_file_task()
-            + "First, write down some non-obvious thoughts about the challenges of the task and how you handle them. "
+            + "First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. "
+              "For example, there are different libraries you could use. "
+              "Discuss the pros and cons for all of these approaches and then decide for one of the approaches. "
              "Then write as I told you. "
    )
    plain_text = gpt.get_response(system_definition, user_query)
@@ -81,14 +84,18 @@ def build_production_ready_implementation(all_executor_files_string):
            system_base_definition
            + f"The user gives you the code of the executor and all other files needed ({', '.join([e[0] for e in FILE_AND_TAG_PAIRS])}) "
              f"The files may contain bugs. Fix all of them. "
-              f"Some files might have only prototype implementations and are not production ready. Add all the missing code. "
-              f"Some imports might be missing. Make sure to add them. Output all the files in the same format like given to you. "
+
    )
    user_query = (
-        "Fix all files, add all missing code and imports. Make it production ready. "
+        'Make it production ready. '
+        "Fix all files and add all missing code. "
        "Keep the same format as given to you. "
-        "First write down some non-obvious thoughts about what parts could need an adjustment and why. "
-        "Then write as I told you. "
+        f"Some files might have only prototype implementations and are not production ready. Add all the missing code. "
+        f"Some imports might be missing. Make sure to add them. "
+        f"Some libraries might be missing. Make sure to install them in the requirements.txt and Dockerfile. "
+        "First write down an extensive list of obvious and non-obvious thoughts about what parts could need an adjustment and why. "
+        "Think about if all the changes are required and finally decide for the changes you want to make. "
+        f"Output all the files even the ones that did not change. "
        "Here are the files: \n\n"
        + all_executor_files_string
    )
@@ -119,60 +126,61 @@ def main(
 ):
    recreate_folder(EXECUTOR_FOLDER_v1)
    recreate_folder(EXECUTOR_FOLDER_v2)
+    recreate_folder('flow')

    all_executor_files_string = build_prototype_implementation(executor_description, executor_name, input_doc_field, input_modality,
                                                output_doc_field, output_modality, test_in, test_out)
    extract_and_write(all_executor_files_string, EXECUTOR_FOLDER_v1)
-
+    write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
    file_name_to_content_v1 = get_all_executor_files_with_content(EXECUTOR_FOLDER_v1)
-
    all_executor_files_string_no_instructions = files_to_string(file_name_to_content_v1)

    all_executor_files_string_improved = build_production_ready_implementation(all_executor_files_string_no_instructions)
-
    extract_and_write(all_executor_files_string_improved, EXECUTOR_FOLDER_v2)
+    write_config_yml(executor_name, EXECUTOR_FOLDER_v2)

-    write_config_yml(executor_name)
+    jina_cloud.push_executor(EXECUTOR_FOLDER_v2)

-    jina_cloud.push_executor()
+    host = jina_cloud.deploy_flow(executor_name, do_validation, 'flow')

-    host = jina_cloud.deploy_flow(executor_name, do_validation)
+    update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, CLIENT_FILE_NAME), host)
+    update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v1, STREAMLIT_FILE_NAME), host)
+    update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, CLIENT_FILE_NAME), host)
+    update_client_line_in_file(os.path.join(EXECUTOR_FOLDER_v2, STREAMLIT_FILE_NAME), host)

-    update_client_line_in_file(f'executor/{CLIENT_FILE_NAME}', host)
-    update_client_line_in_file(f'executor/{STREAMLIT_FILE_NAME}', host)
    if do_validation:
-        pass
+        importlib.import_module("executor_v1.client")

    return get_all_executor_files_with_content(EXECUTOR_FOLDER_v2)


 if __name__ == '__main__':
-    ######### Level 2 task #########
-    main(
-        executor_name='My3DTo2DExecutor',
-        executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object",
-        input_modality='3d',
-        input_doc_field='blob',
-        output_modality='image',
-        output_doc_field='blob',
-        test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj',
-        test_out='the output should be exactly one image in png format',
-        do_validation=False
-    )
-
-    ######### Level 1 task #########
+    # ######### Level 2 task #########
    # main(
-    #     executor_name='MyCoolOcrExecutor',
-    #     executor_description="OCR detector",
-    #     input_modality='image',
-    #     input_doc_field='uri',
-    #     output_modality='text',
-    #     output_doc_field='text',
-    #     test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png',
-    #     test_out='> Hello, world!_',
+    #     executor_name='My3DTo2DExecutor',
+    #     executor_description="The executor takes 3D objects in obj format as input and outputs a 2D image projection of that object",
+    #     input_modality='3d',
+    #     input_doc_field='blob',
+    #     output_modality='image',
+    #     output_doc_field='blob',
+    #     test_in='https://raw.githubusercontent.com/makehumancommunity/communityassets-wip/master/clothes/leotard_fs/leotard_fs.obj',
+    #     test_out='the output should be exactly one image in png format',
    #     do_validation=False
    # )

+    ######## Level 1 task #########
+    main(
+        executor_name='MyCoolOcrExecutor',
+        executor_description="OCR detector",
+        input_modality='image',
+        input_doc_field='uri',
+        output_modality='text',
+        output_doc_field='text',
+        test_in='https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png',
+        test_out='> Hello, world!_',
+        do_validation=False
+    )
+
    # main(
    #     executor_name='MySentimentAnalyzer',
    #     executor_description="Sentiment analysis executor",
--- a/server.py
+++ b/server.py
@@ -29,9 +29,9 @@ class CreateResponse(BaseModel):
    message: Optional[str]

@app.post("/create", response_model=CreateResponse)
-async def create_endpoint(request: CreateRequest):
+def create_endpoint(request: CreateRequest):

-    result = await main(
+    result = main(
        executor_name=request.executor_name,
        executor_description=request.executor_description,
        input_modality=request.input_modality,
@@ -55,7 +55,7 @@ app.add_middleware(

 # Add a custom exception handler for RequestValidationError
@app.exception_handler(RequestValidationError)
-async def validation_exception_handler(request: Request, exc: RequestValidationError):
+def validation_exception_handler(request: Request, exc: RequestValidationError):
    return JSONResponse(
        status_code=422,
        content={"detail": exc.errors()},
--- a/src/gpt.py
+++ b/src/gpt.py
@@ -4,7 +4,8 @@ from time import sleep
 import openai
 from openai.error import RateLimitError, Timeout

-from src.utils.string import print_colored
+from src.utils.io import timeout_generator_wrapper
+from src.utils.string_tools import print_colored

 openai.api_key = os.environ['OPENAI_API_KEY']

@@ -13,7 +14,7 @@ def get_response(system_definition, user_query):
    print_colored('user_query', user_query, 'blue')
    for i in range(10):
        try:
-            response = openai.ChatCompletion.create(
+            response_generator = openai.ChatCompletion.create(
                temperature=0,
                max_tokens=5_000,
                model="gpt-4",
@@ -32,15 +33,17 @@ def get_response(system_definition, user_query):

                ]
            )
+            response_generator_with_timeout = timeout_generator_wrapper(response_generator, 5)
+
            complete_string = ''
-            for chunk in response:
+            for chunk in response_generator_with_timeout:
                delta = chunk['choices'][0]['delta']
                if 'content' in delta:
                    content = delta['content']
                    print_colored('' if complete_string else 'Agent response:', content, 'green', end='')
                    complete_string += content
            return complete_string
-        except (RateLimitError, Timeout) as e:
+        except (RateLimitError, Timeout, ConnectionError) as e:
            print(e)
            print('retrying')
            sleep(3)
--- a/src/jina_cloud.py
+++ b/src/jina_cloud.py
@@ -1,4 +1,3 @@
-import asyncio
 import os
 from multiprocessing.connection import Client

@@ -9,8 +8,8 @@ from jina import Flow
 from src.constants import FLOW_URL_PLACEHOLDER


-def push_executor():
-    cmd = 'jina hub push executor/. --verbose'
+def push_executor(dir_path):
+    cmd = f'jina hub push {dir_path}/. --verbose'
    os.system(cmd)

 def get_user_name():
@@ -25,7 +24,7 @@ def deploy_on_jcloud(flow_yaml):



-def deploy_flow(executor_name, do_validation):
+def deploy_flow(executor_name, do_validation, dest_folder):
    flow = f'''
 jtype: Flow
 with:
@@ -47,7 +46,8 @@ executors:
        instance: C4
        capacity: spot
 '''
-    full_flow_path = os.path.join('executor', 'flow.yml')
+    full_flow_path = os.path.join(dest_folder,
+                     'flow.yml')
    with open(full_flow_path, 'w') as f:
        f.write(flow)

--- a/src/prompt_system.py
+++ b/src/prompt_system.py
@@ -6,14 +6,16 @@ executor_example = "Here is an example of how an executor can be defined. It alw
 # this executor takes ... as input and returns ... as output
 # it processes each document in the following way: ...
 from jina import Executor, requests, DocumentArray, Document
-class MyExecutor(Executor):
+class MyInfoExecutor(Executor):
    def __init__(self, **kwargs):
        super().__init__()

    @requests
    def foo(self, docs: DocumentArray, **kwargs) => DocumentArray:
        for d in docs:
-            d.text = 'hello world'"
+            d.load_uri_to_blob()
+            d.tags['my_info'] = {'byte_length': len(d.blob)}
+            d.blob = None
        return docs
 '''
 "An executor gets a DocumentArray as input and returns a DocumentArray as output. "
--- a/src/prompt_tasks.py
+++ b/src/prompt_tasks.py
@@ -73,7 +73,7 @@ def docker_file_task():
        "It is important to make sure that all libs are installed that are required by the python packages. "
        "Usually libraries are installed with apt-get. "
        "Add the config.yml file to the Dockerfile. "
-        "The base image of the Dockerfile is FROM jinaai/jina:3.14.2-dev18-py310-standard. "
+        "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
        'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
        "The Dockerfile runs the test during the build process. ",
        DOCKER_FILE_TAG,
--- a/src/utils/io.py
+++ b/src/utils/io.py
@@ -1,8 +1,43 @@
 import os
 import shutil
-
+import concurrent.futures
+import concurrent.futures
+from typing import Generator

 def recreate_folder(folder_path):
    if os.path.exists(folder_path) and os.path.isdir(folder_path):
        shutil.rmtree(folder_path)
    os.makedirs(folder_path)
+
+
+class GenerationTimeoutError(Exception):
+    pass
+
+def timeout_generator_wrapper(generator, timeout):
+    def generator_func():
+        for item in generator:
+            yield item
+
+    def wrapper() -> Generator:
+        gen = generator_func()
+        while True:
+            try:
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(next, gen)
+                    yield future.result(timeout=timeout)
+            except StopIteration:
+                break
+            except concurrent.futures.TimeoutError:
+                raise GenerationTimeoutError(f"Generation took longer than {timeout} seconds")
+
+    return wrapper()
+
+# def my_generator():
+#     for i in range(10):
+#         sleep(3)
+#         yield 1
+#
+#
+# my_generator_with_timeout = timeout_generator_wrapper(my_generator, 2.9)
+# for item in my_generator_with_timeout():
+#     print(item)
--- a/src/utils/string_tools.py
+++ b/src/utils/string_tools.py