feat: chain of thought

2025-12-28 19:04:21 +01:00 · 2023-03-21 01:56:36 +01:00
parent 1163ab50f7
commit ab4c7bc090
7 changed files with 254 additions and 109 deletions
--- a/src/constants.py
+++ b/src/constants.py
@@ -3,20 +3,25 @@ TEST_EXECUTOR_FILE_NAME = 'test_executor.py'
 REQUIREMENTS_FILE_NAME = 'requirements.txt'
 DOCKER_FILE_NAME = 'Dockerfile'
 CLIENT_FILE_NAME = 'client.py'
+STREAMLIT_FILE_NAME = 'streamlit.py'

-EXECUTOR_FILE_TAG = 'executor'
-TEST_EXECUTOR_FILE_TAG = 'test_executor'
-REQUIREMENTS_FILE_TAG = 'requirements'
+EXECUTOR_FILE_TAG = 'python'
+TEST_EXECUTOR_FILE_TAG = 'python'
+REQUIREMENTS_FILE_TAG = ''
 DOCKER_FILE_TAG = 'dockerfile'
-CLIENT_FILE_TAG = 'client'
+CLIENT_FILE_TAG = 'python'
+STREAMLIT_FILE_TAG = 'python'

-TAG_TO_FILE_NAME = {
-    EXECUTOR_FILE_TAG: EXECUTOR_FILE_NAME,
-    TEST_EXECUTOR_FILE_TAG: TEST_EXECUTOR_FILE_NAME,
-    REQUIREMENTS_FILE_TAG: REQUIREMENTS_FILE_NAME,
-    DOCKER_FILE_TAG: DOCKER_FILE_NAME,
-    CLIENT_FILE_TAG: CLIENT_FILE_NAME
-}
+FILE_AND_TAG_PAIRS = [
+    (EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG),
+    (TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG),
+    (REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG),
+    (DOCKER_FILE_NAME, DOCKER_FILE_TAG),
+    (CLIENT_FILE_NAME, CLIENT_FILE_TAG),
+    (STREAMLIT_FILE_NAME, STREAMLIT_FILE_TAG)
+]
+
+EXECUTOR_FOLDER_v1 = 'executor_v1'
+EXECUTOR_FOLDER_v2 = 'executor_v2'

-EXECUTOR_FOLDER = 'executor'
 FLOW_URL_PLACEHOLDER = 'jcloud.jina.ai'
--- a/src/gpt.py
+++ b/src/gpt.py
@@ -1,6 +1,8 @@
 import os
+from time import sleep

 import openai
+from openai.error import RateLimitError, Timeout

 from src.utils.string import print_colored

@@ -9,23 +11,38 @@ openai.api_key = os.environ['OPENAI_API_KEY']
 def get_response(system_definition, user_query):
    print_colored('system_definition', system_definition, 'magenta')
    print_colored('user_query', user_query, 'blue')
-    response = openai.ChatCompletion.create(
-        temperature=0,
-        model="gpt-4",
-        messages=[
-            {
-                "role": "system",
-                "content": system_definition
+    for i in range(10):
+        try:
+            response = openai.ChatCompletion.create(
+                temperature=0,
+                max_tokens=5_000,
+                model="gpt-4",
+                stream=True,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": system_definition

-            },
-            {
-                "role": "user",
-                "content":
-                    user_query
-            },
+                    },
+                    {
+                        "role": "user",
+                        "content":
+                            user_query
+                    },

-        ]
-    )
-    content = response['choices'][0]['message']['content']
-    print_colored('agent response', content, 'green')
-    return content
+                ]
+            )
+            complete_string = ''
+            for chunk in response:
+                delta = chunk['choices'][0]['delta']
+                if 'content' in delta:
+                    content = delta['content']
+                    print_colored('' if complete_string else 'Agent response:', content, 'green', end='')
+                    complete_string += content
+            return complete_string
+        except (RateLimitError, Timeout) as e:
+            print(e)
+            print('retrying')
+            sleep(3)
+            continue
+    raise Exception('Failed to get response')
--- a/src/jina_cloud.py
+++ b/src/jina_cloud.py
@@ -19,15 +19,13 @@ def get_user_name():
    return response['data']['name']


-async def deploy_on_jcloud(flow_yaml):
+def deploy_on_jcloud(flow_yaml):
    cloud_flow = CloudFlow(path=flow_yaml)
-    await cloud_flow.__aenter__()
-    return cloud_flow.endpoints['gateway']
+    return cloud_flow.__enter__().endpoints['gateway']



-
-async def deploy_flow(executor_name, do_validation):
+def deploy_flow(executor_name, do_validation):
    flow = f'''
 jtype: Flow
 with:
@@ -59,7 +57,7 @@ executors:
        with flow:
            pass
    print('deploy flow on jcloud')
-    return await deploy_on_jcloud(flow_yaml=full_flow_path)
+    return deploy_on_jcloud(flow_yaml=full_flow_path)


 def replace_client_line(file_content: str, replacement: str) -> str:
@@ -70,7 +68,7 @@ def replace_client_line(file_content: str, replacement: str) -> str:
            break
    return '\n'.join(lines)

-def run_client_file(file_path, host, do_validation):
+def update_client_line_in_file(file_path, host):
    with open(file_path, 'r') as file:
        content = file.read()

@@ -80,5 +78,4 @@ def run_client_file(file_path, host, do_validation):
    with open(file_path, 'w') as file:
        file.write(replaced_content)

-    if do_validation:
-        import executor.client  # runs the client script for validation
+
--- a/src/prompt_examples.py
+++ b/src/prompt_examples.py
@@ -92,4 +92,13 @@ d = Document(uri='data/img.png')
 d.load_uri_to_blob()
 response = client.post('/process', inputs=DocumentArray([d]))
 response[0].summary()
-''')
+''')
+
+
+system_base_definition = (
+        "You are a principal engineer working at Jina - an open source company. "
+        "Using the Jina framework, users can define executors. "
+        + executor_example
+        + docarray_example
+        + client_example
+)
--- a/src/prompt_tasks.py
+++ b/src/prompt_tasks.py
@@ -1,10 +1,10 @@
 from src.constants import EXECUTOR_FILE_NAME, REQUIREMENTS_FILE_NAME, TEST_EXECUTOR_FILE_NAME, DOCKER_FILE_NAME, \
-    DOCKER_FILE_TAG, CLIENT_FILE_TAG, CLIENT_FILE_NAME
+    DOCKER_FILE_TAG, CLIENT_FILE_TAG, CLIENT_FILE_NAME, STREAMLIT_FILE_TAG, STREAMLIT_FILE_NAME, EXECUTOR_FILE_TAG, \
+    REQUIREMENTS_FILE_TAG, TEST_EXECUTOR_FILE_TAG


 def general_guidelines():
    return (
-        "General guidelines: "
        "The code you write is production ready. "
        "Every file starts with comments describing what the code is doing before the first import. "
        "Comments can only be written between tags. "
@@ -20,7 +20,13 @@ def general_guidelines():


 def _task(task, tag_name, file_name):
-    return task + f"The code will go into {file_name}. Wrap the code in the string $$$start_{tag_name}$$$...$$$end_{tag_name}$$$ \n\n"
+    return (
+            task + f"The code will go into {file_name}. Wrap the code into:\n"
+                   f"**{file_name}**\n"
+                   f"```{tag_name}\n"
+                   f"...code...\n"
+                   f"```\n\n"
+    )


 def executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
@@ -31,28 +37,31 @@ def executor_file_task(executor_name, executor_description, input_modality, inpu
        f"It gets a DocumentArray as input where each document has the input modality '{input_modality}' that is stored in document.{input_doc_field}. "
        f"It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}. "
        f"Have in mind that d.uri is never a path to a local file. It is always a url.",
-        'executor',
+        EXECUTOR_FILE_TAG,
        EXECUTOR_FILE_NAME
    )


 def requirements_file_task():
-    return _task("Write the content of the requirements.txt file. "
-                 "Make sure to include pytest. "
-                 "All versions are fixed. ", 'requirements',
-                 REQUIREMENTS_FILE_NAME)
+    return _task(
+        "Write the content of the requirements.txt file. "
+        "Make sure to include pytest. "
+        "All versions are fixed. ",
+        REQUIREMENTS_FILE_TAG,
+        REQUIREMENTS_FILE_NAME
+    )


 def test_executor_file_task(executor_name, test_in, test_out):
    return _task(
        "Write a small unit test for the executor. "
        "Start the test with an extensive comment about the test case. "
-        + (
-                "Test that the executor converts the input '" + test_in + "' to the output '" + test_out + "'. "
-        ) if test_in and test_out else ""
-                                                   "Use the following import to import the executor: "
-                                                   f"from executor import {executor_name} ",
-        'test_executor',
+        + ((
+                   "Test that the executor converts the input '" + test_in + "' to the output '" + test_out + "'. "
+           ) if test_in and test_out else "")
+        + "Use the following import to import the executor: "
+          f"from executor import {executor_name} ",
+        TEST_EXECUTOR_FILE_TAG,
        TEST_EXECUTOR_FILE_NAME
    )

@@ -66,12 +75,23 @@ def docker_file_task():
        "Add the config.yml file to the Dockerfile. "
        "The base image of the Dockerfile is FROM jinaai/jina:3.14.2-dev18-py310-standard. "
        'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
-        "The Dockerfile runs the test during the build process. "
-        , DOCKER_FILE_TAG, DOCKER_FILE_NAME)
+        "The Dockerfile runs the test during the build process. ",
+        DOCKER_FILE_TAG,
+        DOCKER_FILE_NAME
+    )


 def client_file_task():
    return _task(
-        "Write the client file. "
-        , CLIENT_FILE_TAG, CLIENT_FILE_NAME
+        "Write the client file. ",
+        CLIENT_FILE_TAG,
+        CLIENT_FILE_NAME
+    )
+
+
+def streamlit_file_task():
+    return _task(
+        "Write the streamlit file that allows making requests. ",
+        STREAMLIT_FILE_TAG,
+        STREAMLIT_FILE_NAME
    )
--- a/src/utils/string.py
+++ b/src/utils/string.py
@@ -1,3 +1,6 @@
+import difflib
+
+
 def find_between(input_string, start, end):
    try:
        start_index = input_string.index(start) + len(start)
@@ -10,7 +13,7 @@ def find_between(input_string, start, end):
 def clean_content(content):
    return content.replace('```', '').strip()

-def print_colored(headline, text, color_code):
+def print_colored(headline, text, color_code, end='\n'):
    if color_code == 'black':
        color_code = '30'
    elif color_code == 'red':
@@ -30,5 +33,21 @@ def print_colored(headline, text, color_code):
    color_start = f"\033[{color_code}m"
    reset = "\033[0m"
    bold_start = "\033[1m"
-    print(f"{bold_start}{color_start}{headline}{reset}")
-    print(f"{color_start}{text}{reset}")
+    if headline:
+        print(f"{bold_start}{color_start}{headline}{reset}")
+    print(f"{color_start}{text}{reset}", end=end)
+
+
+def find_differences(a, b):
+    matcher = difflib.SequenceMatcher(None, a, b)
+    differences = set()
+
+    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+        if tag == 'replace':
+            diff_a = a[i1:i2]
+            diff_b = b[j1:j2]
+            # Check for mirrored results and only add non-mirrored ones
+            if (diff_b, diff_a) not in differences:
+                differences.add((diff_a, diff_b))
+
+    return differences