fix: prompt optimizations

2025-12-18 22:24:21 +01:00 · 2023-04-17 11:23:13 +02:00
parent bf2cd9edc7
commit badf295f71
7 changed files with 146 additions and 132 deletions
--- a/src/apis/gpt.py
+++ b/src/apis/gpt.py
@@ -4,7 +4,7 @@ from time import sleep
 from typing import List, Tuple, Optional

 import openai
-from openai.error import RateLimitError, Timeout
+from openai.error import RateLimitError, Timeout, APIConnectionError

 from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \
    PRICING_GPT3_5_TURBO_GENERATION, CHARS_PER_TOKEN
@@ -80,11 +80,11 @@ class _GPTConversation:
        self.cost_callback = cost_callback
        self.prompt_list: List[Optional[Tuple]] = [None]
        self.set_system_definition(system_definition_examples)
-        if 'verbose' in os.environ:
+        if os.environ['VERBOSE'].lower() == 'true':
            print_colored('system', self.prompt_list[0][1], 'magenta')

    def query(self, prompt: str):
-        if 'verbose' in os.environ:
+        if os.environ['VERBOSE'].lower() == 'true':
            print_colored('user', prompt, 'blue')
        self.prompt_list.append(('user', prompt))
        response = self.get_response(self.prompt_list)
@@ -117,7 +117,7 @@ class _GPTConversation:
            try:
                response_generator = openai.ChatCompletion.create(
                    temperature=0,
-                    max_tokens=2_000 if self.model == 'gpt-4' else None,
+                    max_tokens=None,
                    model=self.model,
                    stream=True,
                    messages=[
@@ -131,7 +131,7 @@ class _GPTConversation:

                complete_string = self.get_response_from_stream(response_generator)

-            except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e:
+            except (RateLimitError, Timeout, ConnectionError, APIConnectionError, GenerationTimeoutError) as e:
                print('/n', e)
                print('retrying...')
                sleep(3)
--- a/src/apis/jina_cloud.py
+++ b/src/apis/jina_cloud.py
@@ -248,6 +248,13 @@ def update_client_line_in_file(file_path, host):
        file.write(replaced_content)


+def remove_after_stderr(relevant_lines):
+    for index, line in enumerate(relevant_lines):
+        if '--- Captured stderr call ----' in line:
+            return relevant_lines[:index]
+    return relevant_lines
+
+
 def process_error_message(error_message):
    lines = error_message.split('\n')

@@ -263,6 +270,8 @@ def process_error_message(error_message):
    if last_matching_line_index is not None:
        relevant_lines = lines[last_matching_line_index:]

+    relevant_lines = remove_after_stderr(relevant_lines)
+
    response = '\n'.join(relevant_lines[-25:]).strip()

    # the following code tests the case that the docker file is corrupted and can not be parsed
--- a/src/constants.py
+++ b/src/constants.py
@@ -33,4 +33,8 @@ CHARS_PER_TOKEN = 3.4
 NUM_IMPLEMENTATION_STRATEGIES = 5
 MAX_DEBUGGING_ITERATIONS = 10

-DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
+DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
+
+PROBLEMATIC_PACKAGES = [
+    'Pyrender', 'Trimesh', 'ModernGL', 'PyOpenGL', 'Pyglet', 'pythreejs', 'panda3d' # because they need a screen
+]
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -3,10 +3,12 @@ import random
 import re

 from src.apis import gpt
-from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS
 from src.apis.jina_cloud import process_error_message, push_executor
-from src.options.generate.prompt_tasks import general_guidelines, chain_of_thought_creation, executor_file_task, \
-    not_allowed_executor, chain_of_thought_optimization, test_executor_file_task, requirements_file_task, docker_file_task
+from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
+    PROBLEMATIC_PACKAGES
+from src.options.generate.prompt_tasks import general_guidelines, executor_file_task, \
+    not_allowed_executor, chain_of_thought_optimization, test_executor_file_task, requirements_file_task, \
+    docker_file_task, not_allowed_docker
 from src.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
 from src.utils.string_tools import print_colored

@@ -70,14 +72,15 @@ class Generator:
        user_query = (
                general_guidelines()
                + executor_file_task(microservice_name, description, test, package)
-                + '\n\n' + chain_of_thought_creation()
        )
        conversation = self.gpt_session.get_conversation()
        microservice_content_raw = conversation.query(user_query)
        if is_chain_of_thought:
            microservice_content_raw = conversation.query(
-                f"General rules: " + not_allowed_executor() + chain_of_thought_optimization('python', 'microservice.py'))
-        microservice_content = self.extract_content_from_result(microservice_content_raw, 'microservice.py', match_single_block=True)
+                f"General rules: " + not_allowed_executor() + chain_of_thought_optimization('python',
+                                                                                            'microservice.py'))
+        microservice_content = self.extract_content_from_result(microservice_content_raw, 'microservice.py',
+                                                                match_single_block=True)
        if microservice_content == '':
            microservice_content_raw = conversation.query('You must add the executor code.')
            microservice_content = self.extract_content_from_result(
@@ -118,7 +121,8 @@ class Generator:
            requirements_content_raw = conversation.query(
                chain_of_thought_optimization('', requirements_path) + "Keep the same version of jina ")

-        requirements_content = self.extract_content_from_result(requirements_content_raw, 'requirements.txt', match_single_block=True)
+        requirements_content = self.extract_content_from_result(requirements_content_raw, 'requirements.txt',
+                                                                match_single_block=True)
        persist_file(requirements_content, requirements_path)

        print_colored('', '############# Dockerfile #############', 'blue')
@@ -134,7 +138,8 @@ class Generator:
        if is_chain_of_thought:
            dockerfile_content_raw = conversation.query(
                f"General rules: " + not_allowed_executor() + chain_of_thought_optimization('dockerfile', 'Dockerfile'))
-        dockerfile_content = self.extract_content_from_result(dockerfile_content_raw, 'Dockerfile', match_single_block=True)
+        dockerfile_content = self.extract_content_from_result(dockerfile_content_raw, 'Dockerfile',
+                                                              match_single_block=True)
        persist_file(dockerfile_content, os.path.join(MICROSERVICE_FOLDER_v1, 'Dockerfile'))

        self.write_config_yml(microservice_name, MICROSERVICE_FOLDER_v1)
@@ -172,7 +177,6 @@ The playground (app.py) must not let the user configure the host on the ui.
        playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True)
        persist_file(playground_content, os.path.join(microservice_path, 'app.py'))

-
    def debug_microservice(self, path, microservice_name, num_approach, packages, description, test):
        error_before = ''
        for i in range(1, MAX_DEBUGGING_ITERATIONS):
@@ -184,55 +188,9 @@ The playground (app.py) must not let the user configure the host on the ui.
            error = process_error_message(log_hubble)
            if error:
                print('An error occurred during the build process. Feeding the error back to the assistent...')
-                os.makedirs(next_microservice_path)
-                file_name_to_content = get_all_microservice_files_with_content(previous_microservice_path)
-
-                is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile'])
-
-                if is_dependency_issue:
-                    all_files_string = self.files_to_string({
-                        key: val for key, val in file_name_to_content.items() if key in ['requirements.txt', 'Dockerfile']
-                    })
-                    user_query = (
-                        f"Your task is to provide guidance on how to solve an error that occurred during the Docker "
-                        f"build process. The error message is:\n{error}\nTo solve this error, you should first "
-                        f"identify the type of error by examining the stack trace. Once you have identified the "
-                        f"error, you should suggest how to solve it. Your response should include the files that "
-                        f"need to be changed, but not files that don't need to be changed. For files that need to "
-                        f"be changed, you must provide the complete file with the exact same syntax to wrap the code.\n\n"
-                        f"You are given the following files:\n\n{all_files_string}"
-                    )
-                else:
-                    all_files_string = self.files_to_string(file_name_to_content)
-                    user_query = (
-                             f"General rules: " + not_allowed_executor()
-                             + f'Here is the description of the task the executor must solve:\n{description}'
-                             + f'\n\nHere is the test scenario the executor must pass:\n{test}'
-                             + f'Here are all the files I use:\n{all_files_string}'
-                             + ((f'This is an error that I already fixed before:\n{error_before}\n\n') if error_before else '')
-                             + f'\n\nThis is the error I encounter currently during the docker build process:\n{error}\n\n'
-                             + 'Look at the stack trace of the current error. First, think about what kind of error is this? '
-                              'Then think about possible reasons which might have caused it. Then suggest how to '
-                              'solve it. Output the files that need change. '
-                              "Don't output files that don't need change. If you output a file, then write the "
-                              "complete file. Use the exact same syntax to wrap the code:\n"
-                              f"**...**\n"
-                              f"```...\n"
-                              f"...code...\n"
-                              f"```"
-                    )
-
-                conversation = self.gpt_session.get_conversation()
-                returned_files_raw = conversation.query(user_query)
-                for file_name, tag in FILE_AND_TAG_PAIRS:
-                    updated_file = self.extract_content_from_result(returned_files_raw, file_name)
-                    if updated_file and (not is_dependency_issue or file_name in ['requirements.txt', 'Dockerfile']):
-                        file_name_to_content[file_name] = updated_file
-
-                for file_name, content in file_name_to_content.items():
-                    persist_file(content, os.path.join(next_microservice_path, file_name))
+                self.do_debug_iteration(description, error, error_before, next_microservice_path,
+                                        previous_microservice_path, test)
                error_before = error
-
            else:
                print('Successfully build microservice.')
                break
@@ -240,6 +198,78 @@ The playground (app.py) must not let the user configure the host on the ui.
                raise self.MaxDebugTimeReachedException('Could not debug the microservice.')
        return get_microservice_path(path, microservice_name, packages, num_approach, i)

+    def do_debug_iteration(self, description, error, error_before, next_microservice_path, previous_microservice_path,
+                           test):
+        os.makedirs(next_microservice_path)
+        file_name_to_content = get_all_microservice_files_with_content(previous_microservice_path)
+        is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile'])
+        if is_dependency_issue:
+            all_files_string = self.files_to_string({
+                key: val for key, val in file_name_to_content.items() if
+                key in ['requirements.txt', 'Dockerfile']
+            })
+            user_query = self.get_user_query_dependency_issue(all_files_string, error)
+        else:
+            user_query = self.get_user_query_code_issue(description, error, file_name_to_content,
+                                                        test)
+        conversation = self.gpt_session.get_conversation()
+        returned_files_raw = conversation.query(user_query)
+        for file_name, tag in FILE_AND_TAG_PAIRS:
+            updated_file = self.extract_content_from_result(returned_files_raw, file_name)
+            if updated_file and (not is_dependency_issue or file_name in ['requirements.txt', 'Dockerfile']):
+                file_name_to_content[file_name] = updated_file
+        for file_name, content in file_name_to_content.items():
+            persist_file(content, os.path.join(next_microservice_path, file_name))
+
+    def get_user_query_dependency_issue(self, all_files_string, error):
+        user_query = (
+            f'''
+Your task is to provide guidance on how to solve an error that occurred during the Docker build process. 
+The error message is:
+**microservice.log**
+```
+{error}
+```
+To solve this error, you should:
+1. Identify the type of error by examining the stack trace. 
+2. Suggest how to solve it. 
+3. Your suggestion must include the files that need to be changed, but not files that don't need to be changed. 
+For files that need to be changed, you must provide the complete file with the exact same syntax to wrap the code.
+Obey the following rules: {not_allowed_docker()}
+
+You are given the following files:
+
+{all_files_string}"
+'''
+        )
+        return user_query
+
+    def get_user_query_code_issue(self, description, error, file_name_to_content, test):
+        all_files_string = self.files_to_string(file_name_to_content)
+        return f'''
+General rules: {not_allowed_executor()}
+Here is the description of the task the executor must solve:
+{description}
+
+Here is the test scenario the executor must pass:\n{test}
+Here are all the files I use:
+{all_files_string}
+
+
+This is the error I encounter currently during the docker build process:
+{error}
+
+Look at the stack trace of the current error. First, think about what kind of error is this? 
+Then think about possible reasons which might have caused it. Then suggest how to 
+solve it. Output all the files that need change. 
+Don't output files that don't need change. If you output a file, then write the 
+complete file. Use the exact same syntax to wrap the code:
+**...**
+```...
+...code...
+```
+'''
+
    class MaxDebugTimeReachedException(BaseException):
        pass

@@ -286,19 +316,18 @@ Here is the task description of the problem you need to solve:
 "{description}"
 1. Write down all the non-trivial subtasks you need to solve.
 2. Find out what is the core problem to solve.
-3. Provide a list of all python packages you can think of that could directly be used to solve the core problem.
-3. Provide a list of the 7 most promising python packages that fulfill the following requirements:
- can directly be used to solve the core problem
- has a stable api among different versions
- does not have system requirements
+3. List up to 15 Python packages that are specifically designed or have functionalities to solve the complete core problem.
+4. For each of the 15 package think if it fulfills the following requirements:
+a) specifically designed or have functionalities to solve the complete core problem.
+b) has a stable api among different versions
+c) does not have system requirements
+d) can solve the task when running in a docker container
+e) the implementation of the core problem using the package would obey the following rules:
+{not_allowed_executor()}
+When answering, just write "yes" or "no".

-For each package:
-    a) Write down some non-obvious challenges you might face with the package when implementing your task and give multiple approaches on how you handle them.
-    For example, you might find out that you must not use the package because it does not obey the rules:
-    {not_allowed_executor()}
-    b) Discuss the pros and cons for the package.
-
-4. Output the best 5 python packages starting with the best one.
+5. Output the most suitable 5 python packages starting with the best one. 
+If the package is mentioned in the description, then it is automatically the best one.

 The output must be a list of lists wrapped into ``` and starting with **packages.csv** like this:
 **packages.csv**
@@ -322,15 +351,20 @@ package5
        generated_name = self.generate_microservice_name(description)
        microservice_name = f'{generated_name}{random.randint(0, 10_000_000)}'
        packages_list = self.get_possible_packages(description)
+        packages_list = [packages for packages in packages_list if len(set(packages).intersection(set(PROBLEMATIC_PACKAGES))) == 0]
        for num_approach, packages in enumerate(packages_list):
            try:
-                self.generate_microservice(description, test, microservice_path, microservice_name, packages, num_approach)
-                final_version_path = self.debug_microservice(microservice_path, microservice_name, num_approach, packages, description, test)
+                self.generate_microservice(description, test, microservice_path, microservice_name, packages,
+                                           num_approach)
+                final_version_path = self.debug_microservice(microservice_path, microservice_name, num_approach,
+                                                             packages, description, test)
                self.generate_playground(microservice_name, final_version_path)
            except self.MaxDebugTimeReachedException:
                print('Could not debug the Microservice with the approach:', packages)
                if num_approach == len(packages_list) - 1:
-                    print_colored('', f'Could not debug the Microservice with any of the approaches: {packages} giving up.', 'red')
+                    print_colored('',
+                                  f'Could not debug the Microservice with any of the approaches: {packages} giving up.',
+                                  'red')
                continue
            print(f'''
 You can now run or deploy your microservice:
@@ -339,4 +373,3 @@ gptdeploy deploy --path {microservice_path}
 '''
                  )
            break
-
--- a/src/options/generate/prompt_tasks.py
+++ b/src/options/generate/prompt_tasks.py
@@ -38,11 +38,16 @@ Write the executor called '{executor_name}'. The name is very important to keep.
 It matches the following description: '{executor_description}'.
 It will be tested with the following scenario: '{test_scenario}'.
 For the implementation use the following package: '{package}'.
+
+Obey the following rules:
 Have in mind that d.uri is never a path to a local file. It is always a url.
-''' + not_allowed_executor(),
-                 EXECUTOR_FILE_TAG,
-                 EXECUTOR_FILE_NAME
-                 )
+{not_allowed_executor()}
+Your approach:
+1. Identify the core challenge when implementing the executor.
+2. Think about solutions for these challenges.
+3. Decide for one of the solutions.
+4. Write the code.
+''', EXECUTOR_FILE_TAG, EXECUTOR_FILE_NAME)


 def test_executor_file_task(executor_name, test_scenario):
@@ -107,17 +112,6 @@ def streamlit_file_task():
        STREAMLIT_FILE_NAME
    )

-
-def chain_of_thought_creation():
-    return (f'''
-First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. 
-For example, the given package you could used in different ways and not all of them obey the instructions.
-Discuss the pros and cons for all of these approaches and then decide for one of the approaches. 
-Then write the code. 
-'''
-    )
-
-
 def chain_of_thought_optimization(tag_name, file_name, file_name_function=None):
    file_name_or_function = file_name
    if file_name_function:
@@ -137,19 +131,21 @@ def chain_of_thought_optimization(tag_name, file_name, file_name_function=None):

 def not_allowed_executor():
    return '''
-The executor must not use the GPU.
-The executor must not access a database.
-The executor must not access a display.
-The executor must not access external apis except unless it is explicitly mentioned in the description or test case (e.g. by mentioning the api that should be used or by providing a URL to access the data). 
-The executor must not load data from the local file system unless it was created by the executor itself.
-The executor must not use a pre-trained model unless it is explicitly mentioned in the description.
-The executor must not train a model.
-The executor must not use any attribute of Document accept Document.text.
+The executor and the test must not use the GPU.
+The executor and the test must not access a database.
+The executor and the test must not access a display.
+The executor and the test must not access external apis except unless it is explicitly mentioned in the description or test case (e.g. by mentioning the api that should be used or by providing a URL to access the data). 
+The executor and the test must not load data from the local file system unless it was created by the executor itself.
+The executor and the test must not use a pre-trained model unless it is explicitly mentioned in the description.
+The executor and the test must not train a model.
+The executor and the test must not use any attribute of Document accept Document.text.
+The executor and the test must not contain prototype or placeholder implementations.
+The executor and the test must run in a docker container based on debian.
 '''

 def not_allowed_docker():
    return '''
 Note that the Dockerfile only has access to the files: microservice.py, requirements.txt, config.yml, test_microservice.py.
 Note that the Dockerfile runs the test_microservice.py during the build process.
-Note that it is not allowed to attach a virtual display when running test_microservice.py.
+The Dockerfile must not attach a virtual display when running test_microservice.py.
 '''
--- a/src/utils/io.py
+++ b/src/utils/io.py
@@ -49,7 +49,7 @@ def timeout_generator_wrapper(generator, timeout):
            except StopIteration:
                break
            except concurrent.futures.TimeoutError:
-                raise GenerationTimeoutError(f"Generation took longer than {timeout} seconds")
+                raise GenerationTimeoutError(f"Generation took too long")

    return wrapper()

--- a/src/utils/string_tools.py
+++ b/src/utils/string_tools.py
@@ -1,22 +1,9 @@
-import difflib
 import os
 import platform

 if platform.system() == "Windows":
    os.system("color")

-def find_between(input_string, start, end):
-    try:
-        start_index = input_string.index(start) + len(start)
-        end_index = input_string.index(end, start_index)
-        return input_string[start_index:end_index]
-    except ValueError:
-        raise ValueError(f'Could not find {start} and {end} in {input_string}')
-
-
-def clean_content(content):
-    return content.replace('```', '').strip()
-
 def print_colored(headline, text, color_code, end='\n'):
    if color_code == 'black':
        color_code = '30'
@@ -40,18 +27,3 @@ def print_colored(headline, text, color_code, end='\n'):
    if headline:
        print(f"{bold_start}{color_start}{headline}{reset}")
    print(f"{color_start}{text}{reset}", end=end)
-
-
-def find_differences(a, b):
-    matcher = difflib.SequenceMatcher(None, a, b)
-    differences = set()
-
-    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
-        if tag == 'replace':
-            diff_a = a[i1:i2]
-            diff_b = b[j1:j2]
-            # Check for mirrored results and only add non-mirrored ones
-            if (diff_b, diff_a) not in differences:
-                differences.add((diff_a, diff_b))
-
-    return differences