fix: missing merge

This commit is contained in:
Florian Hönicke
2023-04-16 19:59:38 +02:00
parent 2038f04553
commit bf2cd9edc7
9 changed files with 97 additions and 51 deletions

View File

@@ -475,7 +475,7 @@ Make sure it is only printed twice in case it changed.
- [ ] test feedback for playground generation (could be part of the debugging)
- [ ] should we send everything via json in the text attribute for simplicity?
- [ ] fix release workflow
-
- [ ] after the user specified the task, ask them questions back if the task is not clear enough or something is missing
Proposal:
- [ ] just generate the non-jina related code and insert it into an executor template

View File

@@ -1,12 +1,13 @@
import os
from time import sleep
from typing import List, Tuple
from typing import List, Tuple, Optional
import openai
from openai.error import RateLimitError, Timeout
from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \
PRICING_GPT3_5_TURBO_GENERATION
PRICING_GPT3_5_TURBO_GENERATION, CHARS_PER_TOKEN
from src.options.generate.prompt_system import system_base_definition, executor_example, docarray_example, client_example
from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError
from src.utils.string_tools import print_colored
@@ -61,27 +62,29 @@ If you have updated it already, please restart your terminal.
self.chars_prompt_so_far += chars_prompt
self.chars_generation_so_far += chars_generation
print('\n')
money_prompt = round(self.chars_prompt_so_far / 3.4 * self.pricing_prompt / 1000, 3)
money_generation = round(self.chars_generation_so_far / 3.4 * self.pricing_generation / 1000, 3)
print('Estimated costs on openai.com:')
# print('money prompt:', f'${money_prompt}')
# print('money generation:', f'${money_generation}')
print('total money spent so far:', f'${money_prompt + money_generation}')
money_prompt = self.calculate_money_spent(self.chars_prompt_so_far, self.pricing_prompt)
money_generation = self.calculate_money_spent(self.chars_generation_so_far, self.pricing_generation)
print('Total money spent so far on openai.com:', f'${money_prompt + money_generation}')
print('\n')
def get_conversation(self, system_definition_examples: List[str] = ['executor', 'docarray', 'client']):
return _GPTConversation(self.supported_model, self.cost_callback, system_definition_examples)
def calculate_money_spent(self, num_chars, price):
    """Estimate the dollar cost of *num_chars* characters at *price* dollars per 1K tokens.

    Characters are converted to an approximate token count via the
    module-level CHARS_PER_TOKEN ratio; the result is rounded to 3 decimals.
    """
    approx_tokens = num_chars / CHARS_PER_TOKEN
    return round(approx_tokens * price / 1000, 3)
class _GPTConversation:
def __init__(self, model: str, cost_callback, system_definition_examples: List[str] = ['executor', 'docarray', 'client']):
self.model = model
self.cost_callback = cost_callback
self.prompt_list = [None]
self.prompt_list: List[Optional[Tuple]] = [None]
self.set_system_definition(system_definition_examples)
if 'verbose' in os.environ:
print_colored('system', self.prompt_list[0][1], 'magenta')
def query(self, prompt: str):
if 'verbose' in os.environ:
print_colored('user', prompt, 'blue')
self.prompt_list.append(('user', prompt))
response = self.get_response(self.prompt_list)
@@ -129,8 +132,8 @@ class _GPTConversation:
complete_string = self.get_response_from_stream(response_generator)
except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e:
print(e)
print('retrying, be aware that this might affect the cost calculation')
print('/n', e)
print('retrying...')
sleep(3)
continue
chars_prompt = sum(len(prompt[1]) for prompt in prompt_list)

View File

@@ -162,7 +162,19 @@ def run_locally(executor_name, microservice_version_path):
if is_docker_running():
use_docker = True
else:
click.echo('Docker daemon doesn\'t seem to be running. Trying to start it without docker')
click.echo('''
Docker daemon doesn\'t seem to be running.
It might be important to run your microservice within a docker container.
Your machine might not have all the dependencies installed.
You have 3 options:
a) start the docker daemon
b) run gptdeploy deploy... to deploy your microservice on Jina Cloud. All dependencies will be installed there.
c) try to run your microservice locally without docker. It is worth a try but might fail.
'''
)
user_input = click.prompt('Do you want to run your microservice locally without docker? (Y/n)', type=str, default='y')
if user_input.lower() != 'y':
exit(1)
use_docker = False
print('Run a jina flow locally')
full_flow_path = create_flow_yaml(microservice_version_path, executor_name, use_docker)
@@ -238,6 +250,7 @@ def update_client_line_in_file(file_path, host):
def process_error_message(error_message):
lines = error_message.split('\n')
relevant_lines = []
pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]") # Pattern to match lines like "#11 [7/8]"
@@ -250,7 +263,16 @@ def process_error_message(error_message):
if last_matching_line_index is not None:
relevant_lines = lines[last_matching_line_index:]
return '\n'.join(relevant_lines[-25:]).strip()
response = '\n'.join(relevant_lines[-25:]).strip()
# The following code handles the case where the Dockerfile is corrupted and cannot be parsed.
# The pattern matching above will not return a relevant error message in this case,
# but the last line of the error message will start with "error".
last_line = lines[-1]
if not response and last_line.startswith('error: '):
return last_line
return response
def build_docker(path):

View File

@@ -44,21 +44,25 @@ def main(ctx):
@main.command()
@click.option('--description', required=True, help='Description of the microservice.')
@click.option('--test', required=True, help='Test scenario for the microservice.')
@path_param
@click.option('--model', default='gpt-4', help='GPT model to use (default: gpt-4).')
@click.option('--verbose', default=False, is_flag=True, help='Verbose mode.')
@path_param
def generate(
description,
test,
model,
verbose,
path,
model='gpt-4'
):
from src.options.generate.generator import Generator
os.environ['VERBOSE'] = str(verbose)
path = os.path.expanduser(path)
path = os.path.abspath(path)
if os.path.exists(path):
if os.listdir(path):
click.echo(f"Error: The path {path} you provided via --path is not empty. Please choose a directory that does not exist or is empty.")
return
from src.options.generate.generator import Generator
generator = Generator(model=model)
generator.generate(description, test, path)

View File

@@ -28,7 +28,9 @@ PRICING_GPT4_GENERATION = 0.06
PRICING_GPT3_5_TURBO_PROMPT = 0.002
PRICING_GPT3_5_TURBO_GENERATION = 0.002
NUM_IMPLEMENTATION_STRATEGIES = 3
CHARS_PER_TOKEN = 3.4
NUM_IMPLEMENTATION_STRATEGIES = 5
MAX_DEBUGGING_ITERATIONS = 10
DEMO_TOKEN = '45372338e04f5a41af949024db929d46'

View File

@@ -66,7 +66,7 @@ class Generator:
MICROSERVICE_FOLDER_v1 = get_microservice_path(path, microservice_name, package, num_approach, 1)
os.makedirs(MICROSERVICE_FOLDER_v1)
print_colored('', '############# Microservice #############', 'red')
print_colored('', '############# Microservice #############', 'blue')
user_query = (
general_guidelines()
+ executor_file_task(microservice_name, description, test, package)
@@ -85,7 +85,7 @@ class Generator:
)
persist_file(microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'microservice.py'))
print_colored('', '############# Test Microservice #############', 'red')
print_colored('', '############# Test Microservice #############', 'blue')
user_query = (
general_guidelines()
+ self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python')
@@ -99,12 +99,12 @@ class Generator:
chain_of_thought_optimization('python', 'test_microservice.py')
+ "Don't add any additional tests. "
)
microservice_content = self.extract_content_from_result(
microservice_content_raw, 'microservice.py', match_single_block=True
test_microservice_content = self.extract_content_from_result(
test_microservice_content_raw, 'microservice.py', match_single_block=True
)
persist_file(microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'test_microservice.py'))
persist_file(test_microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'test_microservice.py'))
print_colored('', '############# Requirements #############', 'red')
print_colored('', '############# Requirements #############', 'blue')
requirements_path = os.path.join(MICROSERVICE_FOLDER_v1, 'requirements.txt')
user_query = (
general_guidelines()
@@ -121,7 +121,7 @@ class Generator:
requirements_content = self.extract_content_from_result(requirements_content_raw, 'requirements.txt', match_single_block=True)
persist_file(requirements_content, requirements_path)
print_colored('', '############# Dockerfile #############', 'red')
print_colored('', '############# Dockerfile #############', 'blue')
user_query = (
general_guidelines()
+ self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python')
@@ -141,7 +141,7 @@ class Generator:
print('First version of the microservice generated. Start iterating on it to make the tests pass...')
def generate_playground(self, microservice_name, microservice_path):
print_colored('', '############# Playground #############', 'red')
print_colored('', '############# Playground #############', 'blue')
file_name_to_content = get_all_microservice_files_with_content(microservice_path)
user_query = (
@@ -170,7 +170,7 @@ The playground (app.py) must not let the user configure the host on the ui.
conversation.query(user_query)
playground_content_raw = conversation.query(chain_of_thought_optimization('python', 'app.py', 'the playground'))
playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True)
persist_file(playground_content, os.path.join(miicroservice_path, 'app.py'))
persist_file(playground_content, os.path.join(microservice_path, 'app.py'))
def debug_microservice(self, path, microservice_name, num_approach, packages, description, test):
@@ -183,8 +183,9 @@ The playground (app.py) must not let the user configure the host on the ui.
log_hubble = push_executor(previous_microservice_path)
error = process_error_message(log_hubble)
if error:
print('An error occurred during the build process. Feeding the error back to the assistent...')
os.makedirs(next_microservice_path)
file_name_to_content = self.get_all_microservice_files_with_content(previous_executor_path)
file_name_to_content = get_all_microservice_files_with_content(previous_microservice_path)
is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile'])
@@ -247,6 +248,7 @@ The playground (app.py) must not let the user configure the host on the ui.
if any([error_message in error for error_message in ['AttributeError', 'NameError', 'AssertionError']]):
return False
print_colored('', 'Is it a dependency issue?', 'blue')
conversation = self.gpt_session.get_conversation([])
answer = conversation.query(
f'Your task is to assist in identifying the root cause of a Docker build error for a python application. '
@@ -278,27 +280,34 @@ PDFParserExecutor
return name
def get_possible_packages(self, description):
print_colored('', '############# What package to use? #############', 'red')
print_colored('', '############# What packages to use? #############', 'blue')
user_query = f'''
Here is the task description of the problme you need to solve:
Here is the task description of the problem you need to solve:
"{description}"
First, write down all the subtasks you need to solve which require python packages.
For each subtask:
Provide a list of 1 to 3 python packages you could use to solve the subtask. Prefer modern packages.
1. Write down all the non-trivial subtasks you need to solve.
2. Find out what is the core problem to solve.
3. Provide a list of all python packages you can think of that could directly be used to solve the core problem.
3. Provide a list of the 7 most promising python packages that fulfill the following requirements:
- can directly be used to solve the core problem
- has a stable api among different versions
- does not have system requirements
For each package:
Write down some non-obvious thoughts about the challenges you might face for the task and give multiple approaches on how you handle them.
For example, there might be some packages you must not use because they do not obay the rules:
a) Write down some non-obvious challenges you might face with the package when implementing your task and give multiple approaches on how you handle them.
For example, you might find out that you must not use the package because it does not obey the rules:
{not_allowed_executor()}
Discuss the pros and cons for all of these packages.
Create a list of package subsets that you could use to solve the task.
The list is sorted in a way that the most promising subset of packages is at the top.
The maximum length of the list is 5.
b) Discuss the pros and cons for the package.
4. Output the best 5 python packages starting with the best one.
The output must be a list of lists wrapped into ``` and starting with **packages.csv** like this:
**packages.csv**
```
package1,package2
package2,package3,...
package1
package2
package3
package4
package5
...
```
'''
@@ -319,7 +328,9 @@ package2,package3,...
final_version_path = self.debug_microservice(microservice_path, microservice_name, num_approach, packages, description, test)
self.generate_playground(microservice_name, final_version_path)
except self.MaxDebugTimeReachedException:
print('Could not debug the Microservice.')
print('Could not debug the Microservice with the approach:', packages)
if num_approach == len(packages_list) - 1:
print_colored('', f'Could not debug the Microservice with any of the approaches: {packages} giving up.', 'red')
continue
print(f'''
You can now run or deploy your microservice:

View File

@@ -71,4 +71,8 @@ print(response[0].text)
```'''
system_base_definition = f'''You are a principal engineer working at Jina - an open source company. You accurately satisfy all of the user's requirements.'''
system_base_definition = f'''
It is the year 2021.
You are a principal engineer working at Jina - an open source company.
You accurately satisfy all of the user's requirements.
'''

View File

@@ -51,7 +51,7 @@ def test_executor_file_task(executor_name, test_scenario):
"Start the test with an extensive comment about the test case. "
+ (
f"Write a single test case that tests the following scenario: '{test_scenario}'. "
f"In case the test scenario is not precise enough, test the most general case without any assumptions."
f"In case the test scenario is not precise enough, test a general case without any assumptions."
if test_scenario else ""
)
+ "Use the following import to import the executor: "
@@ -82,7 +82,7 @@ def docker_file_task():
"Usually libraries are installed with apt-get. "
"Be aware that the machine the docker container is running on does not have a GPU - only CPU. "
"Add the config.yml file to the Dockerfile. Note that the Dockerfile only has access to the files: "
"executor.py, requirements.txt, config.yml, test_executor.py. "
"microservice.py, requirements.txt, config.yml, test_microservice.py. "
"The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. '
'Make sure the all files are in the /workdir. '
@@ -149,6 +149,7 @@ The executor must not use any attribute of Document accept Document.text.
def not_allowed_docker():
return '''
Note that the Dockerfile only has access to the files: executor.py, requirements.txt, config.yml, test_executor.py.
Note that the Dockerfile only has access to the files: microservice.py, requirements.txt, config.yml, test_microservice.py.
Note that the Dockerfile runs the test_microservice.py during the build process.
Note that it is not allowed to attach a virtual display when running test_microservice.py.
'''

View File

@@ -1,6 +1,5 @@
import os
import concurrent.futures
import concurrent.futures
from typing import Generator
import sys