fix: missing merge

This commit is contained in:
Florian Hönicke
2023-04-16 19:59:38 +02:00
parent 2038f04553
commit bf2cd9edc7
9 changed files with 97 additions and 51 deletions

View File

@@ -475,7 +475,7 @@ Make sure it is only printed twice in case it changed.
- [ ] test feedback for playground generation (could be part of the debugging) - [ ] test feedback for playground generation (could be part of the debugging)
- [ ] should we send everything via json in the text attribute for simplicity? - [ ] should we send everything via json in the text attribute for simplicity?
- [ ] fix release workflow - [ ] fix release workflow
- - [ ] after the user specified the task, ask them questions back if the task is not clear enough or something is missing
Proposal: Proposal:
- [ ] just generate the non-jina related code and insert it into an executor template - [ ] just generate the non-jina related code and insert it into an executor template

View File

@@ -1,12 +1,13 @@
import os import os
from time import sleep from time import sleep
from typing import List, Tuple
from typing import List, Tuple, Optional
import openai import openai
from openai.error import RateLimitError, Timeout from openai.error import RateLimitError, Timeout
from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \ from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \
PRICING_GPT3_5_TURBO_GENERATION PRICING_GPT3_5_TURBO_GENERATION, CHARS_PER_TOKEN
from src.options.generate.prompt_system import system_base_definition, executor_example, docarray_example, client_example from src.options.generate.prompt_system import system_base_definition, executor_example, docarray_example, client_example
from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError
from src.utils.string_tools import print_colored from src.utils.string_tools import print_colored
@@ -61,27 +62,29 @@ If you have updated it already, please restart your terminal.
self.chars_prompt_so_far += chars_prompt self.chars_prompt_so_far += chars_prompt
self.chars_generation_so_far += chars_generation self.chars_generation_so_far += chars_generation
print('\n') print('\n')
money_prompt = round(self.chars_prompt_so_far / 3.4 * self.pricing_prompt / 1000, 3) money_prompt = self.calculate_money_spent(self.chars_prompt_so_far, self.pricing_prompt)
money_generation = round(self.chars_generation_so_far / 3.4 * self.pricing_generation / 1000, 3) money_generation = self.calculate_money_spent(self.chars_generation_so_far, self.pricing_generation)
print('Estimated costs on openai.com:') print('Total money spent so far on openai.com:', f'${money_prompt + money_generation}')
# print('money prompt:', f'${money_prompt}')
# print('money generation:', f'${money_generation}')
print('total money spent so far:', f'${money_prompt + money_generation}')
print('\n') print('\n')
def get_conversation(self, system_definition_examples: List[str] = ['executor', 'docarray', 'client']): def get_conversation(self, system_definition_examples: List[str] = ['executor', 'docarray', 'client']):
return _GPTConversation(self.supported_model, self.cost_callback, system_definition_examples) return _GPTConversation(self.supported_model, self.cost_callback, system_definition_examples)
def calculate_money_spent(self, num_chars, price):
return round(num_chars / CHARS_PER_TOKEN * price / 1000, 3)
class _GPTConversation: class _GPTConversation:
def __init__(self, model: str, cost_callback, system_definition_examples: List[str] = ['executor', 'docarray', 'client']): def __init__(self, model: str, cost_callback, system_definition_examples: List[str] = ['executor', 'docarray', 'client']):
self.model = model self.model = model
self.cost_callback = cost_callback self.cost_callback = cost_callback
self.prompt_list = [None] self.prompt_list: List[Optional[Tuple]] = [None]
self.set_system_definition(system_definition_examples) self.set_system_definition(system_definition_examples)
if 'verbose' in os.environ:
print_colored('system', self.prompt_list[0][1], 'magenta') print_colored('system', self.prompt_list[0][1], 'magenta')
def query(self, prompt: str): def query(self, prompt: str):
if 'verbose' in os.environ:
print_colored('user', prompt, 'blue') print_colored('user', prompt, 'blue')
self.prompt_list.append(('user', prompt)) self.prompt_list.append(('user', prompt))
response = self.get_response(self.prompt_list) response = self.get_response(self.prompt_list)
@@ -129,8 +132,8 @@ class _GPTConversation:
complete_string = self.get_response_from_stream(response_generator) complete_string = self.get_response_from_stream(response_generator)
except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e: except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e:
print(e) print('\n', e)
print('retrying, be aware that this might affect the cost calculation') print('retrying...')
sleep(3) sleep(3)
continue continue
chars_prompt = sum(len(prompt[1]) for prompt in prompt_list) chars_prompt = sum(len(prompt[1]) for prompt in prompt_list)

View File

@@ -162,7 +162,19 @@ def run_locally(executor_name, microservice_version_path):
if is_docker_running(): if is_docker_running():
use_docker = True use_docker = True
else: else:
click.echo('Docker daemon doesn\'t seem to be running. Trying to start it without docker') click.echo('''
Docker daemon doesn\'t seem to be running.
It might be important to run your microservice within a docker container.
Your machine might not have all the dependencies installed.
You have 3 options:
a) start the docker daemon
b) run gptdeploy deploy... to deploy your microservice on Jina Cloud. All dependencies will be installed there.
c) try to run your microservice locally without docker. It is worth a try but might fail.
'''
)
user_input = click.prompt('Do you want to run your microservice locally without docker? (Y/n)', type=str, default='y')
if user_input.lower() != 'y':
exit(1)
use_docker = False use_docker = False
print('Run a jina flow locally') print('Run a jina flow locally')
full_flow_path = create_flow_yaml(microservice_version_path, executor_name, use_docker) full_flow_path = create_flow_yaml(microservice_version_path, executor_name, use_docker)
@@ -238,6 +250,7 @@ def update_client_line_in_file(file_path, host):
def process_error_message(error_message): def process_error_message(error_message):
lines = error_message.split('\n') lines = error_message.split('\n')
relevant_lines = [] relevant_lines = []
pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]") # Pattern to match lines like "#11 [7/8]" pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]") # Pattern to match lines like "#11 [7/8]"
@@ -250,7 +263,16 @@ def process_error_message(error_message):
if last_matching_line_index is not None: if last_matching_line_index is not None:
relevant_lines = lines[last_matching_line_index:] relevant_lines = lines[last_matching_line_index:]
return '\n'.join(relevant_lines[-25:]).strip() response = '\n'.join(relevant_lines[-25:]).strip()
# the following code tests the case that the docker file is corrupted and can not be parsed
# the method above will not return a relevant error message in this case
# but the last line of the error message will start with "error"
last_line = lines[-1]
if not response and last_line.startswith('error: '):
return last_line
return response
def build_docker(path): def build_docker(path):

View File

@@ -44,21 +44,25 @@ def main(ctx):
@main.command() @main.command()
@click.option('--description', required=True, help='Description of the microservice.') @click.option('--description', required=True, help='Description of the microservice.')
@click.option('--test', required=True, help='Test scenario for the microservice.') @click.option('--test', required=True, help='Test scenario for the microservice.')
@path_param
@click.option('--model', default='gpt-4', help='GPT model to use (default: gpt-4).') @click.option('--model', default='gpt-4', help='GPT model to use (default: gpt-4).')
@click.option('--verbose', default=False, is_flag=True, help='Verbose mode.')
@path_param
def generate( def generate(
description, description,
test, test,
model,
verbose,
path, path,
model='gpt-4'
): ):
from src.options.generate.generator import Generator os.environ['VERBOSE'] = str(verbose)
path = os.path.expanduser(path) path = os.path.expanduser(path)
path = os.path.abspath(path) path = os.path.abspath(path)
if os.path.exists(path): if os.path.exists(path):
if os.listdir(path): if os.listdir(path):
click.echo(f"Error: The path {path} you provided via --path is not empty. Please choose a directory that does not exist or is empty.") click.echo(f"Error: The path {path} you provided via --path is not empty. Please choose a directory that does not exist or is empty.")
return return
from src.options.generate.generator import Generator
generator = Generator(model=model) generator = Generator(model=model)
generator.generate(description, test, path) generator.generate(description, test, path)

View File

@@ -28,7 +28,9 @@ PRICING_GPT4_GENERATION = 0.06
PRICING_GPT3_5_TURBO_PROMPT = 0.002 PRICING_GPT3_5_TURBO_PROMPT = 0.002
PRICING_GPT3_5_TURBO_GENERATION = 0.002 PRICING_GPT3_5_TURBO_GENERATION = 0.002
NUM_IMPLEMENTATION_STRATEGIES = 3 CHARS_PER_TOKEN = 3.4
NUM_IMPLEMENTATION_STRATEGIES = 5
MAX_DEBUGGING_ITERATIONS = 10 MAX_DEBUGGING_ITERATIONS = 10
DEMO_TOKEN = '45372338e04f5a41af949024db929d46' DEMO_TOKEN = '45372338e04f5a41af949024db929d46'

View File

@@ -66,7 +66,7 @@ class Generator:
MICROSERVICE_FOLDER_v1 = get_microservice_path(path, microservice_name, package, num_approach, 1) MICROSERVICE_FOLDER_v1 = get_microservice_path(path, microservice_name, package, num_approach, 1)
os.makedirs(MICROSERVICE_FOLDER_v1) os.makedirs(MICROSERVICE_FOLDER_v1)
print_colored('', '############# Microservice #############', 'red') print_colored('', '############# Microservice #############', 'blue')
user_query = ( user_query = (
general_guidelines() general_guidelines()
+ executor_file_task(microservice_name, description, test, package) + executor_file_task(microservice_name, description, test, package)
@@ -85,7 +85,7 @@ class Generator:
) )
persist_file(microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'microservice.py')) persist_file(microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'microservice.py'))
print_colored('', '############# Test Microservice #############', 'red') print_colored('', '############# Test Microservice #############', 'blue')
user_query = ( user_query = (
general_guidelines() general_guidelines()
+ self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python') + self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python')
@@ -99,12 +99,12 @@ class Generator:
chain_of_thought_optimization('python', 'test_microservice.py') chain_of_thought_optimization('python', 'test_microservice.py')
+ "Don't add any additional tests. " + "Don't add any additional tests. "
) )
microservice_content = self.extract_content_from_result( test_microservice_content = self.extract_content_from_result(
microservice_content_raw, 'microservice.py', match_single_block=True test_microservice_content_raw, 'microservice.py', match_single_block=True
) )
persist_file(microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'test_microservice.py')) persist_file(test_microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'test_microservice.py'))
print_colored('', '############# Requirements #############', 'red') print_colored('', '############# Requirements #############', 'blue')
requirements_path = os.path.join(MICROSERVICE_FOLDER_v1, 'requirements.txt') requirements_path = os.path.join(MICROSERVICE_FOLDER_v1, 'requirements.txt')
user_query = ( user_query = (
general_guidelines() general_guidelines()
@@ -121,7 +121,7 @@ class Generator:
requirements_content = self.extract_content_from_result(requirements_content_raw, 'requirements.txt', match_single_block=True) requirements_content = self.extract_content_from_result(requirements_content_raw, 'requirements.txt', match_single_block=True)
persist_file(requirements_content, requirements_path) persist_file(requirements_content, requirements_path)
print_colored('', '############# Dockerfile #############', 'red') print_colored('', '############# Dockerfile #############', 'blue')
user_query = ( user_query = (
general_guidelines() general_guidelines()
+ self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python') + self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python')
@@ -141,7 +141,7 @@ class Generator:
print('First version of the microservice generated. Start iterating on it to make the tests pass...') print('First version of the microservice generated. Start iterating on it to make the tests pass...')
def generate_playground(self, microservice_name, microservice_path): def generate_playground(self, microservice_name, microservice_path):
print_colored('', '############# Playground #############', 'red') print_colored('', '############# Playground #############', 'blue')
file_name_to_content = get_all_microservice_files_with_content(microservice_path) file_name_to_content = get_all_microservice_files_with_content(microservice_path)
user_query = ( user_query = (
@@ -170,7 +170,7 @@ The playground (app.py) must not let the user configure the host on the ui.
conversation.query(user_query) conversation.query(user_query)
playground_content_raw = conversation.query(chain_of_thought_optimization('python', 'app.py', 'the playground')) playground_content_raw = conversation.query(chain_of_thought_optimization('python', 'app.py', 'the playground'))
playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True) playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True)
persist_file(playground_content, os.path.join(miicroservice_path, 'app.py')) persist_file(playground_content, os.path.join(microservice_path, 'app.py'))
def debug_microservice(self, path, microservice_name, num_approach, packages, description, test): def debug_microservice(self, path, microservice_name, num_approach, packages, description, test):
@@ -183,8 +183,9 @@ The playground (app.py) must not let the user configure the host on the ui.
log_hubble = push_executor(previous_microservice_path) log_hubble = push_executor(previous_microservice_path)
error = process_error_message(log_hubble) error = process_error_message(log_hubble)
if error: if error:
print('An error occurred during the build process. Feeding the error back to the assistant...')
os.makedirs(next_microservice_path) os.makedirs(next_microservice_path)
file_name_to_content = self.get_all_microservice_files_with_content(previous_executor_path) file_name_to_content = get_all_microservice_files_with_content(previous_microservice_path)
is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile']) is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile'])
@@ -247,6 +248,7 @@ The playground (app.py) must not let the user configure the host on the ui.
if any([error_message in error for error_message in ['AttributeError', 'NameError', 'AssertionError']]): if any([error_message in error for error_message in ['AttributeError', 'NameError', 'AssertionError']]):
return False return False
print_colored('', 'Is it a dependency issue?', 'blue')
conversation = self.gpt_session.get_conversation([]) conversation = self.gpt_session.get_conversation([])
answer = conversation.query( answer = conversation.query(
f'Your task is to assist in identifying the root cause of a Docker build error for a python application. ' f'Your task is to assist in identifying the root cause of a Docker build error for a python application. '
@@ -278,27 +280,34 @@ PDFParserExecutor
return name return name
def get_possible_packages(self, description): def get_possible_packages(self, description):
print_colored('', '############# What package to use? #############', 'red') print_colored('', '############# What packages to use? #############', 'blue')
user_query = f''' user_query = f'''
Here is the task description of the problme you need to solve: Here is the task description of the problem you need to solve:
"{description}" "{description}"
First, write down all the subtasks you need to solve which require python packages. 1. Write down all the non-trivial subtasks you need to solve.
For each subtask: 2. Find out what is the core problem to solve.
Provide a list of 1 to 3 python packages you could use to solve the subtask. Prefer modern packages. 3. Provide a list of all python packages you can think of that could directly be used to solve the core problem.
For each package: 3. Provide a list of the 7 most promising python packages that fulfill the following requirements:
Write down some non-obvious thoughts about the challenges you might face for the task and give multiple approaches on how you handle them. - can directly be used to solve the core problem
For example, there might be some packages you must not use because they do not obay the rules: - has a stable api among different versions
- does not have system requirements
For each package:
a) Write down some non-obvious challenges you might face with the package when implementing your task and give multiple approaches on how you handle them.
For example, you might find out that you must not use the package because it does not obey the rules:
{not_allowed_executor()} {not_allowed_executor()}
Discuss the pros and cons for all of these packages. b) Discuss the pros and cons for the package.
Create a list of package subsets that you could use to solve the task.
The list is sorted in a way that the most promising subset of packages is at the top. 4. Output the best 5 python packages starting with the best one.
The maximum length of the list is 5.
The output must be a list of lists wrapped into ``` and starting with **packages.csv** like this: The output must be a list of lists wrapped into ``` and starting with **packages.csv** like this:
**packages.csv** **packages.csv**
``` ```
package1,package2 package1
package2,package3,... package2
package3
package4
package5
... ...
``` ```
''' '''
@@ -319,7 +328,9 @@ package2,package3,...
final_version_path = self.debug_microservice(microservice_path, microservice_name, num_approach, packages, description, test) final_version_path = self.debug_microservice(microservice_path, microservice_name, num_approach, packages, description, test)
self.generate_playground(microservice_name, final_version_path) self.generate_playground(microservice_name, final_version_path)
except self.MaxDebugTimeReachedException: except self.MaxDebugTimeReachedException:
print('Could not debug the Microservice.') print('Could not debug the Microservice with the approach:', packages)
if num_approach == len(packages_list) - 1:
print_colored('', f'Could not debug the Microservice with any of the approaches: {packages} giving up.', 'red')
continue continue
print(f''' print(f'''
You can now run or deploy your microservice: You can now run or deploy your microservice:

View File

@@ -71,4 +71,8 @@ print(response[0].text)
```''' ```'''
system_base_definition = f'''You are a principal engineer working at Jina - an open source company. You accurately satisfy all of the user's requirements.''' system_base_definition = f'''
It is the year 2021.
You are a principal engineer working at Jina - an open source company.
You accurately satisfy all of the user's requirements.
'''

View File

@@ -51,7 +51,7 @@ def test_executor_file_task(executor_name, test_scenario):
"Start the test with an extensive comment about the test case. " "Start the test with an extensive comment about the test case. "
+ ( + (
f"Write a single test case that tests the following scenario: '{test_scenario}'. " f"Write a single test case that tests the following scenario: '{test_scenario}'. "
f"In case the test scenario is not precise enough, test the most general case without any assumptions." f"In case the test scenario is not precise enough, test a general case without any assumptions."
if test_scenario else "" if test_scenario else ""
) )
+ "Use the following import to import the executor: " + "Use the following import to import the executor: "
@@ -82,7 +82,7 @@ def docker_file_task():
"Usually libraries are installed with apt-get. " "Usually libraries are installed with apt-get. "
"Be aware that the machine the docker container is running on does not have a GPU - only CPU. " "Be aware that the machine the docker container is running on does not have a GPU - only CPU. "
"Add the config.yml file to the Dockerfile. Note that the Dockerfile only has access to the files: " "Add the config.yml file to the Dockerfile. Note that the Dockerfile only has access to the files: "
"executor.py, requirements.txt, config.yml, test_executor.py. " "microservice.py, requirements.txt, config.yml, test_microservice.py. "
"The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. " "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. ' 'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. '
'Make sure all the files are in the /workdir. ' 'Make sure all the files are in the /workdir. '
@@ -149,6 +149,7 @@ The executor must not use any attribute of Document except Document.text.
def not_allowed_docker(): def not_allowed_docker():
return ''' return '''
Note that the Dockerfile only has access to the files: executor.py, requirements.txt, config.yml, test_executor.py. Note that the Dockerfile only has access to the files: microservice.py, requirements.txt, config.yml, test_microservice.py.
Note that the Dockerfile runs the test_microservice.py during the build process. Note that the Dockerfile runs the test_microservice.py during the build process.
Note that it is not allowed to attach a virtual display when running test_microservice.py.
''' '''

View File

@@ -1,6 +1,5 @@
import os import os
import concurrent.futures
import concurrent.futures import concurrent.futures
from typing import Generator from typing import Generator
import sys import sys