From bf2cd9edc77286d0de86a819d886df127f1659bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Sun, 16 Apr 2023 19:59:38 +0200
Subject: [PATCH] fix: missing merge

---
 README.md                             |  2 +-
 src/apis/gpt.py                       | 29 ++++++------
 src/apis/jina_cloud.py                | 26 ++++++++++-
 src/cli.py                            | 10 +++--
 src/constants.py                      |  4 +-
 src/options/generate/generator.py     | 63 ++++++++++++++++-----------
 src/options/generate/prompt_system.py |  6 ++-
 src/options/generate/prompt_tasks.py  |  7 +--
 src/utils/io.py                       |  1 -
 9 files changed, 97 insertions(+), 51 deletions(-)

diff --git a/README.md b/README.md
index 8f30a33..e428db9 100644
--- a/README.md
+++ b/README.md
@@ -475,7 +475,7 @@ Make sure it is only printed twice in case it changed.
 - [ ] test feedback for playground generation (could be part of the debugging)
 - [ ] should we send everything via json in the text attribute for simplicity?
 - [ ] fix release workflow
-- 
+- [ ] after the user has specified the task, ask follow-up questions if the task is not clear enough or something is missing
 
 Proposal:
 - [ ] just generate the non-jina related code and insert it into an executor template
diff --git a/src/apis/gpt.py b/src/apis/gpt.py
index 038ec85..1ce579e 100644
--- a/src/apis/gpt.py
+++ b/src/apis/gpt.py
@@ -1,12 +1,13 @@
 import os
 from time import sleep
-from typing import List, Tuple
+
+from typing import List, Tuple, Optional
 
 import openai
 from openai.error import RateLimitError, Timeout
 
 from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \
-    PRICING_GPT3_5_TURBO_GENERATION
+    PRICING_GPT3_5_TURBO_GENERATION, CHARS_PER_TOKEN
 from src.options.generate.prompt_system import system_base_definition, executor_example, docarray_example, client_example
 from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError
 from src.utils.string_tools import print_colored
@@ -61,28 +62,30 @@ If you have updated it already, please restart your terminal.
         self.chars_prompt_so_far += chars_prompt
         self.chars_generation_so_far += chars_generation
         print('\n')
-        money_prompt = round(self.chars_prompt_so_far / 3.4 * self.pricing_prompt / 1000, 3)
-        money_generation = round(self.chars_generation_so_far / 3.4 * self.pricing_generation / 1000, 3)
-        print('Estimated costs on openai.com:')
-        # print('money prompt:', f'${money_prompt}')
-        # print('money generation:', f'${money_generation}')
-        print('total money spent so far:', f'${money_prompt + money_generation}')
+        money_prompt = self.calculate_money_spent(self.chars_prompt_so_far, self.pricing_prompt)
+        money_generation = self.calculate_money_spent(self.chars_generation_so_far, self.pricing_generation)
+        print('Total money spent so far on openai.com:', f'${money_prompt + money_generation}')
         print('\n')
 
     def get_conversation(self, system_definition_examples: List[str] = ['executor', 'docarray', 'client']):
         return _GPTConversation(self.supported_model, self.cost_callback, system_definition_examples)
 
+    def calculate_money_spent(self, num_chars, price):
+        return round(num_chars / CHARS_PER_TOKEN * price / 1000, 3)
+
 
 class _GPTConversation:
     def __init__(self, model: str, cost_callback, system_definition_examples: List[str] = ['executor', 'docarray', 'client']):
         self.model = model
         self.cost_callback = cost_callback
-        self.prompt_list = [None]
+        self.prompt_list: List[Optional[Tuple]] = [None]
         self.set_system_definition(system_definition_examples)
-        print_colored('system', self.prompt_list[0][1], 'magenta')
+        if os.environ.get('VERBOSE', '').lower() == 'true':
+            print_colored('system', self.prompt_list[0][1], 'magenta')
 
     def query(self, prompt: str):
-        print_colored('user', prompt, 'blue')
+        if os.environ.get('VERBOSE', '').lower() == 'true':
+            print_colored('user', prompt, 'blue')
         self.prompt_list.append(('user', prompt))
         response = self.get_response(self.prompt_list)
         self.prompt_list.append(('assistant', response))
@@ -129,8 +132,8 @@ class _GPTConversation:
                 complete_string = self.get_response_from_stream(response_generator)
 
             except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e:
-                print(e)
-                print('retrying, be aware that this might affect the cost calculation')
+                print('\n', e)
+                print('retrying...')
                 sleep(3)
                 continue
             chars_prompt = sum(len(prompt[1]) for prompt in prompt_list)
diff --git a/src/apis/jina_cloud.py b/src/apis/jina_cloud.py
index 0a688c0..5769b96 100644
--- a/src/apis/jina_cloud.py
+++ b/src/apis/jina_cloud.py
@@ -162,7 +162,19 @@ def run_locally(executor_name, microservice_version_path):
     if is_docker_running():
         use_docker = True
     else:
-        click.echo('Docker daemon doesn\'t seem to be running. Trying to start it without docker')
+        click.echo('''
+Docker daemon doesn\'t seem to be running.
+It might be important to run your microservice within a docker container.
+Your machine might not have all the dependencies installed.
+You have 3 options:
+a) start the docker daemon
+b) run gptdeploy deploy... to deploy your microservice on Jina Cloud. All dependencies will be installed there.
+c) try to run your microservice locally without docker. It is worth a try but might fail.
+'''
+                   )
+        user_input = click.prompt('Do you want to run your microservice locally without docker? (Y/n)', type=str, default='y')
+        if user_input.lower() != 'y':
+            exit(1)
         use_docker = False
     print('Run a jina flow locally')
     full_flow_path = create_flow_yaml(microservice_version_path, executor_name, use_docker)
@@ -238,6 +250,7 @@ def update_client_line_in_file(file_path, host):
 
 def process_error_message(error_message):
     lines = error_message.split('\n')
+
     relevant_lines = []
 
     pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]")  # Pattern to match lines like "#11 [7/8]"
@@ -250,7 +263,16 @@ def process_error_message(error_message):
     if last_matching_line_index is not None:
         relevant_lines = lines[last_matching_line_index:]
 
-    return '\n'.join(relevant_lines[-25:]).strip()
+    response = '\n'.join(relevant_lines[-25:]).strip()
+
+    # The following code handles the case where the Dockerfile is corrupted and cannot be parsed.
+    # The filtering above does not yield a relevant error message in this case,
+    # but the last line of the error message will start with "error: ".
+
+    last_line = lines[-1]
+    if not response and last_line.startswith('error: '):
+        return last_line
+    return response
 
 
 def build_docker(path):
diff --git a/src/cli.py b/src/cli.py
index 9f2173a..db24889 100644
--- a/src/cli.py
+++ b/src/cli.py
@@ -44,21 +44,25 @@ def main(ctx):
 @main.command()
 @click.option('--description', required=True, help='Description of the microservice.')
 @click.option('--test', required=True, help='Test scenario for the microservice.')
-@path_param
 @click.option('--model', default='gpt-4', help='GPT model to use (default: gpt-4).')
+@click.option('--verbose', default=False, is_flag=True, help='Verbose mode.')
+@path_param
 def generate(
         description,
         test,
+        model,
+        verbose,
         path,
-        model='gpt-4'
 ):
-    from src.options.generate.generator import Generator
+    os.environ['VERBOSE'] = str(verbose)
     path = os.path.expanduser(path)
     path = os.path.abspath(path)
     if os.path.exists(path):
         if os.listdir(path):
             click.echo(f"Error: The path {path} you provided via --path is not empty. Please choose a directory that does not exist or is empty.")
             return
+
+    from src.options.generate.generator import Generator
     generator = Generator(model=model)
     generator.generate(description, test, path)
 
diff --git a/src/constants.py b/src/constants.py
index 6c34bec..44d1143 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -28,7 +28,9 @@ PRICING_GPT4_GENERATION = 0.06
 PRICING_GPT3_5_TURBO_PROMPT = 0.002
 PRICING_GPT3_5_TURBO_GENERATION = 0.002
 
-NUM_IMPLEMENTATION_STRATEGIES = 3
+CHARS_PER_TOKEN = 3.4
+
+NUM_IMPLEMENTATION_STRATEGIES = 5
 MAX_DEBUGGING_ITERATIONS = 10
 
 DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
\ No newline at end of file
diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 73b7b19..6d1e610 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -66,7 +66,7 @@ class Generator:
         MICROSERVICE_FOLDER_v1 = get_microservice_path(path, microservice_name, package, num_approach, 1)
         os.makedirs(MICROSERVICE_FOLDER_v1)
 
-        print_colored('', '############# Microservice #############', 'red')
+        print_colored('', '############# Microservice #############', 'blue')
         user_query = (
                 general_guidelines()
                 + executor_file_task(microservice_name, description, test, package)
@@ -85,7 +85,7 @@ class Generator:
             )
         persist_file(microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'microservice.py'))
 
-        print_colored('', '############# Test Microservice #############', 'red')
+        print_colored('', '############# Test Microservice #############', 'blue')
         user_query = (
                 general_guidelines()
                 + self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python')
@@ -99,12 +99,12 @@ class Generator:
                 chain_of_thought_optimization('python', 'test_microservice.py')
                 + "Don't add any additional tests. "
             )
-        microservice_content = self.extract_content_from_result(
-            microservice_content_raw, 'microservice.py', match_single_block=True
+        test_microservice_content = self.extract_content_from_result(
+            test_microservice_content_raw, 'microservice.py', match_single_block=True
         )
-        persist_file(microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'test_microservice.py'))
+        persist_file(test_microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'test_microservice.py'))
 
-        print_colored('', '############# Requirements #############', 'red')
+        print_colored('', '############# Requirements #############', 'blue')
         requirements_path = os.path.join(MICROSERVICE_FOLDER_v1, 'requirements.txt')
         user_query = (
                 general_guidelines()
@@ -121,7 +121,7 @@ class Generator:
         requirements_content = self.extract_content_from_result(requirements_content_raw, 'requirements.txt', match_single_block=True)
         persist_file(requirements_content, requirements_path)
 
-        print_colored('', '############# Dockerfile #############', 'red')
+        print_colored('', '############# Dockerfile #############', 'blue')
         user_query = (
                 general_guidelines()
                 + self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python')
@@ -141,7 +141,7 @@ class Generator:
         print('First version of the microservice generated. Start iterating on it to make the tests pass...')
 
     def generate_playground(self, microservice_name, microservice_path):
-        print_colored('', '############# Playground #############', 'red')
+        print_colored('', '############# Playground #############', 'blue')
 
         file_name_to_content = get_all_microservice_files_with_content(microservice_path)
         user_query = (
@@ -170,7 +170,7 @@ The playground (app.py) must not let the user configure the host on the ui.
         conversation.query(user_query)
         playground_content_raw = conversation.query(chain_of_thought_optimization('python', 'app.py', 'the playground'))
         playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True)
-        persist_file(playground_content, os.path.join(miicroservice_path, 'app.py'))
+        persist_file(playground_content, os.path.join(microservice_path, 'app.py'))
 
 
     def debug_microservice(self, path, microservice_name, num_approach, packages, description, test):
@@ -183,8 +183,9 @@ The playground (app.py) must not let the user configure the host on the ui.
             log_hubble = push_executor(previous_microservice_path)
             error = process_error_message(log_hubble)
             if error:
+                print('An error occurred during the build process. Feeding the error back to the assistant...')
                 os.makedirs(next_microservice_path)
-                file_name_to_content = self.get_all_microservice_files_with_content(previous_executor_path)
+                file_name_to_content = get_all_microservice_files_with_content(previous_microservice_path)
 
                 is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile'])
 
@@ -247,6 +248,7 @@ The playground (app.py) must not let the user configure the host on the ui.
         if any([error_message in error for error_message in ['AttributeError', 'NameError', 'AssertionError']]):
             return False
 
+        print_colored('', 'Is it a dependency issue?', 'blue')
         conversation = self.gpt_session.get_conversation([])
         answer = conversation.query(
             f'Your task is to assist in identifying the root cause of a Docker build error for a python application. '
@@ -278,27 +280,34 @@ PDFParserExecutor
         return name
 
     def get_possible_packages(self, description):
-        print_colored('', '############# What package to use? #############', 'red')
+        print_colored('', '############# What packages to use? #############', 'blue')
         user_query = f'''
-Here is the task description of the problme you need to solve:
+Here is the task description of the problem you need to solve:
 "{description}"
-First, write down all the subtasks you need to solve which require python packages.
-For each subtask:
-    Provide a list of 1 to 3 python packages you could use to solve the subtask. Prefer modern packages.
-    For each package:
-        Write down some non-obvious thoughts about the challenges you might face for the task and give multiple approaches on how you handle them.
-        For example, there might be some packages you must not use because they do not obay the rules:
-        {not_allowed_executor()}
-        Discuss the pros and cons for all of these packages.
-Create a list of package subsets that you could use to solve the task.
-The list is sorted in a way that the most promising subset of packages is at the top.
-The maximum length of the list is 5.
+1. Write down all the non-trivial subtasks you need to solve.
+2. Find out what is the core problem to solve.
+3. Provide a list of all python packages you can think of that could directly be used to solve the core problem.
+4. Provide a list of the 7 most promising python packages that fulfill the following requirements:
+- can directly be used to solve the core problem
+- has a stable api among different versions
+- does not have system requirements
+
+For each package:
+    a) Write down some non-obvious challenges you might face with the package when implementing your task and give multiple approaches on how you handle them.
+    For example, you might find out that you must not use the package because it does not obey the rules:
+    {not_allowed_executor()}
+    b) Discuss the pros and cons for the package.
+
+5. Output the best 5 python packages starting with the best one.
 
 The output must be a list of lists wrapped into ``` and starting with **packages.csv** like this:
 **packages.csv**
 ```
-package1,package2
-package2,package3,...
+package1
+package2
+package3
+package4
+package5
 ...
 ```
 '''
@@ -319,7 +328,9 @@ package2,package3,...
                 final_version_path = self.debug_microservice(microservice_path, microservice_name, num_approach, packages, description, test)
                 self.generate_playground(microservice_name, final_version_path)
             except self.MaxDebugTimeReachedException:
-                print('Could not debug the Microservice.')
+                print('Could not debug the Microservice with the approach:', packages)
+                if num_approach == len(packages_list) - 1:
+                    print_colored('', f'Could not debug the Microservice with any of the approaches: {packages} giving up.', 'red')
                 continue
             print(f'''
 You can now run or deploy your microservice:
diff --git a/src/options/generate/prompt_system.py b/src/options/generate/prompt_system.py
index 02a38ee..8b0c987 100644
--- a/src/options/generate/prompt_system.py
+++ b/src/options/generate/prompt_system.py
@@ -71,4 +71,8 @@ print(response[0].text)
 ```'''
 
 
-system_base_definition = f'''You are a principal engineer working at Jina - an open source company. You accurately satisfy all of the user's requirements.'''
\ No newline at end of file
+system_base_definition = f'''
+It is the year 2021. 
+You are a principal engineer working at Jina - an open source company. 
+You accurately satisfy all of the user's requirements.
+'''
\ No newline at end of file
diff --git a/src/options/generate/prompt_tasks.py b/src/options/generate/prompt_tasks.py
index 239f952..b9ac614 100644
--- a/src/options/generate/prompt_tasks.py
+++ b/src/options/generate/prompt_tasks.py
@@ -51,7 +51,7 @@ def test_executor_file_task(executor_name, test_scenario):
         "Start the test with an extensive comment about the test case. "
         + (
             f"Write a single test case that tests the following scenario: '{test_scenario}'. "
-            f"In case the test scenario is not precise enough, test the most general case without any assumptions."
+            f"In case the test scenario is not precise enough, test a general case without any assumptions."
             if test_scenario else ""
         )
         + "Use the following import to import the executor: "
@@ -82,7 +82,7 @@ def docker_file_task():
         "Usually libraries are installed with apt-get. "
         "Be aware that the machine the docker container is running on does not have a GPU - only CPU. "
         "Add the config.yml file to the Dockerfile. Note that the Dockerfile only has access to the files: "
-        "executor.py, requirements.txt, config.yml, test_executor.py. "
+        "microservice.py, requirements.txt, config.yml, test_microservice.py. "
         "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
         'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. '
         'Make sure the all files are in the /workdir. '
@@ -149,6 +149,7 @@ The executor must not use any attribute of Document accept Document.text.
 
 def not_allowed_docker():
     return '''
-Note that the Dockerfile only has access to the files: executor.py, requirements.txt, config.yml, test_executor.py.
+Note that the Dockerfile only has access to the files: microservice.py, requirements.txt, config.yml, test_microservice.py.
 Note that the Dockerfile runs the test_microservice.py during the build process.
+Note that it is not allowed to attach a virtual display when running test_microservice.py.
 '''
diff --git a/src/utils/io.py b/src/utils/io.py
index 68f388d..c3a70b0 100644
--- a/src/utils/io.py
+++ b/src/utils/io.py
@@ -1,6 +1,5 @@
 import os
 
-import concurrent.futures
 import concurrent.futures
 from typing import Generator
 import sys