From bf2cd9edc77286d0de86a819d886df127f1659bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= Date: Sun, 16 Apr 2023 19:59:38 +0200 Subject: [PATCH] fix: missing merge --- README.md | 2 +- src/apis/gpt.py | 29 ++++++------ src/apis/jina_cloud.py | 26 ++++++++++- src/cli.py | 10 +++-- src/constants.py | 4 +- src/options/generate/generator.py | 63 ++++++++++++++++----------- src/options/generate/prompt_system.py | 6 ++- src/options/generate/prompt_tasks.py | 7 +-- src/utils/io.py | 1 - 9 files changed, 97 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 8f30a33..e428db9 100644 --- a/README.md +++ b/README.md @@ -475,7 +475,7 @@ Make sure it is only printed twice in case it changed. - [ ] test feedback for playground generation (could be part of the debugging) - [ ] should we send everything via json in the text attribute for simplicity? - [ ] fix release workflow -- +- [ ] after the user specified the task, ask them questions back if the task is not clear enough or something is missing Proposal: - [ ] just generate the non-jina related code and insert it into an executor template diff --git a/src/apis/gpt.py b/src/apis/gpt.py index 038ec85..1ce579e 100644 --- a/src/apis/gpt.py +++ b/src/apis/gpt.py @@ -1,12 +1,13 @@ import os from time import sleep -from typing import List, Tuple + +from typing import List, Tuple, Optional import openai from openai.error import RateLimitError, Timeout from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \ - PRICING_GPT3_5_TURBO_GENERATION + PRICING_GPT3_5_TURBO_GENERATION, CHARS_PER_TOKEN from src.options.generate.prompt_system import system_base_definition, executor_example, docarray_example, client_example from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError from src.utils.string_tools import print_colored @@ -61,28 +62,30 @@ If you have updated it already, please restart your terminal. 
self.chars_prompt_so_far += chars_prompt self.chars_generation_so_far += chars_generation print('\n') - money_prompt = round(self.chars_prompt_so_far / 3.4 * self.pricing_prompt / 1000, 3) - money_generation = round(self.chars_generation_so_far / 3.4 * self.pricing_generation / 1000, 3) - print('Estimated costs on openai.com:') - # print('money prompt:', f'${money_prompt}') - # print('money generation:', f'${money_generation}') - print('total money spent so far:', f'${money_prompt + money_generation}') + money_prompt = self.calculate_money_spent(self.chars_prompt_so_far, self.pricing_prompt) + money_generation = self.calculate_money_spent(self.chars_generation_so_far, self.pricing_generation) + print('Total money spent so far on openai.com:', f'${money_prompt + money_generation}') print('\n') def get_conversation(self, system_definition_examples: List[str] = ['executor', 'docarray', 'client']): return _GPTConversation(self.supported_model, self.cost_callback, system_definition_examples) + def calculate_money_spent(self, num_chars, price): + return round(num_chars / CHARS_PER_TOKEN * price / 1000, 3) + class _GPTConversation: def __init__(self, model: str, cost_callback, system_definition_examples: List[str] = ['executor', 'docarray', 'client']): self.model = model self.cost_callback = cost_callback - self.prompt_list = [None] + self.prompt_list: List[Optional[Tuple]] = [None] self.set_system_definition(system_definition_examples) - print_colored('system', self.prompt_list[0][1], 'magenta') + if os.environ.get('VERBOSE') == 'True': + print_colored('system', self.prompt_list[0][1], 'magenta') def query(self, prompt: str): - print_colored('user', prompt, 'blue') + if os.environ.get('VERBOSE') == 'True': + print_colored('user', prompt, 'blue') self.prompt_list.append(('user', prompt)) response = self.get_response(self.prompt_list) self.prompt_list.append(('assistant', response)) @@ -129,8 +132,8 @@ class _GPTConversation: complete_string = self.get_response_from_stream(response_generator) 
except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e: - print(e) - print('retrying, be aware that this might affect the cost calculation') + print('\n', e) + print('retrying...') sleep(3) continue chars_prompt = sum(len(prompt[1]) for prompt in prompt_list) diff --git a/src/apis/jina_cloud.py b/src/apis/jina_cloud.py index 0a688c0..5769b96 100644 --- a/src/apis/jina_cloud.py +++ b/src/apis/jina_cloud.py @@ -162,7 +162,19 @@ def run_locally(executor_name, microservice_version_path): if is_docker_running(): use_docker = True else: - click.echo('Docker daemon doesn\'t seem to be running. Trying to start it without docker') + click.echo(''' +Docker daemon doesn\'t seem to be running. +It might be important to run your microservice within a docker container. +Your machine might not have all the dependencies installed. +You have 3 options: +a) start the docker daemon +b) run gptdeploy deploy... to deploy your microservice on Jina Cloud. All dependencies will be installed there. +c) try to run your microservice locally without docker. It is worth a try but might fail. +''' + ) + user_input = click.prompt('Do you want to run your microservice locally without docker? 
(Y/n)', type=str, default='y') + if user_input.lower() != 'y': + exit(1) use_docker = False print('Run a jina flow locally') full_flow_path = create_flow_yaml(microservice_version_path, executor_name, use_docker) @@ -238,6 +250,7 @@ def update_client_line_in_file(file_path, host): def process_error_message(error_message): lines = error_message.split('\n') + relevant_lines = [] pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]") # Pattern to match lines like "#11 [7/8]" @@ -250,7 +263,16 @@ def process_error_message(error_message): if last_matching_line_index is not None: relevant_lines = lines[last_matching_line_index:] - return '\n'.join(relevant_lines[-25:]).strip() + response = '\n'.join(relevant_lines[-25:]).strip() + + # the following code tests the case that the docker file is corrupted and can not be parsed + # the method above will not return a relevant error message in this case + # but the last line of the error message will start with "error" + + last_line = lines[-1] + if not response and last_line.startswith('error: '): + return last_line + return response def build_docker(path): diff --git a/src/cli.py b/src/cli.py index 9f2173a..db24889 100644 --- a/src/cli.py +++ b/src/cli.py @@ -44,21 +44,25 @@ def main(ctx): @main.command() @click.option('--description', required=True, help='Description of the microservice.') @click.option('--test', required=True, help='Test scenario for the microservice.') -@path_param @click.option('--model', default='gpt-4', help='GPT model to use (default: gpt-4).') +@click.option('--verbose', default=False, is_flag=True, help='Verbose mode.') +@path_param def generate( description, test, + model, + verbose, path, - model='gpt-4' ): - from src.options.generate.generator import Generator + os.environ['VERBOSE'] = str(verbose) path = os.path.expanduser(path) path = os.path.abspath(path) if os.path.exists(path): if os.listdir(path): click.echo(f"Error: The path {path} you provided via --path is not empty. 
Please choose a directory that does not exist or is empty.") return + + from src.options.generate.generator import Generator generator = Generator(model=model) generator.generate(description, test, path) diff --git a/src/constants.py b/src/constants.py index 6c34bec..44d1143 100644 --- a/src/constants.py +++ b/src/constants.py @@ -28,7 +28,9 @@ PRICING_GPT4_GENERATION = 0.06 PRICING_GPT3_5_TURBO_PROMPT = 0.002 PRICING_GPT3_5_TURBO_GENERATION = 0.002 -NUM_IMPLEMENTATION_STRATEGIES = 3 +CHARS_PER_TOKEN = 3.4 + +NUM_IMPLEMENTATION_STRATEGIES = 5 MAX_DEBUGGING_ITERATIONS = 10 DEMO_TOKEN = '45372338e04f5a41af949024db929d46' \ No newline at end of file diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py index 73b7b19..6d1e610 100644 --- a/src/options/generate/generator.py +++ b/src/options/generate/generator.py @@ -66,7 +66,7 @@ class Generator: MICROSERVICE_FOLDER_v1 = get_microservice_path(path, microservice_name, package, num_approach, 1) os.makedirs(MICROSERVICE_FOLDER_v1) - print_colored('', '############# Microservice #############', 'red') + print_colored('', '############# Microservice #############', 'blue') user_query = ( general_guidelines() + executor_file_task(microservice_name, description, test, package) @@ -85,7 +85,7 @@ class Generator: ) persist_file(microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'microservice.py')) - print_colored('', '############# Test Microservice #############', 'red') + print_colored('', '############# Test Microservice #############', 'blue') user_query = ( general_guidelines() + self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python') @@ -99,12 +99,12 @@ class Generator: chain_of_thought_optimization('python', 'test_microservice.py') + "Don't add any additional tests. 
" ) - microservice_content = self.extract_content_from_result( - microservice_content_raw, 'microservice.py', match_single_block=True + test_microservice_content = self.extract_content_from_result( + test_microservice_content_raw, 'microservice.py', match_single_block=True ) - persist_file(microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'test_microservice.py')) + persist_file(test_microservice_content, os.path.join(MICROSERVICE_FOLDER_v1, 'test_microservice.py')) - print_colored('', '############# Requirements #############', 'red') + print_colored('', '############# Requirements #############', 'blue') requirements_path = os.path.join(MICROSERVICE_FOLDER_v1, 'requirements.txt') user_query = ( general_guidelines() @@ -121,7 +121,7 @@ class Generator: requirements_content = self.extract_content_from_result(requirements_content_raw, 'requirements.txt', match_single_block=True) persist_file(requirements_content, requirements_path) - print_colored('', '############# Dockerfile #############', 'red') + print_colored('', '############# Dockerfile #############', 'blue') user_query = ( general_guidelines() + self.wrap_content_in_code_block(microservice_content, 'microservice.py', 'python') @@ -141,7 +141,7 @@ class Generator: print('First version of the microservice generated. Start iterating on it to make the tests pass...') def generate_playground(self, microservice_name, microservice_path): - print_colored('', '############# Playground #############', 'red') + print_colored('', '############# Playground #############', 'blue') file_name_to_content = get_all_microservice_files_with_content(microservice_path) user_query = ( @@ -170,7 +170,7 @@ The playground (app.py) must not let the user configure the host on the ui. 
conversation.query(user_query) playground_content_raw = conversation.query(chain_of_thought_optimization('python', 'app.py', 'the playground')) playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True) - persist_file(playground_content, os.path.join(miicroservice_path, 'app.py')) + persist_file(playground_content, os.path.join(microservice_path, 'app.py')) def debug_microservice(self, path, microservice_name, num_approach, packages, description, test): @@ -183,8 +183,9 @@ The playground (app.py) must not let the user configure the host on the ui. log_hubble = push_executor(previous_microservice_path) error = process_error_message(log_hubble) if error: + print('An error occurred during the build process. Feeding the error back to the assistant...') os.makedirs(next_microservice_path) - file_name_to_content = self.get_all_microservice_files_with_content(previous_executor_path) + file_name_to_content = get_all_microservice_files_with_content(previous_microservice_path) is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile']) @@ -247,6 +248,7 @@ The playground (app.py) must not let the user configure the host on the ui. if any([error_message in error for error_message in ['AttributeError', 'NameError', 'AssertionError']]): return False + print_colored('', 'Is it a dependency issue?', 'blue') conversation = self.gpt_session.get_conversation([]) answer = conversation.query( f'Your task is to assist in identifying the root cause of a Docker build error for a python application. ' @@ -278,27 +280,34 @@ PDFParserExecutor return name def get_possible_packages(self, description): - print_colored('', '############# What package to use? #############', 'red') + print_colored('', '############# What packages to use? 
#############', 'blue') user_query = f''' -Here is the task description of the problme you need to solve: +Here is the task description of the problem you need to solve: "{description}" -First, write down all the subtasks you need to solve which require python packages. -For each subtask: - Provide a list of 1 to 3 python packages you could use to solve the subtask. Prefer modern packages. - For each package: - Write down some non-obvious thoughts about the challenges you might face for the task and give multiple approaches on how you handle them. - For example, there might be some packages you must not use because they do not obay the rules: - {not_allowed_executor()} - Discuss the pros and cons for all of these packages. -Create a list of package subsets that you could use to solve the task. -The list is sorted in a way that the most promising subset of packages is at the top. -The maximum length of the list is 5. +1. Write down all the non-trivial subtasks you need to solve. +2. Find out what is the core problem to solve. +3. Provide a list of all python packages you can think of that could directly be used to solve the core problem. +4. Provide a list of the 7 most promising python packages that fulfill the following requirements: +- can directly be used to solve the core problem +- has a stable api among different versions +- does not have system requirements + +For each package: + a) Write down some non-obvious challenges you might face with the package when implementing your task and give multiple approaches on how you handle them. + For example, you might find out that you must not use the package because it does not obey the rules: + {not_allowed_executor()} + b) Discuss the pros and cons for the package. + +5. Output the best 5 python packages starting with the best one. The output must be a list of lists wrapped into ``` and starting with **packages.csv** like this: **packages.csv** ``` -package1,package2 -package2,package3,... 
+package1 +package2 +package3 +package4 +package5 ... ``` ''' @@ -319,7 +328,9 @@ package2,package3,... final_version_path = self.debug_microservice(microservice_path, microservice_name, num_approach, packages, description, test) self.generate_playground(microservice_name, final_version_path) except self.MaxDebugTimeReachedException: - print('Could not debug the Microservice.') + print('Could not debug the Microservice with the approach:', packages) + if num_approach == len(packages_list) - 1: + print_colored('', f'Could not debug the Microservice with any of the approaches: {packages} giving up.', 'red') continue print(f''' You can now run or deploy your microservice: diff --git a/src/options/generate/prompt_system.py b/src/options/generate/prompt_system.py index 02a38ee..8b0c987 100644 --- a/src/options/generate/prompt_system.py +++ b/src/options/generate/prompt_system.py @@ -71,4 +71,8 @@ print(response[0].text) ```''' -system_base_definition = f'''You are a principal engineer working at Jina - an open source company. You accurately satisfy all of the user's requirements.''' \ No newline at end of file +system_base_definition = f''' +It is the year 2021. +You are a principal engineer working at Jina - an open source company. +You accurately satisfy all of the user's requirements. +''' \ No newline at end of file diff --git a/src/options/generate/prompt_tasks.py b/src/options/generate/prompt_tasks.py index 239f952..b9ac614 100644 --- a/src/options/generate/prompt_tasks.py +++ b/src/options/generate/prompt_tasks.py @@ -51,7 +51,7 @@ def test_executor_file_task(executor_name, test_scenario): "Start the test with an extensive comment about the test case. " + ( f"Write a single test case that tests the following scenario: '{test_scenario}'. " - f"In case the test scenario is not precise enough, test the most general case without any assumptions." + f"In case the test scenario is not precise enough, test a general case without any assumptions." 
if test_scenario else "" ) + "Use the following import to import the executor: " @@ -82,7 +82,7 @@ def docker_file_task(): "Usually libraries are installed with apt-get. " "Be aware that the machine the docker container is running on does not have a GPU - only CPU. " "Add the config.yml file to the Dockerfile. Note that the Dockerfile only has access to the files: " - "executor.py, requirements.txt, config.yml, test_executor.py. " + "microservice.py, requirements.txt, config.yml, test_microservice.py. " "The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. " 'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. ' 'Make sure the all files are in the /workdir. ' @@ -149,6 +149,7 @@ The executor must not use any attribute of Document accept Document.text. def not_allowed_docker(): return ''' -Note that the Dockerfile only has access to the files: executor.py, requirements.txt, config.yml, test_executor.py. +Note that the Dockerfile only has access to the files: microservice.py, requirements.txt, config.yml, test_microservice.py. Note that the Dockerfile runs the test_microservice.py during the build process. +Note that it is not allowed to attach a virtual display when running test_microservice.py. ''' diff --git a/src/utils/io.py b/src/utils/io.py index 68f388d..c3a70b0 100644 --- a/src/utils/io.py +++ b/src/utils/io.py @@ -1,6 +1,5 @@ import os -import concurrent.futures import concurrent.futures from typing import Generator import sys