From 0153f5c3bfb1424aab5c9a7875c17dcc05686ad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= Date: Thu, 27 Apr 2023 10:11:16 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=91=A8=E2=80=8D=F0=9F=92=BC=F0=9F=91=A9?= =?UTF-8?q?=E2=80=8D=F0=9F=92=BC=20feat:=20pm=20role?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/apis/gpt.py | 69 ++++----- src/cli.py | 4 +- src/options/generate/generator.py | 238 ++++++++++++++++++++++++++++-- src/options/generate/ui.py | 82 ++++++++++ 4 files changed, 342 insertions(+), 51 deletions(-) create mode 100644 src/options/generate/ui.py diff --git a/src/apis/gpt.py b/src/apis/gpt.py index a3ec8e3..807f917 100644 --- a/src/apis/gpt.py +++ b/src/apis/gpt.py @@ -8,7 +8,7 @@ from langchain import PromptTemplate from langchain.callbacks import CallbackManager from langchain.chat_models import ChatOpenAI from openai.error import RateLimitError -from langchain.schema import HumanMessage, SystemMessage, BaseMessage +from langchain.schema import HumanMessage, SystemMessage, BaseMessage, AIMessage from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from requests.exceptions import ConnectionError from urllib3.exceptions import InvalidChunkLength @@ -48,13 +48,12 @@ class GPTSession: self.chars_prompt_so_far = 0 self.chars_generation_so_far = 0 - def get_conversation(self, system_definition_examples: List[str] = ['gpt', 'executor', 'docarray', 'client']): + def get_conversation(self, messages: List[BaseMessage] = [], print_stream: bool = True, print_costs: bool = True): return _GPTConversation( - self.model_name, self.cost_callback, self.task_description, self.test_description, system_definition_examples + self.model_name, self.cost_callback, messages, print_stream, print_costs ) - @staticmethod def is_gpt4_available(): try: @@ -75,14 +74,15 @@ class GPTSession: except openai.error.InvalidRequestError: return False - def cost_callback(self, chars_prompt, chars_generation): + def cost_callback(self, chars_prompt, chars_generation, print_costs: bool = True): self.chars_prompt_so_far += chars_prompt self.chars_generation_so_far += chars_generation - print('\n') - money_prompt = self._calculate_money_spent(self.chars_prompt_so_far, self.pricing_prompt) - money_generation = self._calculate_money_spent(self.chars_generation_so_far, self.pricing_generation) - print('Total money spent so far on openai.com:', f'${money_prompt + money_generation:.3f}') - print('\n') + if print_costs: + print('\n') + money_prompt = self._calculate_money_spent(self.chars_prompt_so_far, self.pricing_prompt) + money_generation = self._calculate_money_spent(self.chars_generation_so_far, self.pricing_generation) + print('Total money spent so far on openai.com:', f'${money_prompt + money_generation:.3f}') + print('\n') @staticmethod def _calculate_money_spent(num_chars, price): @@ -96,29 +96,39 @@ class AssistantStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler): class _GPTConversation: - def __init__(self, model: str, cost_callback, task_description, test_description, system_definition_examples: List[str] = ['executor', 'docarray', 'client']): + def __init__(self, model: str, cost_callback, messages: List[BaseMessage], print_stream, print_costs): self._chat = ChatOpenAI( model_name=model, streaming=True, - callback_manager=CallbackManager([AssistantStreamingStdOutCallbackHandler()]), + callback_manager=CallbackManager([AssistantStreamingStdOutCallbackHandler()] if print_stream else []), verbose=True, 
             temperature=0,
         )
         self.cost_callback = cost_callback
-        self.messages: List[BaseMessage] = []
-        self.system_message = self._create_system_message(task_description, test_description, system_definition_examples)
-        if os.environ['VERBOSE'].lower() == 'true':
-            print_colored('system', self.system_message.content, 'magenta')
+        self.messages = messages
+        self.print_stream = print_stream
+        self.print_costs = print_costs
+        for message in messages:
+            if os.environ['VERBOSE'].lower() == 'true':
+                if isinstance(message, SystemMessage):
+                    print_colored('system - prompt', message.content, 'magenta')
+                elif isinstance(message, HumanMessage):
+                    print_colored('user - prompt', message.content, 'blue')
+                elif isinstance(message, AIMessage):
+                    print_colored('assistant - prompt', message.content, 'green')
 
-    def chat(self, prompt: str):
-        chat_message = HumanMessage(content=prompt)
+    def chat(self, prompt: str, role: str = 'user'):
+        MessageClass = HumanMessage if role == 'user' else SystemMessage
+        chat_message = MessageClass(content=prompt)
         self.messages.append(chat_message)
         if os.environ['VERBOSE'].lower() == 'true':
-            print_colored('user', prompt, 'blue')
-        print_colored('assistant', '', 'green', end='')
+            color = 'blue' if role == 'user' else 'magenta'
+            print_colored(role, prompt, color)
+        if self.print_stream:
+            print_colored('assistant', '', 'green', end='')
         for i in range(10):
             try:
-                response = self._chat([self.system_message] + self.messages)
+                response = self._chat(self.messages)
                 break
             except (ConnectionError, InvalidChunkLength) as e:
                 print('There was a connection error. Retrying...')
@@ -128,22 +138,7 @@ class _GPTConversation:
 
         if os.environ['VERBOSE'].lower() == 'true':
             print()
-        self.cost_callback(sum([len(m.content) for m in self.messages]), len(response.content))
+        self.cost_callback(sum([len(m.content) for m in self.messages]), len(response.content), self.print_costs)
         self.messages.append(response)
         return response.content
 
-    @staticmethod
-    def _create_system_message(task_description, test_description, system_definition_examples: List[str] = []) -> SystemMessage:
-        system_message = PromptTemplate.from_template(template_system_message_base).format(
-            task_description=task_description,
-            test_description=test_description,
-        )
-        if 'gpt' in system_definition_examples:
-            system_message += f'\n{gpt_example}'
-        if 'executor' in system_definition_examples:
-            system_message += f'\n{executor_example}'
-        if 'docarray' in system_definition_examples:
-            system_message += f'\n{docarray_example}'
-        if 'client' in system_definition_examples:
-            system_message += f'\n{client_example}'
-        return SystemMessage(content=system_message)
diff --git a/src/cli.py b/src/cli.py
index 13ecd75..5b8d763 100644
--- a/src/cli.py
+++ b/src/cli.py
@@ -50,8 +50,8 @@ def main(ctx):
 
 @openai_api_key_needed
 @main.command()
-@click.option('--description', required=True, help='Description of the microservice.')
-@click.option('--test', required=True, help='Test scenario for the microservice.')
+@click.option('--description', required=False, help='Description of the microservice.')
+@click.option('--test', required=False, help='Test scenario for the microservice.')
 @click.option('--model', default='gpt-4', help='GPT model to use (default: gpt-4).')
 @click.option('--verbose', default=False, is_flag=True, help='Verbose mode.') # only for development
 @path_param
diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 26c5360..b0d0006 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -2,30 +2,46 @@ import os
 import random
 import re
 import shutil
-from typing import List
+from typing import List, Text, Optional
+
+from langchain import PromptTemplate
+from langchain.schema import SystemMessage, HumanMessage, AIMessage
+from pydantic.dataclasses import dataclass
 
 from src.apis import gpt
 from src.apis.jina_cloud import process_error_message, push_executor, is_executor_in_hub
 from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
     PROBLEMATIC_PACKAGES, EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
     REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, DOCKER_FILE_TAG, UNNECESSARY_PACKAGES
+from src.options.generate.templates_system import template_system_message_base, gpt_example, executor_example, \
+    docarray_example, client_example
 from src.options.generate.templates_user import template_generate_microservice_name, \
     template_generate_possible_packages, \
     template_solve_code_issue, \
     template_solve_dependency_issue, template_is_dependency_issue, template_generate_playground, \
     template_generate_executor, template_generate_test, template_generate_requirements, template_generate_dockerfile, \
     template_chain_of_thought, template_summarize_error, template_generate_possible_packages_output_format_string
+from src.options.generate.ui import get_random_employee
 from src.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
 from src.utils.string_tools import print_colored
 
 
+@dataclass
+class TaskSpecification:
+    task: Optional[Text]
+    test: Optional[Text]
+
+system_task_introduction = '''
+You are a product manager who refines the requirements of a client who wants to create a microservice.
+'''
+
 class Generator:
     def __init__(self, task_description, test_description, model='gpt-4'):
         self.gpt_session = gpt.GPTSession(task_description, test_description, model=model)
-        self.task_description = task_description
-        self.test_description = test_description
+        self.microservice_specification = TaskSpecification(task=task_description, test=test_description)
 
     def extract_content_from_result(self, plain_text, file_name, match_single_block=False):
+        pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)\n```"  # the \n before the closing ``` makes sure that a ``` within the generated code is not matched
         match = re.search(pattern, plain_text, re.MULTILINE)
         if match:
@@ -56,9 +72,23 @@ metas:
 
         return all_microservice_files_string.strip()
 
-    def generate_and_persist_file(self, section_title, template, destination_folder=None, file_name=None, system_definition_examples: List[str] = ['gpt', 'executor', 'docarray', 'client'], **template_kwargs):
+    def generate_and_persist_file(
+            self,
+            section_title,
+            template,
+            destination_folder=None,
+            file_name=None,
+            system_definition_examples: List[str] = ['gpt', 'executor', 'docarray', 'client'],
+            **template_kwargs
+    ):
+        """
+        Generates a file via the OpenAI chat API and persists it to the destination folder if specified.
+        If the content is not generated properly, the generation is retried.
+        It returns the generated content.
+        """
         print_colored('', f'\n\n############# {section_title} #############', 'blue')
-        conversation = self.gpt_session.get_conversation(system_definition_examples=system_definition_examples)
+        system_introduction_message = self._create_system_message(self.microservice_specification.task, self.microservice_specification.test, system_definition_examples)
+        conversation = self.gpt_session.get_conversation(messages=[system_introduction_message])
         template_kwargs = {k: v for k, v in template_kwargs.items() if k in template.input_variables}
         content_raw = conversation.chat(
             template.format(
@@ -91,8 +121,8 @@ metas:
             template_generate_executor,
             MICROSERVICE_FOLDER_v1,
             microservice_name=microservice_name,
-            microservice_description=self.task_description,
-            test_description=self.test_description,
+            microservice_description=self.microservice_specification.task,
+            test_description=self.microservice_specification.test,
             packages=packages,
             file_name_purpose=EXECUTOR_FILE_NAME,
             tag_name=EXECUTOR_FILE_TAG,
@@ -105,8 +135,8 @@ metas:
             MICROSERVICE_FOLDER_v1,
             code_files_wrapped=self.files_to_string({'microservice.py': microservice_content}),
             microservice_name=microservice_name,
-            microservice_description=self.task_description,
-            test_description=self.test_description,
+            microservice_description=self.microservice_specification.task,
+            test_description=self.microservice_specification.test,
             file_name_purpose=TEST_EXECUTOR_FILE_NAME,
             tag_name=TEST_EXECUTOR_FILE_TAG,
             file_name=TEST_EXECUTOR_FILE_NAME,
@@ -235,7 +265,7 @@ metas:
             )
         else:
             user_query = template_solve_code_issue.format(
-                task_description=self.task_description, test_description=self.test_description,
+                task_description=self.microservice_specification.task, test_description=self.microservice_specification.test,
                 summarized_error=summarized_error,
                 all_files_string=self.files_to_string(file_name_to_content),
             )
         conversation = self.gpt_session.get_conversation()
@@ -276,7 +306,7 @@ metas:
             None,
             file_name='packages.csv',
             system_definition_examples=['gpt'],
-            description=self.task_description
+            description=self.microservice_specification.task
         )
         packages_list = [[pkg.strip() for pkg in packages_string.split(',')] for packages_string in packages_csv_string.split('\n')]
@@ -284,7 +314,8 @@ metas:
         return packages_list
 
     def generate(self, microservice_path):
-        generated_name = self.generate_microservice_name(self.task_description)
+        self.refine_specification()
+        generated_name = self.generate_microservice_name(self.microservice_specification.task)
         microservice_name = f'{generated_name}{random.randint(0, 10_000_000)}'
         packages_list = self.get_possible_packages()
         packages_list = [
@@ -320,3 +351,186 @@ gptdeploy deploy --path {microservice_path}
         error_summary = conversation.chat(template_summarize_error.format(error=error))
         return error_summary
 
+    def refine_specification(self):
+        pm = get_random_employee('pm')
+        print(f'{pm.emoji}👋 Hi, I\'m {pm.name}, a PM at Jina AI. I\'ll gather the requirements for our engineers.')
+        self.refine_task(pm)
+        self.refine_test(pm)
+        print(f'''
+{pm.emoji} 👍 Great, I will hand over the following requirements to our engineers:
+{self.microservice_specification.task}
+The following test scenario will be used:
+{self.microservice_specification.test}
+''')
+
+    def refine_task(self, pm):
+        system_task_iteration = '''
+The client writes a description of the microservice.
+You must only talk to the client about the microservice.
+You must not output anything other than what is specified in the following steps.
+1.
+You must create a checklist for the requirements of the microservice.
+Input and output have to be accurately specified.
+You must use the following format (insert ✅, ❌, or n/a) depending on whether the requirement is fulfilled, not fulfilled, or not applicable:
+input:
+output:
+credentials:
+database access:
+
+2.
+You must do either a or b.
+a)
+If the description is not sufficiently specified, then ask for the missing information.
+Your response must exactly match the following code block format:
+
+**prompt.txt**
+```text
+
+```
+
+b)
+Otherwise, respond with the summarized description.
+The summarized description must contain all the information mentioned by the client.
+Your response must exactly match the following code block format:
+
+**task-final.txt**
+```text
+
+``` <-- this must be on a new line
+
+The character sequence ``` must always be at the beginning of the line.
+You must not add information that was not provided by the client.
+
+
+Example for the description "given a city, get the weather report for the next 5 days":
+input: ✅
+output: ✅
+credentials: ❌
+database access: n/a
+
+**prompt.txt**
+```text
+Please provide the URL of the weather API and a valid API key. Or let our engineers try to find a free API.
+```
+
+
+Example for the description "convert png to svg":
+input: ✅
+output: ✅
+credentials: n/a
+database access: n/a
+
+**task-final.txt**
+```text
+The user inserts a png and gets an svg as response.
+```
+
+
+Example for the description "parser":
+input: ❌
+output: ❌
+credentials: n/a
+database access: n/a
+
+**prompt.txt**
+```text
+Please provide the input and output format.
+```
+
+'''
+
+        task_description = self.microservice_specification.task
+        if not task_description:
+            task_description = self.get_user_input(pm, 'What should your microservice do?')
+        messages = [
+            SystemMessage(content=system_task_introduction + system_task_iteration),
+        ]
+
+        while True:
+            conversation = self.gpt_session.get_conversation(messages, print_stream=os.environ['VERBOSE'].lower() == 'true', print_costs=False)
+            print('thinking...')
+            agent_response_raw = conversation.chat(task_description, role='user')
+
+            question = self.extract_content_from_result(agent_response_raw, 'prompt.txt')
+            task_final = self.extract_content_from_result(agent_response_raw, 'task-final.txt')
+            if task_final:
+                self.microservice_specification.task = task_final
+                break
+            if question:
+                # the conversation shares the `messages` list and `chat()` already
+                # appended this exchange, so the next answer must not be added here again
+                task_description = self.get_user_input(pm, question)
+            else:
+                task_description = self.get_user_input(pm, agent_response_raw + '\n: ')
+
+    def refine_test(self, pm):
+        system_test_iteration = '''
+The client gives you a description of the microservice.
+Your task is to verbally describe a unit test for that microservice.
+There are two cases:
+a) The unit test requires a file as input.
+In this case you must ask the client to provide the file as a URL.
+Your response must exactly match the following code block format:
+
+**prompt.txt**
+```text
+
+```
+
+If you did a, you must not do b.
+b) Any strings, ints, or bools can be used as input for the unit test.
+In this case you must describe the unit test verbally.
+Your response must exactly match the following code block format:
+
+**test-final.txt**
+```text
+
+```
+
+If you did b, you must not do a.
+
+Example for the description "given a city, get the weather report for the next 5 days using the api":
+'''
+        messages = [
+            SystemMessage(content=system_task_introduction + system_test_iteration),
+        ]
+        user_input = self.microservice_specification.task
+        while True:
+            conversation = self.gpt_session.get_conversation(messages, print_stream=os.environ['VERBOSE'].lower() == 'true', print_costs=False)
+            agent_response_raw = conversation.chat(user_input, role='user')
+            question = self.extract_content_from_result(agent_response_raw, 'prompt.txt')
+            test_final = self.extract_content_from_result(agent_response_raw, 'test-final.txt')
+            if test_final:
+                self.microservice_specification.test = test_final
+                break
+            if question:
+                # as in refine_task, `chat()` already appended this exchange to `messages`
+                user_input = self.get_user_input(pm, question)
+            else:
+                user_input = self.get_user_input(pm, agent_response_raw + '\n: ')
+
+    @staticmethod
+    def _create_system_message(task_description, test_description, system_definition_examples: List[str] = []) -> SystemMessage:
+        system_message = PromptTemplate.from_template(template_system_message_base).format(
+            task_description=task_description,
+            test_description=test_description,
+        )
+        if 'gpt' in system_definition_examples:
+            system_message += f'\n{gpt_example}'
+        if 'executor' in system_definition_examples:
+            system_message += f'\n{executor_example}'
+        if 'docarray' in system_definition_examples:
+            system_message += f'\n{docarray_example}'
+        if 'client' in system_definition_examples:
+            system_message += f'\n{client_example}'
+        return SystemMessage(content=system_message)
+
+    @staticmethod
+    def get_user_input(employee, prompt_to_user):
+        val = input(f'{employee.emoji}❓ {prompt_to_user}\nyou: ')
+        while not val:
+            val = input('you: ')
+        return val
diff --git a/src/options/generate/ui.py b/src/options/generate/ui.py
new file mode 100644
index 0000000..81d7a75
--- /dev/null
+++ b/src/options/generate/ui.py
@@ -0,0 +1,82 @@
+import random
+from dataclasses import dataclass
+
+product_manager_names = [
+    ('Leon', 'm'),
+    ('Saahil', 'm'),
+    ('Susana', 'f')
+]
+engineer_names = [
+    ('Aaron', 'm'),
+    ('Alaeddine', 'm'),
+    ('Andrei', 'm'),
+    ('Anne', 'f'),
+    ('Bo', 'm'),
+    ('Charlotte', 'f'),
+    ('David', 'm'),
+    ('Deepankar', 'm'),
+    ('Delgermurun', 'm'),
+    ('Edward', 'm'),
+    ('Felix', 'm'),
+    ('Florian', 'm'),
+    ('Georgios', 'm'),
+    ('Girish', 'm'),
+    ('Guillaume', 'm'),
+    ('Isabelle', 'f'),
+    ('Jackmin', 'm'),
+    ('Jie', 'm'),
+    ('Joan', 'm'),
+    ('Johanna', 'f'),
+    ('Johannes', 'm'),
+    ('Joschka', 'm'),
+    ('Lechun', 'm'),
+    ('Louis', 'm'),
+    ('Mark', 'm'),
+    ('Maximilian', 'm'),
+    ('Michael', 'm'),
+    ('Mohamed Aziz', 'm'),
+    ('Mohammad Kalim', 'm'),
+    ('Nikos', 'm'),
+    ('Ran', 'm'),
+    ('Saba', 'f'),
+    ('Sami', 'm'),
+    ('Sha', 'm'),
+    ('Subba Reddy', 'm'),
+    ('Tanguy', 'm'),
+    ('Winston', 'm'),
+    ('Yadh', 'm'),
+    ('Yanlong', 'm'),
+    ('Zac', 'm'),
+    ('Zhaofeng', 'm'),
+    ('Zihao', 'm'),
+    ('Ziniu', 'm')
+]
+
+role_to_gender_to_emoji = {
+    'engineer': {
+        'm': '👨‍💻',
+        'f': '👩‍💻'
+    },
+    'pm': {
+        'm': '👨‍💼',
+        'f': '👩‍💼'
+    },
+    'qa_engineer': {
+        'm': '👨‍🔧',
+        'f': '👩‍🔧',
+    },
+}
+
+@dataclass
+class Employee:
+    role: str
+    name: str
+    gender: str
+    emoji: str
+
+def get_random_employee(role: str) -> Employee:
+    # pick from the name pool that matches the requested role
+    names = product_manager_names if role == 'pm' else engineer_names
+    name, gender = random.choice(names)
+    emoji = role_to_gender_to_emoji[role][gender]
+    return Employee(role, name, gender, emoji)
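
Reviewer note: a minimal usage sketch of the conversation API as changed by this patch, not
part of the diff itself. It assumes a configured OPENAI_API_KEY; the task/test strings and the
system prompt content are illustrative placeholders, and the signatures follow the hunks above.

    import os

    from langchain.schema import SystemMessage

    from src.apis.gpt import GPTSession
    from src.options.generate.ui import get_random_employee

    os.environ['VERBOSE'] = 'false'  # _GPTConversation reads this flag

    # signature per the call in Generator.__init__ above; arguments are placeholders
    session = GPTSession('convert png to svg', 'assert the response is an svg', model='gpt-4')
    pm = get_random_employee('pm')

    # conversations are now seeded with explicit messages instead of
    # system_definition_examples, and can run without streaming or cost output
    conversation = session.get_conversation(
        messages=[SystemMessage(content='You are a product manager ...')],
        print_stream=False,
        print_costs=False,
    )
    answer = conversation.chat('The user inserts a png and gets an svg as response.', role='user')
    print(f'{pm.emoji} {pm.name}: {answer}')

Because `chat()` appends both the user message and the model's reply to the very list that was
passed in, callers that keep a reference to `messages` (as refine_task and refine_test do) see
the full transcript without re-appending messages themselves.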