From 0153f5c3bfb1424aab5c9a7875c17dcc05686ad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= Date: Thu, 27 Apr 2023 10:11:16 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=91=A8=E2=80=8D=F0=9F=92=BC=F0=9F=91=A9?= =?UTF-8?q?=E2=80=8D=F0=9F=92=BC=20feat:=20pm=20role?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/apis/gpt.py | 69 ++++----- src/cli.py | 4 +- src/options/generate/generator.py | 238 ++++++++++++++++++++++++++++-- src/options/generate/ui.py | 82 ++++++++++ 4 files changed, 342 insertions(+), 51 deletions(-) create mode 100644 src/options/generate/ui.py diff --git a/src/apis/gpt.py b/src/apis/gpt.py index a3ec8e3..807f917 100644 --- a/src/apis/gpt.py +++ b/src/apis/gpt.py @@ -8,7 +8,7 @@ from langchain import PromptTemplate from langchain.callbacks import CallbackManager from langchain.chat_models import ChatOpenAI from openai.error import RateLimitError -from langchain.schema import HumanMessage, SystemMessage, BaseMessage +from langchain.schema import HumanMessage, SystemMessage, BaseMessage, AIMessage from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from requests.exceptions import ConnectionError from urllib3.exceptions import InvalidChunkLength @@ -48,13 +48,12 @@ class GPTSession: self.chars_prompt_so_far = 0 self.chars_generation_so_far = 0 - def get_conversation(self, system_definition_examples: List[str] = ['gpt', 'executor', 'docarray', 'client']): + def get_conversation(self, messages: List[BaseMessage] = [], print_stream: bool = True, print_costs: bool = True): return _GPTConversation( - self.model_name, self.cost_callback, self.task_description, self.test_description, system_definition_examples + self.model_name, self.cost_callback, messages, print_stream, print_costs ) - @staticmethod def is_gpt4_available(): try: @@ -75,14 +74,15 @@ class GPTSession: except openai.error.InvalidRequestError: return False - def cost_callback(self, chars_prompt, chars_generation): + def cost_callback(self, chars_prompt, chars_generation, print_costs: bool = True): self.chars_prompt_so_far += chars_prompt self.chars_generation_so_far += chars_generation - print('\n') - money_prompt = self._calculate_money_spent(self.chars_prompt_so_far, self.pricing_prompt) - money_generation = self._calculate_money_spent(self.chars_generation_so_far, self.pricing_generation) - print('Total money spent so far on openai.com:', f'${money_prompt + money_generation:.3f}') - print('\n') + if print_costs: + print('\n') + money_prompt = self._calculate_money_spent(self.chars_prompt_so_far, self.pricing_prompt) + money_generation = self._calculate_money_spent(self.chars_generation_so_far, self.pricing_generation) + print('Total money spent so far on openai.com:', f'${money_prompt + money_generation:.3f}') + print('\n') @staticmethod def _calculate_money_spent(num_chars, price): @@ -96,29 +96,39 @@ class AssistantStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler): class _GPTConversation: - def __init__(self, model: str, cost_callback, task_description, test_description, system_definition_examples: List[str] = ['executor', 'docarray', 'client']): + def __init__(self, model: str, cost_callback, messages: List[BaseMessage], print_stream, print_costs): self._chat = ChatOpenAI( model_name=model, streaming=True, - callback_manager=CallbackManager([AssistantStreamingStdOutCallbackHandler()]), + callback_manager=CallbackManager([AssistantStreamingStdOutCallbackHandler()] if print_stream else []), verbose=True, 
             temperature=0,
         )
         self.cost_callback = cost_callback
-        self.messages: List[BaseMessage] = []
-        self.system_message = self._create_system_message(task_description, test_description, system_definition_examples)
-        if os.environ['VERBOSE'].lower() == 'true':
-            print_colored('system', self.system_message.content, 'magenta')
+        self.messages = messages
+        self.print_stream = print_stream
+        self.print_costs = print_costs
+        for message in messages:
+            if os.environ['VERBOSE'].lower() == 'true':
+                if isinstance(message, SystemMessage):
+                    print_colored('system - prompt', message.content, 'magenta')
+                elif isinstance(message, HumanMessage):
+                    print_colored('user - prompt', message.content, 'blue')
+                elif isinstance(message, AIMessage):
+                    print_colored('assistant - prompt', message.content, 'green')
 
-    def chat(self, prompt: str):
-        chat_message = HumanMessage(content=prompt)
+    def chat(self, prompt: str, role: str = 'user'):
+        MessageClass = HumanMessage if role == 'user' else SystemMessage
+        chat_message = MessageClass(content=prompt)
         self.messages.append(chat_message)
         if os.environ['VERBOSE'].lower() == 'true':
-            print_colored('user', prompt, 'blue')
-        print_colored('assistant', '', 'green', end='')
+            color = 'blue' if role == 'user' else 'magenta'
+            print_colored(role, prompt, color)
+        if self.print_stream:
+            print_colored('assistant', '', 'green', end='')
         for i in range(10):
             try:
-                response = self._chat([self.system_message] + self.messages)
+                response = self._chat(self.messages)
                 break
             except (ConnectionError, InvalidChunkLength) as e:
                 print('There was a connection error. Retrying...')
@@ -128,22 +138,7 @@ class _GPTConversation:
 
         if os.environ['VERBOSE'].lower() == 'true':
             print()
-        self.cost_callback(sum([len(m.content) for m in self.messages]), len(response.content))
+        self.cost_callback(sum([len(m.content) for m in self.messages]), len(response.content), self.print_costs)
         self.messages.append(response)
         return response.content
 
-    @staticmethod
-    def _create_system_message(task_description, test_description, system_definition_examples: List[str] = []) -> SystemMessage:
-        system_message = PromptTemplate.from_template(template_system_message_base).format(
-            task_description=task_description,
-            test_description=test_description,
-        )
-        if 'gpt' in system_definition_examples:
-            system_message += f'\n{gpt_example}'
-        if 'executor' in system_definition_examples:
-            system_message += f'\n{executor_example}'
-        if 'docarray' in system_definition_examples:
-            system_message += f'\n{docarray_example}'
-        if 'client' in system_definition_examples:
-            system_message += f'\n{client_example}'
-        return SystemMessage(content=system_message)
diff --git a/src/cli.py b/src/cli.py
index 13ecd75..5b8d763 100644
--- a/src/cli.py
+++ b/src/cli.py
@@ -50,8 +50,8 @@ def main(ctx):
 
 @openai_api_key_needed
 @main.command()
-@click.option('--description', required=True, help='Description of the microservice.')
-@click.option('--test', required=True, help='Test scenario for the microservice.')
+@click.option('--description', required=False, help='Description of the microservice.')
+@click.option('--test', required=False, help='Test scenario for the microservice.')
 @click.option('--model', default='gpt-4', help='GPT model to use (default: gpt-4).')
 @click.option('--verbose', default=False, is_flag=True, help='Verbose mode.') # only for development
 @path_param
diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 26c5360..b0d0006 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -2,30 +2,46 @@ import os
 import random
 import re
 import shutil
-from typing import List
+from typing import List, Text, Optional
+
+from langchain import PromptTemplate
+from langchain.schema import SystemMessage, HumanMessage, AIMessage
+from pydantic.dataclasses import dataclass
 
 from src.apis import gpt
 from src.apis.jina_cloud import process_error_message, push_executor, is_executor_in_hub
 from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
     PROBLEMATIC_PACKAGES, EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
     REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, DOCKER_FILE_TAG, UNNECESSARY_PACKAGES
+from src.options.generate.templates_system import template_system_message_base, gpt_example, executor_example, \
+    docarray_example, client_example
 from src.options.generate.templates_user import template_generate_microservice_name, \
     template_generate_possible_packages, \
     template_solve_code_issue, \
     template_solve_dependency_issue, template_is_dependency_issue, template_generate_playground, \
     template_generate_executor, template_generate_test, template_generate_requirements, template_generate_dockerfile, \
     template_chain_of_thought, template_summarize_error, template_generate_possible_packages_output_format_string
+from src.options.generate.ui import get_random_employee
 from src.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
 from src.utils.string_tools import print_colored
 
 
+@dataclass
+class TaskSpecification:
+    task: Optional[Text]
+    test: Optional[Text]
+
+system_task_introduction = '''
+You are a product manager who refines the requirements of a client who wants to create a microservice.
+'''
+
 class Generator:
     def __init__(self, task_description, test_description, model='gpt-4'):
         self.gpt_session = gpt.GPTSession(task_description, test_description, model=model)
-        self.task_description = task_description
-        self.test_description = test_description
+        self.microservice_specification = TaskSpecification(task=task_description, test=test_description)
 
     def extract_content_from_result(self, plain_text, file_name, match_single_block=False):
+        pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)\n```"  # the \n before the closing ``` makes sure that a ``` within the generated code is not matched
         match = re.search(pattern, plain_text, re.MULTILINE)
         if match:
@@ -56,9 +72,23 @@ metas:
 
         return all_microservice_files_string.strip()
 
-    def generate_and_persist_file(self, section_title, template, destination_folder=None, file_name=None, system_definition_examples: List[str] = ['gpt', 'executor', 'docarray', 'client'], **template_kwargs):
+    def generate_and_persist_file(
+            self,
+            section_title,
+            template,
+            destination_folder=None,
+            file_name=None,
+            system_definition_examples: List[str] = ['gpt', 'executor', 'docarray', 'client'],
+            **template_kwargs
+    ):
+        """
+        Generates a file via the OpenAI chat API and persists it to the destination folder if specified.
+        If the content is not generated properly, the generation is retried.
+        It returns the generated content.
+        """
         print_colored('', f'\n\n############# {section_title} #############', 'blue')
-        conversation = self.gpt_session.get_conversation(system_definition_examples=system_definition_examples)
+        system_introduction_message = self._create_system_message(self.microservice_specification.task, self.microservice_specification.test, system_definition_examples)
+        conversation = self.gpt_session.get_conversation(messages=[system_introduction_message])
         template_kwargs = {k: v for k, v in template_kwargs.items() if k in template.input_variables}
         content_raw = conversation.chat(
             template.format(
@@ -91,8 +121,8 @@ metas:
             template_generate_executor,
             MICROSERVICE_FOLDER_v1,
             microservice_name=microservice_name,
-            microservice_description=self.task_description,
-            test_description=self.test_description,
+            microservice_description=self.microservice_specification.task,
+            test_description=self.microservice_specification.test,
             packages=packages,
             file_name_purpose=EXECUTOR_FILE_NAME,
             tag_name=EXECUTOR_FILE_TAG,
@@ -105,8 +135,8 @@ metas:
             MICROSERVICE_FOLDER_v1,
             code_files_wrapped=self.files_to_string({'microservice.py': microservice_content}),
             microservice_name=microservice_name,
-            microservice_description=self.task_description,
-            test_description=self.test_description,
+            microservice_description=self.microservice_specification.task,
+            test_description=self.microservice_specification.test,
             file_name_purpose=TEST_EXECUTOR_FILE_NAME,
             tag_name=TEST_EXECUTOR_FILE_TAG,
             file_name=TEST_EXECUTOR_FILE_NAME,
@@ -235,7 +265,7 @@ metas:
             )
         else:
             user_query = template_solve_code_issue.format(
-                task_description=self.task_description, test_description=self.test_description,
+                task_description=self.microservice_specification.task, test_description=self.microservice_specification.test,
                 summarized_error=summarized_error,
                 all_files_string=self.files_to_string(file_name_to_content),
             )
         conversation = self.gpt_session.get_conversation()
@@ -276,7 +306,7 @@ metas:
             None,
             file_name='packages.csv',
             system_definition_examples=['gpt'],
-            description=self.task_description
+            description=self.microservice_specification.task
         )
         packages_list = [[pkg.strip() for pkg in packages_string.split(',')] for packages_string in packages_csv_string.split('\n')]
@@ -284,7 +314,8 @@ metas:
         return packages_list
 
     def generate(self, microservice_path):
-        generated_name = self.generate_microservice_name(self.task_description)
+        self.refine_specification()
+        generated_name = self.generate_microservice_name(self.microservice_specification.task)
         microservice_name = f'{generated_name}{random.randint(0, 10_000_000)}'
         packages_list = self.get_possible_packages()
         packages_list = [
@@ -320,3 +351,186 @@ gptdeploy deploy --path {microservice_path}
         error_summary = conversation.chat(template_summarize_error.format(error=error))
         return error_summary
 
+    def refine_specification(self):
+        pm = get_random_employee('pm')
+        print(f'{pm.emoji}👋 Hi, I\'m {pm.name}, a PM at Jina AI. I\'ll gather the requirements for our engineers.')
+        self.refine_task(pm)
+        self.refine_test(pm)
+        print(f'''
+{pm.emoji} 👍 Great, I will hand over the following requirements to our engineers:
+{self.microservice_specification.task}
+The following test scenario will be used:
+{self.microservice_specification.test}
+''')
+
+    def refine_task(self, pm):
+        system_task_iteration = '''
+The client writes a description of the microservice.
+You must only talk to the client about the microservice.
+You must not output anything other than what is specified in the following steps.
+1.
+You must create a checklist for the requirements of the microservice.
+Input and output have to be accurately specified.
+You must use the following format (insert ✅, ❌, or n/a) depending on whether the requirement is fulfilled, not fulfilled, or not applicable:
+input:
+output:
+credentials:
+database access:
+
+2.
+You must do either a or b.
+a)
+If the description is not sufficiently specified, then ask for the missing information.
+Your response must exactly match the following code block format:
+
+**prompt.txt**
+```text
+
+```
+
+b)
+Otherwise, respond with the summarized description.
+The summarized description must contain all the information mentioned by the client.
+Your response must exactly match the following code block format:
+
+**task-final.txt**
+```text
+
+``` <-- this must be on a new line
+
+The character sequence ``` must always be at the beginning of the line.
+You must not add information that was not provided by the client.
+
+
+Example for the description "given a city, get the weather report for the next 5 days":
+input: ✅
+output: ✅
+credentials: ❌
+database access: n/a
+
+**prompt.txt**
+```text
+Please provide the URL of the weather API and a valid API key. Or let our engineers try to find a free API.
+```
+
+
+Example for the description "convert png to svg":
+input: ✅
+output: ✅
+credentials: n/a
+database access: n/a
+
+**task-final.txt**
+```text
+The user inserts a png and gets an svg as response.
+```
+
+
+Example for the description "parser":
+input: ❌
+output: ❌
+credentials: n/a
+database access: n/a
+
+**prompt.txt**
+```text
+Please provide the input and output format.
+```
+
+'''
+
+        task_description = self.microservice_specification.task
+        if not task_description:
+            task_description = self.get_user_input(pm, 'What should your microservice do?')
+        messages = [
+            SystemMessage(content=system_task_introduction + system_task_iteration),
+        ]
+
+        while True:
+            conversation = self.gpt_session.get_conversation(messages, print_stream=os.environ['VERBOSE'].lower() == 'true', print_costs=False)
+            print('thinking...')
+            agent_response_raw = conversation.chat(task_description, role='user')
+
+            question = self.extract_content_from_result(agent_response_raw, 'prompt.txt')
+            task_final = self.extract_content_from_result(agent_response_raw, 'task-final.txt')
+            if task_final:
+                self.microservice_specification.task = task_final
+                break
+            if question:
+                # the conversation shares the `messages` list and `chat()` already
+                # appended this exchange, so the next answer must not be added here again
+                task_description = self.get_user_input(pm, question)
+            else:
+                task_description = self.get_user_input(pm, agent_response_raw + '\n: ')
+
+    def refine_test(self, pm):
+        system_test_iteration = '''
+The client gives you a description of the microservice.
+Your task is to verbally describe a unit test for that microservice.
+There are two cases:
+a) The unit test requires a file as input.
+In this case you must ask the client to provide the file as a URL.
+Your response must exactly match the following code block format:
+
+**prompt.txt**
+```text
+
+```
+
+If you did a, you must not do b.
+b) Any strings, ints, or bools can be used as input for the unit test.
+In this case you must describe the unit test verbally.
+Your response must exactly match the following code block format:
+
+**test-final.txt**
+```text
+
+```
+
+If you did b, you must not do a.
+
+Example for the description "given a city, get the weather report for the next 5 days using the api":
+'''
+        messages = [
+            SystemMessage(content=system_task_introduction + system_test_iteration),
+        ]
+        user_input = self.microservice_specification.task
+        while True:
+            conversation = self.gpt_session.get_conversation(messages, print_stream=os.environ['VERBOSE'].lower() == 'true', print_costs=False)
+            agent_response_raw = conversation.chat(user_input, role='user')
+            question = self.extract_content_from_result(agent_response_raw, 'prompt.txt')
+            test_final = self.extract_content_from_result(agent_response_raw, 'test-final.txt')
+            if test_final:
+                self.microservice_specification.test = test_final
+                break
+            if question:
+                # as in refine_task, `chat()` already appended this exchange to `messages`
+                user_input = self.get_user_input(pm, question)
+            else:
+                user_input = self.get_user_input(pm, agent_response_raw + '\n: ')
+
+    @staticmethod
+    def _create_system_message(task_description, test_description, system_definition_examples: List[str] = []) -> SystemMessage:
+        system_message = PromptTemplate.from_template(template_system_message_base).format(
+            task_description=task_description,
+            test_description=test_description,
+        )
+        if 'gpt' in system_definition_examples:
+            system_message += f'\n{gpt_example}'
+        if 'executor' in system_definition_examples:
+            system_message += f'\n{executor_example}'
+        if 'docarray' in system_definition_examples:
+            system_message += f'\n{docarray_example}'
+        if 'client' in system_definition_examples:
+            system_message += f'\n{client_example}'
+        return SystemMessage(content=system_message)
+
+    @staticmethod
+    def get_user_input(employee, prompt_to_user):
+        val = input(f'{employee.emoji}❓ {prompt_to_user}\nyou: ')
+        while not val:
+            val = input('you: ')
+        return val
diff --git a/src/options/generate/ui.py b/src/options/generate/ui.py
new file mode 100644
index 0000000..81d7a75
--- /dev/null
+++ b/src/options/generate/ui.py
@@ -0,0 +1,82 @@
+import random
+from dataclasses import dataclass
+
+product_manager_names = [
+    ('Leon', 'm'),
+    ('Saahil', 'm'),
+    ('Susana', 'f')
+]
+engineer_names = [
+    ('Aaron', 'm'),
+    ('Alaeddine', 'm'),
+    ('Andrei', 'm'),
+    ('Anne', 'f'),
+    ('Bo', 'm'),
+    ('Charlotte', 'f'),
+    ('David', 'm'),
+    ('Deepankar', 'm'),
+    ('Delgermurun', 'm'),
+    ('Edward', 'm'),
+    ('Felix', 'm'),
+    ('Florian', 'm'),
+    ('Georgios', 'm'),
+    ('Girish', 'm'),
+    ('Guillaume', 'm'),
+    ('Isabelle', 'f'),
+    ('Jackmin', 'm'),
+    ('Jie', 'm'),
+    ('Joan', 'm'),
+    ('Johanna', 'f'),
+    ('Johannes', 'm'),
+    ('Joschka', 'm'),
+    ('Lechun', 'm'),
+    ('Louis', 'm'),
+    ('Mark', 'm'),
+    ('Maximilian', 'm'),
+    ('Michael', 'm'),
+    ('Mohamed Aziz', 'm'),
+    ('Mohammad Kalim', 'm'),
+    ('Nikos', 'm'),
+    ('Ran', 'm'),
+    ('Saba', 'f'),
+    ('Sami', 'm'),
+    ('Sha', 'm'),
+    ('Subba Reddy', 'm'),
+    ('Tanguy', 'm'),
+    ('Winston', 'm'),
+    ('Yadh', 'm'),
+    ('Yanlong', 'm'),
+    ('Zac', 'm'),
+    ('Zhaofeng', 'm'),
+    ('Zihao', 'm'),
+    ('Ziniu', 'm')
+]
+
+role_to_gender_to_emoji = {
+    'engineer': {
+        'm': '👨‍💻',
+        'f': '👩‍💻'
+    },
+    'pm': {
+        'm': '👨‍💼',
+        'f': '👩‍💼'
+    },
+    'qa_engineer': {
+        'm': '👨‍🔧',
+        'f': '👩‍🔧',
+    },
+}
+
+@dataclass
+class Employee:
+    role: str
+    name: str
+    gender: str
+    emoji: str
+
+def get_random_employee(role: str) -> Employee:
+    # pick from the name pool that matches the requested role
+    names = product_manager_names if role == 'pm' else engineer_names
+    name, gender = random.choice(names)
+    emoji = role_to_gender_to_emoji[role][gender]
+    return Employee(role, name, gender, emoji)
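
Reviewer note: a minimal usage sketch of the conversation API as changed by this patch, not
part of the diff itself. It assumes a configured OPENAI_API_KEY; the task/test strings and the
system prompt content are illustrative placeholders, and the signatures follow the hunks above.

    import os

    from langchain.schema import SystemMessage

    from src.apis.gpt import GPTSession
    from src.options.generate.ui import get_random_employee

    os.environ['VERBOSE'] = 'false'  # _GPTConversation reads this flag

    # signature per the call in Generator.__init__ above; arguments are placeholders
    session = GPTSession('convert png to svg', 'assert the response is an svg', model='gpt-4')
    pm = get_random_employee('pm')

    # conversations are now seeded with explicit messages instead of
    # system_definition_examples, and can run without streaming or cost output
    conversation = session.get_conversation(
        messages=[SystemMessage(content='You are a product manager ...')],
        print_stream=False,
        print_costs=False,
    )
    answer = conversation.chat('The user inserts a png and gets an svg as response.', role='user')
    print(f'{pm.emoji} {pm.name}: {answer}')

Because `chat()` appends both the user message and the model's reply to the very list that was
passed in, callers that keep a reference to `messages` (as refine_task and refine_test do) see
the full transcript without re-appending messages themselves.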