From 142f4d9f648730a513504af3c834bce3e82520cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Mon, 1 May 2023 18:31:41 +0200
Subject: [PATCH 01/22] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/ci.yml | 4 ++--
 test/test_generator.py   | 7 +++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 76e6d66..814ac13 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,12 +5,12 @@ on:
   pull_request:
 
 jobs:
-  test_level:
+  test_cognitive_level:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
-        group: [0, 1, 2]
+        group: [0, 1, 2, 3]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python 3.8
diff --git a/test/test_generator.py b/test/test_generator.py
index cac1950..bcddedf 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -31,6 +31,7 @@ def test_generation_level_0(tmpdir):
 def tmpdir():
     return 'microservice'
 
+
 def test_generation_level_1(tmpdir):
     """
     Requirements:
@@ -72,7 +73,6 @@ def test_generation_level_2(tmpdir):
     assert generator.generate() == 0
 
 
-@pytest.mark.skip(reason="not possible")
 def test_generation_level_3(tmpdir):
     """
     Requirements:
@@ -105,7 +105,7 @@ Create an audio file of the summarized text.
     )
     assert generator.generate() == 0
 
-@pytest.mark.skip(reason="not possible")
+
 def test_generation_level_4(tmpdir):
     """
     Requirements:
@@ -143,6 +143,5 @@ The output is the image with the joke on it.''',
                           )
     assert generator.generate() == 0
 
-
 # further ideas:
-# Create a wrapper around google called Joogle. It modifies the page summary preview text of the search results to insert the word Jina as much as possible.
\ No newline at end of file
+# Create a wrapper around google called Joogle. It modifies the page summary preview text of the search results to insert the word Jina as much as possible.

From 1c5d8329f4d8a7c07b1dd1bce9b799b8d2a264bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Mon, 1 May 2023 18:32:24 +0200
Subject: [PATCH 02/22] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20and=204?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 814ac13..63a0452 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,7 +10,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        group: [0, 1, 2, 3]
+        group: [0, 1, 2, 3, 4]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python 3.8

From b6354d256dfa4434cf03f3cb93f3a9c66a77c0bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Mon, 1 May 2023 21:32:18 +0200
Subject: [PATCH 03/22] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20and=204?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/generator.py        |   8 +-
 src/options/generate/templates_system.py | 101 ++++++++++++++---------
 src/options/generate/templates_user.py   |  64 ++++++++------
 test/test_generator.py                   |   8 +-
 4 files changed, 112 insertions(+), 69 deletions(-)

diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index ad6b45b..3ec460a 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -461,8 +461,10 @@ gptdeploy deploy --path {self.microservice_root_path}
                     'task',
                     '',
                     template_pm_task_iteration,
-                    micro_service_initial_description=f'''Microservice description: 
+                    micro_service_initial_description=f'''Microservice description:
+```
 {self.microservice_specification.task}
+```
 ''',
                 )
                 self.refine_requirements(
@@ -510,8 +512,8 @@ Test scenario:
                 role='user'
             )
             messages.append(HumanMessage(content=user_input))
-            agent_question = self.extract_content_from_result(agent_response_raw, 'prompt.txt', can_contain_code_block=False)
-            final = self.extract_content_from_result(agent_response_raw, 'final.txt', can_contain_code_block=False)
+            agent_question = self.extract_content_from_result(agent_response_raw, 'prompt.json', can_contain_code_block=False)
+            final = self.extract_content_from_result(agent_response_raw, 'final.json', can_contain_code_block=False)
             if final:
                 messages.append(AIMessage(content=final))
                 setattr(self.microservice_specification, refinement_type, final)
diff --git a/src/options/generate/templates_system.py b/src/options/generate/templates_system.py
index 7dabc54..8583154 100644
--- a/src/options/generate/templates_system.py
+++ b/src/options/generate/templates_system.py
@@ -40,9 +40,11 @@ a)
 If the description is not sufficiently specified, then ask for the missing information.
 Your response must exactly match the following block code format (double asterisks for the file name and triple backticks for the file block):
 
-**prompt.txt**
-```text
-<prompt to the client here>
+**prompt.json**
+```json
+{{
+    "question": "<prompt to the client here>"
+}}
 ```
 
 b)
@@ -50,9 +52,13 @@ Otherwise you respond with the summarized description.
 The summarized description must contain all the information mentioned by the client.
 Your response must exactly match the following block code format (double asterisks for the file name and triple backticks for the file block):
 
-**final.txt**
-```text
-<task here>
+**final.json**
+```json
+{{
+    "description": "<microservice description here>",
+    "implementation_details": "<additional implementation details here>",
+    "credentials: "<credentials here>"
+}}
 ```
 
 The character sequence ``` must always be at the beginning of the line.
@@ -64,9 +70,11 @@ output: defined
 api access: not defined
 database access: n/a
 
-**prompt.txt**
-```text
-Please provide the url of the weather api and a valid api key or some other way accessing the api. Or let our engineers try to find a free api.
+**prompt.json**
+```json
+{{
+    "question": "Please provide the url of the weather api and a valid api key or some other way accessing the api. Or let our engineers try to find a free api."
+}}
 ```
 
 Example for the description "convert png to svg":
@@ -75,9 +83,13 @@ output: defined
 api access: n/a
 database access: n/a
 
-**final.txt**
-```text
-The user inserts a png and gets an svg as response.
+**final.json**
+```json
+{{
+    "description": "The user inserts a png and gets an svg as response.",
+    "implementation_details": "n/a",
+    "credentials: "n/a"
+}}
 ```
 
 Example for the description "parser":
@@ -86,9 +98,11 @@ output: not defined
 api access: n/a
 database access: n/a
 
-**prompt.txt**
-```text
-Please provide the input and output format.
+**prompt.json**
+```json
+{{
+    "question": "Please provide the input and output format."
+}}
 ```
 '''
 
@@ -104,9 +118,11 @@ Your response must exactly match the following block code format (double asteris
 1.
 contains example: no
 2.
-**prompt.txt**
-```text
-<prompt to the client here>
+**prompt.json**
+```json
+{{
+    "question": "<prompt to the client here>"
+}}
 ```
 
 If you did a, you must not do b.
@@ -117,10 +133,12 @@ Your response must exactly match the following block code format (double asteris
 1.
 contains example: yes (<insert example here>)
 2.
-**final.txt**
-```text
-input: "<input here>"
-assertion: the output is of type <type here>
+**final.json**
+```json
+{{
+    "input": "<input here>",
+    "assertion": "the output is of type <type here>"
+}}
 ```
 
 If you did b, you must not do a.
@@ -129,37 +147,46 @@ Example for: "given a city like "Berlin", get the weather report for the next 5
 1.
 contains example: yes (Berlin)
 2.
-**final.txt**
-```text
-input: "Berlin"
-assertion: the output is of type string
+**final.json**
+```json
+{{
+    "input": "Berlin",
+    "assertion": "the output is of type string"
+}}
 ```
 
 Example for "The user inserts a png and gets an svg as response.":
 1.
 contains example: no
 2.
-**prompt.txt**
-```text
-Please provide a png example input file as url.
+**prompt.json**
+```json
+{{
+    "question": "Please provide a png example input file as url."
+}}
 ```
 
+
 Example for "The user inserts a png like https://aquasecurity.github.io/kube-bench/v0.6.5/images/kube-bench-logo-only.png and gets an svg as response.":
 1.
 contains example: yes (https://aquasecurity.github.io/kube-bench/v0.6.5/images/kube-bench-logo-only.png)
 2.
-**final.txt**
-```text
-input: "https://aquasecurity.github.io/kube-bench/v0.6.5/images/kube-bench-logo-only.png"
-assertion: the output is of type svg
+**final.json**
+```json
+{{
+    "input": "https://aquasecurity.github.io/kube-bench/v0.6.5/images/kube-bench-logo-only.png",
+    "assertion": "the output is of type svg"
+}}
 ```
 
 Example for "The microservice takes nothing as input and returns the current time.":
 1.
 contains example: n/a
-**final.txt**
-```text
-input: "nothing"
-assertion: the output is of type string
+**final.json**
+```json
+{{
+    "input": "nothing",
+    "assertion": "the output is of type string"
+}}
 ```
 '''
diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index a505435..88cfcac 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -374,22 +374,28 @@ The playground (app.py) must not import the executor.
 template_pm_task_iteration = PromptTemplate.from_template(
     '''{micro_service_initial_description}
 1.Quickly go through the checklist (input/output well defined? api or db access needed?)  and think about if you should ask something to the client or if you should write the final description.
-2.Either write the prompt.txt or the final.txt file.
+2.Either write the prompt.json or the final.json file.
 Either ask for clarification like this:
-**prompt.txt**
-```text
-<prompt to the client here (must be only one question)>
+**prompt.json**
+```json
+{{
+    "question": "<prompt to the client here (must be only one question)>"
+}}
 ```
 
-Or write the summarized microservice description like this:
-**final.txt**
-```text
-<microservice description here>
+Or write the summarized microservice description and additional implementation details like this:
+**final.json**
+```json
+{{
+    "description": "<microservice description here>",
+    "implementation_details": "<additional implementation details here>",
+    "credentials: "<credentials here>"
+}}
 ``` 
-Note that your response must be either prompt.txt or final.txt. You must not write both.
+Note that your response must be either prompt.json or final.json. You must not write both.
 Note that you must obey the double asterisk and tripple backtick syntax from above.
 Note that the last sequence of characters in your response must be ``` (triple backtick).
-Note that prompt.txt must not only contain one question.
+Note that prompt.json must only contain one question.
 Note that if urls, secrets, database names, etc. are mentioned, they must be part of the summary.
 {custom_suffix}
 '''
@@ -398,35 +404,41 @@ Note that if urls, secrets, database names, etc. are mentioned, they must be par
 template_pm_test_iteration = PromptTemplate.from_template(
     '''{micro_service_initial_description}
 1. write down if the original description and the refined description contain an example input for the microservice.
-2. write down either prompt.txt or final.txt.
-If the example input for the microservice is mentioned in the refined description or the original description, then output final.txt.
-Otherwise, output prompt.txt where you ask for the example input file as URL or the example string.
+2. write down either prompt.json or final.json.
+If the example input for the microservice is mentioned in the refined description or the original description, then output final.json.
+Otherwise, output prompt.json where you ask for the example input file as URL or the example string.
 Except for urls, you should come up with your own example input that makes sense for the microservice description.
 
 Example for the case where an example input file is required and was not mentioned before:
-**prompt.txt**
-```text
-Can you please provide an example input file as URL?
+**prompt.json**
+```json
+{{
+    "question": "Can you please provide an example input file as URL?"
+}}
 ```
 
 Example for the case where the example input string is required and was not mentioned before:
-**prompt.txt**
-```text
-Can you please provide an example input string?
+**prompt.json**
+```json
+{{
+    "question": "Can you please provide an example input string?"
+}}
 ```
 Note that you must not ask for an example input in case the example input is already mentioned in the refined description or the original description.
 
 Example for the case where the example is already mentioned in the refined description or the original description:
-**final.txt**
-```text
-input: <input here>
-assertion: the output is of type <type here>
-``` 
-Note that your response must be either prompt.txt or final.txt. You must not write both.
+**final.json**
+```json
+{{
+    "input": "<input here>",
+    "assertion": "the output is of type <type here>"
+}}
+```
+Note that your response must be either prompt.json or final.json. You must not write both.
 Note that you must obey the double asterisk and tripple backtick syntax from above.
 Note that the last sequence of characters in your response must be ``` (triple backtick).
 Note that your response must start with the character sequence ** (double asterisk).
-Note that prompt.txt must only contain one question.
+Note that prompt.json must only contain one question.
 {custom_suffix}
 '''
 )
diff --git a/test/test_generator.py b/test/test_generator.py
index bcddedf..54c7573 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -97,7 +97,7 @@ data = {{
 }}
 response = requests.post(url, headers=headers, files=file, data=data)
 print(response.text)
-Summarize the text.
+Summarize the text (50 words).
 Create an audio file of the summarized text.
 ''',
         str(tmpdir) + 'microservice',
@@ -118,7 +118,7 @@ def test_generation_level_4(tmpdir):
     """
     os.environ['VERBOSE'] = 'true'
     generator = Generator(f'''
-The input is an image like this: https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/560px-PNG_transparency_demonstration_1.png.
+The input is an image.
 Use the following api to get the description of the image:
 Request:
 curl "https://us-central1-causal-diffusion.cloudfunctions.net/describe" \\
@@ -137,7 +137,9 @@ Result format:
 }}
 The description is then used to generate a joke.
 The joke is the put on the image.
-The output is the image with the joke on it.''',
+The output is the image with the joke on it.
+Example input image: https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/560px-PNG_transparency_demonstration_1.png
+''',
                           str(tmpdir) + 'microservice',
                           'gpt-3.5-turbo'
                           )

From bae332887690879104a99d54ef4e17ca7e6143f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Mon, 1 May 2023 21:36:18 +0200
Subject: [PATCH 04/22] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20and=204?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/generator.py | 5 +++--
 test/test_generator.py            | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 3ec460a..eae74a3 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -519,8 +519,9 @@ Test scenario:
                 setattr(self.microservice_specification, refinement_type, final)
                 break
             elif agent_question:
-                messages.append(AIMessage(content=agent_question))
-                user_input = self.get_user_input(pm, agent_question)
+                question_parsed = json.loads(agent_question)['question']
+                messages.append(AIMessage(content=question_parsed))
+                user_input = self.get_user_input(pm, question_parsed)
             else:
                 if num_parsing_tries > 2:
                     raise self.TaskRefinementException()
diff --git a/test/test_generator.py b/test/test_generator.py
index 54c7573..50bc50c 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -85,7 +85,7 @@ def test_generation_level_3(tmpdir):
     """
     os.environ['VERBOSE'] = 'true'
     generator = Generator(
-        f'''Given an audio file of speech like https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav, 
+        f'''Given an audio file (1min) of speech like https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav, 
 get convert it to text using the following api:
 import requests
 url = "https://transcribe.whisperapi.com"

From ebb57979820fba87b91b23a5f45791a8ae83f44e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Mon, 1 May 2023 22:31:01 +0200
Subject: [PATCH 05/22] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20refinement?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/apis/gpt.py                          |  4 ++--
 src/options/generate/templates_system.py | 10 ++++++----
 src/options/generate/templates_user.py   |  6 ++++--
 test/test_generator.py                   | 10 ++++++----
 4 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/apis/gpt.py b/src/apis/gpt.py
index 16a53af..66512de 100644
--- a/src/apis/gpt.py
+++ b/src/apis/gpt.py
@@ -11,7 +11,7 @@ from langchain.chat_models import ChatOpenAI
 from openai.error import RateLimitError
 from langchain.schema import HumanMessage, SystemMessage, BaseMessage, AIMessage
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from requests.exceptions import ConnectionError
+from requests.exceptions import ConnectionError, ChunkedEncodingError
 from urllib3.exceptions import InvalidChunkLength
 
 from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \
@@ -132,7 +132,7 @@ class _GPTConversation:
             try:
                 response = self._chat(self.messages)
                 break
-            except (ConnectionError, InvalidChunkLength) as e:
+            except (ConnectionError, InvalidChunkLength, ChunkedEncodingError) as e:
                 print('There was a connection error. Retrying...')
                 if i == 9:
                     raise e
diff --git a/src/options/generate/templates_system.py b/src/options/generate/templates_system.py
index 8583154..70a98d1 100644
--- a/src/options/generate/templates_system.py
+++ b/src/options/generate/templates_system.py
@@ -48,15 +48,16 @@ Your response must exactly match the following block code format (double asteris
 ```
 
 b)
-Otherwise you respond with the summarized description.
-The summarized description must contain all the information mentioned by the client.
+Otherwise you respond with the detailed description.
+The detailed description must contain all the information mentioned by the client.
 Your response must exactly match the following block code format (double asterisks for the file name and triple backticks for the file block):
 
 **final.json**
 ```json
 {{
     "description": "<microservice description here>",
-    "implementation_details": "<additional implementation details here>",
+    "code_samples": "<code samples from the client here>",
+    "documentation_info": "<documentation info here>",
     "credentials: "<credentials here>"
 }}
 ```
@@ -87,7 +88,8 @@ database access: n/a
 ```json
 {{
     "description": "The user inserts a png and gets an svg as response.",
-    "implementation_details": "n/a",
+    "code_samples": "n/a",
+    "documentation_info": "n/a",
     "credentials: "n/a"
 }}
 ```
diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index 88cfcac..873ab81 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -383,12 +383,14 @@ Either ask for clarification like this:
 }}
 ```
 
-Or write the summarized microservice description and additional implementation details like this:
+Or write the detailed microservice description including all mentioned code samples, documentation info and credentials like this:
 **final.json**
 ```json
 {{
     "description": "<microservice description here>",
-    "implementation_details": "<additional implementation details here>",
+    "example_input_file": "<example input file here if mentioned before otherwise n/a>",
+    "code_samples": "<code samples from the client here>",
+    "documentation_info": "<documentation info here>",
     "credentials: "<credentials here>"
 }}
 ``` 
diff --git a/test/test_generator.py b/test/test_generator.py
index 50bc50c..251509b 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -85,8 +85,9 @@ def test_generation_level_3(tmpdir):
     """
     os.environ['VERBOSE'] = 'true'
     generator = Generator(
-        f'''Given an audio file (1min) of speech like https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav, 
-get convert it to text using the following api:
+        f'''Given an audio file (1min wav) of speech, 
+1. convert it to text using the Whisper API.
+Here is the documentation on how to use the API:
 import requests
 url = "https://transcribe.whisperapi.com"
 headers = {{
@@ -97,8 +98,9 @@ data = {{
 }}
 response = requests.post(url, headers=headers, files=file, data=data)
 print(response.text)
-Summarize the text (50 words).
-Create an audio file of the summarized text.
+2. Summarize the text (~50 words) while still maintaining the key facts.
+3. Create an audio file of the summarized text using a tts library.
+Example input file: https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav
 ''',
         str(tmpdir) + 'microservice',
         'gpt-3.5-turbo'

From 8a2360da7ae440a5ee79eb449caaa524380fa1ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Mon, 1 May 2023 23:14:21 +0200
Subject: [PATCH 06/22] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20refinement?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/generator.py        |  2 +-
 src/options/generate/templates_system.py |  2 +-
 src/options/generate/templates_user.py   | 19 +++++++++++++++++--
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index eae74a3..9a62e34 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -542,7 +542,7 @@ Test scenario:
     def replace_with_gpt_3_5_turbo_if_possible(pkg):
         if pkg in ['allennlp', 'bertopic', 'fasttext', 'flair', 'gensim', 'nltk',
                    'pattern', 'polyglot', 'pytorch-transformers', 'rasa', 'sentence-transformers',
-                   'spacy', 'stanza', 'textblob', 'textstat', 'transformers']:
+                   'spacy', 'stanza', 'summarizer', 'textblob', 'textstat', 'transformers']:
 
             return 'gpt_3_5_turbo_api'
         return pkg
diff --git a/src/options/generate/templates_system.py b/src/options/generate/templates_system.py
index 70a98d1..1d31a5b 100644
--- a/src/options/generate/templates_system.py
+++ b/src/options/generate/templates_system.py
@@ -139,7 +139,7 @@ contains example: yes (<insert example here>)
 ```json
 {{
     "input": "<input here>",
-    "assertion": "the output is of type <type here>"
+    "assertion": "the output contains the result that is of type <type here>"
 }}
 ```
 
diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index 873ab81..9f3aeb9 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -268,7 +268,22 @@ You are given the following files:
 {all_files_string}
 
 To solve this error, you should determine the list of packages that need to be installed via `apt-get install` in the Dockerfile.
-Output them as a white space separated list:'''
+Output the apt-get packages that need to be placed at {{apt_get_packages}} as json in the following format:
+**apt-get-packages.json**
+```json
+{{"packages": ["<package1>", "<package2>"]}}
+```
+Example for the following requirements.txt file:
+**requirements.txt**
+```
+numpy==1.19.5
+fitz
+```
+The output would be:
+**apt-get-packages.json**
+```json
+{{"packages": []}}
+```'''
 )
 
 
@@ -433,7 +448,7 @@ Example for the case where the example is already mentioned in the refined descr
 ```json
 {{
     "input": "<input here>",
-    "assertion": "the output is of type <type here>"
+    "assertion": "the output contains the result that is of type <type here>"
 }}
 ```
 Note that your response must be either prompt.json or final.json. You must not write both.

From e4facfc7b5da76d095239f31ace54e12de977b9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 01:50:28 +0200
Subject: [PATCH 07/22] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20refinement?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/apis/pypi.py                       | 12 ++++++++++++
 src/constants.py                       |  4 ++--
 src/options/generate/generator.py      | 27 +++++++++++++++-----------
 src/options/generate/templates_user.py | 12 +++++++++++-
 test/test_api.py                       | 15 ++++++++++++++
 test/test_generator.py                 |  2 +-
 test/test_hub.py                       |  6 ------
 7 files changed, 57 insertions(+), 21 deletions(-)
 create mode 100644 src/apis/pypi.py
 create mode 100644 test/test_api.py
 delete mode 100644 test/test_hub.py

diff --git a/src/apis/pypi.py b/src/apis/pypi.py
new file mode 100644
index 0000000..7b0456f
--- /dev/null
+++ b/src/apis/pypi.py
@@ -0,0 +1,12 @@
+import requests
+
+
+def is_package_on_pypi(package_name, version=None):
+    """Tell whether PyPI lists a package (optionally one exact version).
+
+    Returns True on HTTP 200, False on 404 and None for any other status.
+    """
+    optional_version = f"/{version}" if version else ""
+    url = f"https://pypi.org/pypi/{package_name}{optional_version}/json"
+    response = requests.get(url, timeout=10)  # requests never times out unless told to
+    return {200: True, 404: False}.get(response.status_code)
\ No newline at end of file
diff --git a/src/constants.py b/src/constants.py
index 8deda8f..1715861 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -39,9 +39,9 @@ MAX_DEBUGGING_ITERATIONS = 10
 
 DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
 
-PROBLEMATIC_PACKAGES = [
+BLACKLISTED_PACKAGES = [
     # 'Pyrender', 'Trimesh',
     'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d', # because they need a screen,
+    'fastapi', 'uvicorn', 'starlette', # because we use jina executors
 ]
 
-UNNECESSARY_PACKAGES = ['fastapi']
diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 9a62e34..021dc34 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -14,9 +14,10 @@ from pydantic.dataclasses import dataclass
 from src.apis import gpt
 from src.apis.gpt import _GPTConversation
 from src.apis.jina_cloud import process_error_message, push_executor, is_executor_in_hub
+from src.apis.pypi import is_package_on_pypi
 from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
-    PROBLEMATIC_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
-    REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, UNNECESSARY_PACKAGES, IMPLEMENTATION_FILE_NAME, \
+    BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
+    REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
     IMPLEMENTATION_FILE_TAG
 from src.options.generate.templates_system import  system_task_iteration, system_task_introduction, system_test_iteration
 from src.options.generate.templates_user import template_generate_microservice_name, \
@@ -52,7 +53,7 @@ class Generator:
             return match.group(1).strip()
         elif match_single_block:
             # Check for a single code block
-            single_code_block_pattern = r"^```(?:\w+\n)?([\s\S]*?)```"
+            single_code_block_pattern = r"```(?:\w+\n)?([\s\S]*?)```"
             single_code_block_match = re.findall(single_code_block_pattern, plain_text, re.MULTILINE)
             if len(single_code_block_match) == 1:
                 return single_code_block_match[0].strip()
@@ -374,9 +375,11 @@ pytest
 
         print_colored('', f'Is it a {package_manager} dependency issue?', 'blue')
         conversation = self.gpt_session.get_conversation()
-        answer = conversation.chat(
+        answer_raw = conversation.chat(
             template_is_dependency_issue.format(summarized_error=summarized_error, all_files_string=dock_req_string).replace('PACKAGE_MANAGER', package_manager)
         )
+        answer_json_string = self.extract_content_from_result(answer_raw, 'response.json', match_single_block=True, )
+        answer = json.loads(answer_json_string)['dependency_installation_failure']
         return 'yes' in answer.lower()
 
     def generate_microservice_name(self, description):
@@ -402,12 +405,7 @@ pytest
         packages_list = [[pkg.strip().lower() for pkg in packages] for packages in json.loads(packages_json_string)]
         packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in packages_list]
 
-        packages_list = [
-            packages for packages in packages_list if len(set(packages).intersection(set(PROBLEMATIC_PACKAGES))) == 0
-        ]
-        packages_list = [
-            [package for package in packages if package not in UNNECESSARY_PACKAGES] for packages in packages_list
-        ]
+        packages_list = self.filter_packages_list(packages_list)
         packages_list = packages_list[:NUM_IMPLEMENTATION_STRATEGIES]
         return packages_list
 
@@ -542,7 +540,14 @@ Test scenario:
     def replace_with_gpt_3_5_turbo_if_possible(pkg):
         if pkg in ['allennlp', 'bertopic', 'fasttext', 'flair', 'gensim', 'nltk',
                    'pattern', 'polyglot', 'pytorch-transformers', 'rasa', 'sentence-transformers',
-                   'spacy', 'stanza', 'summarizer', 'textblob', 'textstat', 'transformers']:
+                   'spacy', 'stanza', 'summarizer', 'sumy', 'textblob', 'textstat', 'transformers']:
 
             return 'gpt_3_5_turbo_api'
         return pkg
+
+    @staticmethod
+    def filter_packages_list(packages_list):
+        """Per strategy, keep only packages that are not blacklisted and that are either known to PyPI or our GPT stand-in."""
+        def keep(pkg):  # 'gpt_3_5_turbo_api' is the stand-in from replace_with_gpt_3_5_turbo_if_possible; not expected on PyPI
+            return pkg not in BLACKLISTED_PACKAGES and (pkg == 'gpt_3_5_turbo_api' or is_package_on_pypi(pkg))
+        return [[pkg for pkg in packages if keep(pkg)] for packages in packages_list]
diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index 9f3aeb9..ff4d71a 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -221,7 +221,17 @@ You are given the following files:
 
 {all_files_string}
 
-Is this a PACKAGE_MANAGER dependency installation failure? Answer with "yes" or "no".'''
+Is this a PACKAGE_MANAGER dependency installation failure? 
+1. Write down one bullet point on why it could be a PACKAGE_MANAGER dependency installation failure.
+2. Write down one bullet point on why it is unlikely that it is a PACKAGE_MANAGER dependency installation failure.
+3. Write down your final answer.
+4. Write down your final answer as json in the following format:
+**response.json**
+```json
+{{"dependency_installation_failure": "<yes/no>"}}
+```
+Note that you must obey the double asterisk and tripple backtick syntax from above.
+'''
 )
 
 
diff --git a/test/test_api.py b/test/test_api.py
new file mode 100644
index 0000000..4a5e9e6
--- /dev/null
+++ b/test/test_api.py
@@ -0,0 +1,15 @@
+from src.apis.jina_cloud import is_executor_in_hub
+from src.apis.pypi import is_package_on_pypi
+
+
+def test_is_microservice_in_hub():
+    assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False
+    assert is_executor_in_hub('CLIPImageEncoder') is True
+
+def test_is_package_on_pypi():
+    assert is_package_on_pypi('jina') is True
+    assert is_package_on_pypi('jina', '0.9.25') is True
+    assert is_package_on_pypi('jina', '10.10.10') is False
+    assert is_package_on_pypi('jina-jina-jina') is False
+    assert is_package_on_pypi('jina-jina-jina', '0.9.25') is False
+    assert is_package_on_pypi('jina-jina-jina', '10.10.10') is False
diff --git a/test/test_generator.py b/test/test_generator.py
index 251509b..29dbbea 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -96,7 +96,7 @@ headers = {{
 data = {{
   "url": "URL_OF_STORED_AUDIO_FILE"
 }}
-response = requests.post(url, headers=headers, files=file, data=data)
+response = requests.post(url, headers=headers, data=data)
 print(response.text)
 2. Summarize the text (~50 words) while still maintaining the key facts.
 3. Create an audio file of the summarized text using a tts library.
diff --git a/test/test_hub.py b/test/test_hub.py
deleted file mode 100644
index 0aa6d1b..0000000
--- a/test/test_hub.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from src.apis.jina_cloud import is_executor_in_hub
-
-
-def test_is_microservice_in_hub():
-    assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False
-    assert is_executor_in_hub('CLIPImageEncoder') is True

From eb16b2f33fb3f9f0ff404054915aefda9455dda7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 11:01:45 +0200
Subject: [PATCH 08/22] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20fix=20no=20input?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/constants.py                       | 10 +++++++---
 src/options/generate/generator.py      |  7 ++-----
 src/options/generate/templates_user.py |  8 +++++---
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/constants.py b/src/constants.py
index 1715861..d03b047 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -14,7 +14,6 @@ DOCKER_FILE_TAG = 'dockerfile'
 CLIENT_FILE_TAG = 'python'
 STREAMLIT_FILE_TAG = 'python'
 
-
 FILE_AND_TAG_PAIRS = [
     (EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG),
     (IMPLEMENTATION_FILE_NAME, IMPLEMENTATION_FILE_TAG),
@@ -41,7 +40,12 @@ DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
 
 BLACKLISTED_PACKAGES = [
     # 'Pyrender', 'Trimesh',
-    'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d', # because they need a screen,
-    'fastapi', 'uvicorn', 'starlette', # because we use jina executors
+    'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d',  # because they need a screen,
+    'fastapi', 'uvicorn', 'starlette',  # because we use jina executors
 ]
 
+LANGUAGE_PACKAGES = [
+    'allennlp', 'bertopic', 'fasttext', 'flair', 'gensim', 'nltk',
+    'pattern', 'polyglot', 'pytorch-transformers', 'rasa', 'sentence-transformers',
+    'spacy', 'stanza', 'summarizer', 'sumy', 'textblob', 'textstat', 'transformers'
+]
diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 021dc34..f476fd6 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -18,7 +18,7 @@ from src.apis.pypi import is_package_on_pypi
 from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
     BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
     REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
-    IMPLEMENTATION_FILE_TAG
+    IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES
 from src.options.generate.templates_system import  system_task_iteration, system_task_introduction, system_test_iteration
 from src.options.generate.templates_user import template_generate_microservice_name, \
     template_generate_possible_packages, \
@@ -538,10 +538,7 @@ Test scenario:
 
     @staticmethod
     def replace_with_gpt_3_5_turbo_if_possible(pkg):
-        if pkg in ['allennlp', 'bertopic', 'fasttext', 'flair', 'gensim', 'nltk',
-                   'pattern', 'polyglot', 'pytorch-transformers', 'rasa', 'sentence-transformers',
-                   'spacy', 'stanza', 'summarizer', 'sumy', 'textblob', 'textstat', 'transformers']:
-
+        if pkg in LANGUAGE_PACKAGES:
             return 'gpt_3_5_turbo_api'
         return pkg
 
diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index ff4d71a..6b64638 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -413,7 +413,7 @@ Or write the detailed microservice description all mentioned code samples, docum
 ```json
 {{
     "description": "<microservice description here>",
-    "example_input_file": "<example input file here if mentioned before otherwise n/a>",
+    "example_input": "<example input file or string here if mentioned before otherwise n/a>",
     "code_samples": "<code samples from the client here>",
     "documentation_info": "<documentation info here>",
     "credentials: "<credentials here>"
@@ -430,8 +430,9 @@ Note that if urls, secrets, database names, etc. are mentioned, they must be par
 
 template_pm_test_iteration = PromptTemplate.from_template(
     '''{micro_service_initial_description}
-1. write down if the original description and the refined description contain an example input for the microservice.
-2. write down either prompt.json or final.json.
+1. write down if the microservice requires input.
+2. if it requires input, then write down if the original description or the refined description contain an example input for the microservice.
+3. write down either prompt.json or final.json.
 If the example input for the microservice is mentioned in the refined description or the original description, then output final.json.
 Otherwise, output prompt.json where you ask for the example input file as URL or the example string.
 Except for urls, you should come up with your own example input that makes sense for the microservice description.
@@ -452,6 +453,7 @@ Example for the case where the example input string is required and was not ment
 }}
 ```
 Note that you must not ask for an example input in case the example input is already mentioned in the refined description or the original description.
+Note that you must not ask for an example input in case the microservice does not require input.
 
 Example for the case where the example is already mentioned in the refined description or the original description:
 **final.json**

From ec42b7a5f2333a286f0000810101d91437de5bf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 12:44:14 +0200
Subject: [PATCH 09/22] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20fix=20no=20input?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/templates_user.py |  8 ++++----
 test/test_generator.py                 | 13 ++++++++-----
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index 6b64638..d5c374b 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -138,7 +138,7 @@ template_generate_test = PromptTemplate.from_template(
 Write a single pytest case that tests the following scenario: '{test_description}'. In case the test scenario is not precise enough, test a general case without any assumptions.
 Start the test with an extensive comment about the test case. If gpt_3_5_turbo_api is used in the executor, then the test must not check the exact output of the executor as it is not deterministic. 
 
-Use the following import to import the function:
+You must use the following import to import the function:
 ```
 from .implementation import func
 ```
@@ -221,9 +221,9 @@ You are given the following files:
 
 {all_files_string}
 
-Is this a PACKAGE_MANAGER dependency installation failure? 
-1. Write down one bullet point on why it could be a PACKAGE_MANAGER dependency installation failure.
-2. Write down one bullet point on why it is unlikely that it is a PACKAGE_MANAGER dependency installation failure.
+Is this error happening because a PACKAGE_MANAGER package is missing or failed to install? 
+1. Write down one bullet point on why the error might happen because a PACKAGE_MANAGER package is missing or failed to install.
+2. Write down one bullet point on why it is unlikely that the error happens because a PACKAGE_MANAGER package is missing or failed to install.
 3. Write down your final answer.
 4. Write down your final answer as json in the following format:
 **response.json**
diff --git a/test/test_generator.py b/test/test_generator.py
index 29dbbea..fd9a19d 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -26,10 +26,6 @@ def test_generation_level_0(tmpdir):
     assert generator.generate() == 0
 
 
-# fixture
-@pytest.fixture
-def tmpdir():
-    return 'microservice'
 
 
 def test_generation_level_1(tmpdir):
@@ -97,9 +93,11 @@ data = {{
   "url": "URL_OF_STORED_AUDIO_FILE"
 }}
 response = requests.post(url, headers=headers, data=data)
-print(response.text)
+assert response.status_code == 200
+print('This is the text from the audio file:', response.json()['text'])
 2. Summarize the text (~50 words) while still maintaining the key facts.
 3. Create an audio file of the summarized text using a tts library.
+4. Return the the audio file.
 Example input file: https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav
 ''',
         str(tmpdir) + 'microservice',
@@ -147,5 +145,10 @@ Example input image: https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/P
                           )
     assert generator.generate() == 0
 
+@pytest.fixture
+def tmpdir():
+    return 'microservice'
+
+
 # further ideas:
 # Create a wrapper around google called Joogle. It modifies the page summary preview text of the search results to insert the word Jina as much as possible.

From c6562b10deb7274ab8745bc3e6a58cd050bd68c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 13:38:03 +0200
Subject: [PATCH 10/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20fix=20no=20input?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/ci.yml               |  2 +-
 src/options/generate/templates_user.py |  7 +++--
 test/test_generator.py                 | 40 ++++++++++++++++++++++----
 3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 63a0452..97be0f4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,7 +28,7 @@ jobs:
         id: test
         run: |
           pytest -vs test/test_generator.py::test_generation_level_${{ matrix.group }}
-        timeout-minutes: 10
+        timeout-minutes: 15
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index d5c374b..7e01ca0 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -118,6 +118,10 @@ The function must full-fill: '{microservice_description}'.
 It will be tested with the following scenario: '{test_description}'.
 For the implementation use the following package(s): '{packages}'.
 
+The code must start with the following import:
+```
+from .apis import GPT_3_5_Turbo_API
+```
 Obey the following rules:
 ''' + not_allowed_function_string + '''
 
@@ -138,11 +142,10 @@ template_generate_test = PromptTemplate.from_template(
 Write a single pytest case that tests the following scenario: '{test_description}'. In case the test scenario is not precise enough, test a general case without any assumptions.
 Start the test with an extensive comment about the test case. If gpt_3_5_turbo_api is used in the executor, then the test must not check the exact output of the executor as it is not deterministic. 
 
-You must use the following import to import the function:
+The test must start with the following import:
 ```
 from .implementation import func
 ```
-
 ''' + not_allowed_function_string + '''
 The test must not open local files.
 The test must not mock a function of the executor.
diff --git a/test/test_generator.py b/test/test_generator.py
index fd9a19d..419c1d0 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -68,13 +68,43 @@ def test_generation_level_2(tmpdir):
     )
     assert generator.generate() == 0
 
-
 def test_generation_level_3(tmpdir):
     """
     Requirements:
     coding challenge: ❌
-    pip packages: ✅ (text to speech)
+    pip packages: ✅ (csv parser)
     environment: ❌
+    GPT-3.5-turbo: ✅ (for processing the text)
+    APIs: ✅ (financial data API)
+    Databases: ❌
+    """
+    os.environ['VERBOSE'] = 'true'
+    generator = Generator(
+        f'''The input is a stock symbol (e.g., AAPL for Apple Inc.). 
+1. Fetch stock data (open, high, low, close, volume) for the past 30 days using a financial data API (e.g., Alpha Vantage, Yahoo Finance, or any other API). 
+2. Calculate the average closing price over the 30 days.
+3. Read a CSV file containing a list of stock symbols and their company names.
+4. Find the input stock symbol in the CSV file and get the corresponding company name.
+5. Generate a brief summary of the company's stock performance over the past 30 days, including the average closing price and the company name.
+6. Return the summary as a string.
+Example input: 'AAPL'
+Example CSV file format:
+symbol,company_name
+AAPL,Apple Inc.
+GOOGL,Alphabet Inc.
+AMZN,Amazon.com, Inc.
+''',
+        str(tmpdir) + 'microservice',
+        'gpt-3.5-turbo'
+    )
+    assert generator.generate() == 0
+
+def test_generation_level_4(tmpdir):
+    """
+    Requirements:
+    coding challenge: ❌
+    pip packages: ✅ (text to speech)
+    environment: ✅ (tts library)
     GPT-3.5-turbo: ✅ (summarizing the text)
     APIs: ✅ (whisper for speech to text)
     Databases: ❌
@@ -101,17 +131,17 @@ print('This is the text from the audio file:', response.json()['text'])
 Example input file: https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav
 ''',
         str(tmpdir) + 'microservice',
-        'gpt-3.5-turbo'
+        'gpt-4'
     )
     assert generator.generate() == 0
 
 
-def test_generation_level_4(tmpdir):
+def test_generation_level_5(tmpdir):
     """
     Requirements:
     coding challenge: ✅ (putting text on the image)
     pip packages: ✅ (Pillow for image processing)
-    environment: ❌
+    environment: ✅ (image library)
     GPT-3.5-turbo: ✅ (for writing the joke)
     APIs: ✅ (scenex for image description)
     Databases: ❌

From 8677649815ecce31e6113f274472a1dbf35db1ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 13:42:38 +0200
Subject: [PATCH 11/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20fix=20no=20input?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_generator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_generator.py b/test/test_generator.py
index 419c1d0..5876bbf 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -127,7 +127,7 @@ assert response.status_code == 200
 print('This is the text from the audio file:', response.json()['text'])
 2. Summarize the text (~50 words) while still maintaining the key facts.
 3. Create an audio file of the summarized text using a tts library.
-4. Return the the audio file.
+4. Return the the audio file as base64 encoded binary.
 Example input file: https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav
 ''',
         str(tmpdir) + 'microservice',

From 06e9154f7e9bd8dac7b71c070e3a2314de2f1e1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 14:08:36 +0200
Subject: [PATCH 12/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=203=20fix=20no=20input?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/templates_user.py | 2 +-
 test/test_generator.py                 | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index 7e01ca0..4fbe0ed 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -24,7 +24,7 @@ The implemented function and the test must not contain prototype or placeholder
 The implemented function and the test must run in a docker container based on debian.
 The implemented function and the test must not use libraries like Flask.
 The implemented function and the test must not have a __main__ function.
-The implemented function and the test must use gpt_3_5_turbo_api if the task requires understanding, generation, summarization, transformation, translation or classification of natural language or using any language model. Other language models are not allowed.'''
+The implemented function and the test must use gpt_3_5_turbo_api if the task requires the processing of language (e.g., understanding, generation, summarization, transformation, translation, modification or classification). Other language models are not allowed.'''
 
 
 template_generate_microservice_name = PromptTemplate.from_template(
diff --git a/test/test_generator.py b/test/test_generator.py
index 5876bbf..b248bb7 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -81,18 +81,11 @@ def test_generation_level_3(tmpdir):
     os.environ['VERBOSE'] = 'true'
     generator = Generator(
         f'''The input is a stock symbol (e.g., AAPL for Apple Inc.). 
-1. Fetch stock data (open, high, low, close, volume) for the past 30 days using a financial data API (e.g., Alpha Vantage, Yahoo Finance, or any other API). 
+1. Fetch stock data (open, high, low, close, volume) for the past 30 days using a financial data API Yahoo Finance.
 2. Calculate the average closing price over the 30 days.
-3. Read a CSV file containing a list of stock symbols and their company names.
-4. Find the input stock symbol in the CSV file and get the corresponding company name.
 5. Generate a brief summary of the company's stock performance over the past 30 days, including the average closing price and the company name.
 6. Return the summary as a string.
 Example input: 'AAPL'
-Example CSV file format:
-symbol,company_name
-AAPL,Apple Inc.
-GOOGL,Alphabet Inc.
-AMZN,Amazon.com, Inc.
 ''',
         str(tmpdir) + 'microservice',
         'gpt-3.5-turbo'

From 374bf9db003d78781c4c82c1c8ed64303dbbdddf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 14:45:03 +0200
Subject: [PATCH 13/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20blacklist=20tika?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/constants.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/constants.py b/src/constants.py
index d03b047..bbf45f5 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -41,6 +41,7 @@ DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
 BLACKLISTED_PACKAGES = [
     # 'Pyrender', 'Trimesh',
     'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d',  # because they need a screen,
+    'tika'  # because it needs java
     'fastapi', 'uvicorn', 'starlette',  # because we use jina executors
 ]
 

From 4d9518dc412d07e594f393ab4afe946fadbb9310 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 14:57:13 +0200
Subject: [PATCH 14/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20verbose?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/static_files/microservice/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/options/generate/static_files/microservice/Dockerfile b/src/options/generate/static_files/microservice/Dockerfile
index d0de26d..15e5434 100644
--- a/src/options/generate/static_files/microservice/Dockerfile
+++ b/src/options/generate/static_files/microservice/Dockerfile
@@ -6,7 +6,7 @@ RUN apt-get update && apt-get install --no-install-recommends -y {{apt_get_packa
 
 ## install requirements for the executor
 COPY requirements.txt .
-RUN pip install --compile -r requirements.txt
+RUN pip -v install --compile -r requirements.txt
 
 # setup the workspace
 COPY . /workdir/

From 450c25370fe95983082864f0fb901674306eafd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 15:27:08 +0200
Subject: [PATCH 15/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20missing=20comma?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/constants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/constants.py b/src/constants.py
index bbf45f5..cc714ea 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -41,7 +41,7 @@ DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
 BLACKLISTED_PACKAGES = [
     # 'Pyrender', 'Trimesh',
     'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d',  # because they need a screen,
-    'tika'  # because it needs java
+    'tika',  # because it needs java
     'fastapi', 'uvicorn', 'starlette',  # because we use jina executors
 ]
 

From 4e1849e78c8ea14eb756d6d219730db7cad8d59f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 15:37:25 +0200
Subject: [PATCH 16/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20clean=20code?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/templates_user.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index 4fbe0ed..7b987cf 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -24,7 +24,8 @@ The implemented function and the test must not contain prototype or placeholder
 The implemented function and the test must run in a docker container based on debian.
 The implemented function and the test must not use libraries like Flask.
 The implemented function and the test must not have a __main__ function.
-The implemented function and the test must use gpt_3_5_turbo_api if the task requires the processing of language (e.g., understanding, generation, summarization, transformation, translation, modification or classification). Other language models are not allowed.'''
+The implemented function and the test must use gpt_3_5_turbo_api if the task requires the processing of language (e.g., understanding, generation, summarization, transformation, translation, modification or classification). Other language models are not allowed.
+The implemented function must follow a clean and elegant coding style.'''
 
 
 template_generate_microservice_name = PromptTemplate.from_template(

From 9137d358271c3039f79b985d527611a72ded4e02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 16:12:36 +0200
Subject: [PATCH 17/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20rename=203.5-turbo?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/generator.py             |  4 ++--
 .../static_files/microservice/apis.py         |  2 +-
 src/options/generate/templates_user.py        | 22 +++++++++----------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index f476fd6..3f9834b 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -230,7 +230,7 @@ metas:
         content_parsed = self.extract_content_from_result(content_raw, 'requirements.txt', match_single_block=True)
 
         lines = content_parsed.split('\n')
-        lines = [line for line in lines if not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo_api']])]
+        lines = [line for line in lines if not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo']])]
         content_modified = f'''jina==3.15.1.dev14
 docarray==0.21.0
 openai==0.27.5
@@ -539,7 +539,7 @@ Test scenario:
     @staticmethod
     def replace_with_gpt_3_5_turbo_if_possible(pkg):
         if pkg in LANGUAGE_PACKAGES:
-            return 'gpt_3_5_turbo_api'
+            return 'gpt_3_5_turbo'
         return pkg
 
     @staticmethod
diff --git a/src/options/generate/static_files/microservice/apis.py b/src/options/generate/static_files/microservice/apis.py
index 1bb25ff..24dcb01 100644
--- a/src/options/generate/static_files/microservice/apis.py
+++ b/src/options/generate/static_files/microservice/apis.py
@@ -5,7 +5,7 @@ import openai
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
 
-class GPT_3_5_Turbo_API:
+class GPT_3_5_Turbo:
     def __init__(self, system: str = ''):
         self.system = system
 
diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index 7b987cf..43f861d 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -24,7 +24,7 @@ The implemented function and the test must not contain prototype or placeholder
 The implemented function and the test must run in a docker container based on debian.
 The implemented function and the test must not use libraries like Flask.
 The implemented function and the test must not have a __main__ function.
-The implemented function and the test must use gpt_3_5_turbo_api if the task requires the processing of language (e.g., understanding, generation, summarization, transformation, translation, modification or classification). Other language models are not allowed.
+The implemented function and the test must use gpt_3_5_turbo if the task requires the processing of language (e.g., understanding, generation, summarization, transformation, translation, modification or classification). Other language models are not allowed.
 The implemented function must follow a clean and elegant coding style.'''
 
 
@@ -68,8 +68,8 @@ template_generate_possible_packages = PromptTemplate.from_template(
 "{description}"
 1. Write down ut to 3 different strategies to solve the task. For each strategy write down all the non-trivial subtasks you need to solve. If there is a natural language understanding or generation stragegy, write it down.
 2. Find out what is the core problem to solve.
-3. List up to 10 Python packages that are specifically designed or have functionalities to solve the complete core problem with one of the defined strategies. You must add gpt_3_5_turbo_api if the task involves generating or understanding natural language or using a (pre-trained) language model.
-4. Exclude any package that can generate or understand natural language or enables using any language model, but you must not exclude gpt_3_5_turbo_api. Print the cleaned list of packages and give a brief reason for keeping it after its name.
+3. List up to 10 Python packages that are specifically designed or have functionalities to solve the complete core problem with one of the defined strategies. You must add gpt_3_5_turbo if the task involves generating or understanding natural language or using a (pre-trained) language model.
+4. Exclude any package that can generate or understand natural language or enables using any language model, but you must not exclude gpt_3_5_turbo. Print the cleaned list of packages and give a brief reason for keeping it after its name.
 5. For each cleaned package think if it fulfills the following requirements:
 a) specifically designed or have functionalities to solve the complete core problem.
 b) has a stable api among different versions
@@ -82,7 +82,7 @@ When answering, just write "yes" or "no".
 
 6. Determine the 5 most suitable python package combinations, ordered from the best to the least suitable. Combine the packages to achieve a comprehensive solution.
 If the package is mentioned in the description, then it is automatically the best one.
-If you listed gpt_3_5_turbo_api earlier, you must use it. gpt_3_5_turbo_api is the best package for handling text-based tasks. Also, gpt_3_5_turbo_api doesn't need any other packages processing text or using language models. It can handle any text-based task alone.
+If you listed gpt_3_5_turbo earlier, you must use it. gpt_3_5_turbo is the best package for handling text-based tasks. Also, gpt_3_5_turbo doesn't need any other packages processing text or using language models. It can handle any text-based task alone.
 
 ''' + template_generate_possible_packages_output_format_string)
 
@@ -95,11 +95,11 @@ template_code_wrapping_string = '''The code will go into {file_name_purpose}. Ma
 You must provide the complete file with the exact same syntax to wrap the code.'''
 
 
-gpt_35_turbo_usage_string = """If need to use gpt_3_5_turbo_api, then this is an example on how to use it:
+gpt_35_turbo_usage_string = """If need to use gpt_3_5_turbo, then this is an example on how to use it:
 ```
-from .apis import GPT_3_5_Turbo_API
+from .apis import GPT_3_5_Turbo
 
-gpt_3_5_turbo_api = GPT_3_5_Turbo_API(
+gpt_3_5_turbo = GPT_3_5_Turbo(
     system=\'\'\'
 You are a tv-reporter who is specialized in C-list celebrities.
 When you get asked something like 'Who was having a date with <X>?', then you answer with a json like '{{"dates": ["<Y>", "<Z>"]}}'. 
@@ -121,14 +121,14 @@ For the implementation use the following package(s): '{packages}'.
 
 The code must start with the following import:
 ```
-from .apis import GPT_3_5_Turbo_API
+from .apis import GPT_3_5_Turbo
 ```
 Obey the following rules:
 ''' + not_allowed_function_string + '''
 
 Your approach:
 1. Identify the core challenge when implementing the function.
-2. Think about solutions for these challenges. If gpt_3_5_turbo_api is mentioned in the above list of packages, then you must use it.
+2. Think about solutions for these challenges. If gpt_3_5_turbo is mentioned in the above list of packages, then you must use it.
 3. Decide for one of the solutions.
 4. Write the code for the function. Don't write code for the test.
 ''' + gpt_35_turbo_usage_string + '\n' + template_code_wrapping_string
@@ -141,7 +141,7 @@ template_generate_test = PromptTemplate.from_template(
 {code_files_wrapped}
 
 Write a single pytest case that tests the following scenario: '{test_description}'. In case the test scenario is not precise enough, test a general case without any assumptions.
-Start the test with an extensive comment about the test case. If gpt_3_5_turbo_api is used in the executor, then the test must not check the exact output of the executor as it is not deterministic. 
+Start the test with an extensive comment about the test case. If gpt_3_5_turbo is used in the executor, then the test must not check the exact output of the executor as it is not deterministic. 
 
 The test must start with the following import:
 ```
@@ -167,7 +167,7 @@ Write the content of the requirements.txt file like this:
 ...
 ```
 Add any more packages that are needed to run the code.
-You must not add gpt_3_5_turbo_api to the requirements.txt file. 
+You must not add gpt_3_5_turbo to the requirements.txt file. 
 
 All versions are fixed using ~=, ==, <, >, <=, >=. The package versions must not have conflicts. Output only the requirements.txt file.
 ''' + '\n' + template_code_wrapping_string

From 9686a369f791056ab4019f8f7d4171765fb2a19a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 16:24:02 +0200
Subject: [PATCH 18/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20strategies=20format?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/templates_user.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index 43f861d..4971a56 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -50,7 +50,7 @@ list of lists wrapped into ``` and name it **strategies.json**. \
 Do not use quotation marks around packages names in the output. \
 Separate packages in a combination by comma. \
 Note that you can also leave a line empty to indicate that one of the strategies does not require any package and can be done in plain python.
-The output looks like this:
+Write the output using double asterisks and triple backticks like this:
 **strategies.json**
 ```
 [

From 2f152dd7f7491f76d44f18a3d44cb917c53b9ff9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 16:33:29 +0200
Subject: [PATCH 19/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20strategies=20format?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/templates_user.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py
index 4971a56..340a76e 100644
--- a/src/options/generate/templates_user.py
+++ b/src/options/generate/templates_user.py
@@ -273,16 +273,16 @@ jina==2.0.0
 
 
 template_solve_apt_get_dependency_issue = PromptTemplate.from_template(
-    '''Your task is to provide guidance on how to solve an error that occurred during the Docker build process. 
-Here is the summary of the error that occurred:
-{summarized_error}
-
+    '''Your task is to provide guidance on how to solve an error that occurred during the Docker build process.
 You are given the following files:
 
 {all_files_string}
 
+Here is the summary of the error that occurred:
+{summarized_error}
+
 To solve this error, you should determine the list of packages that need to be installed via `apt-get install` in the Dockerfile.
-Output the apt-get packages that need to me placed at {{apt_get_packages}} as json in the following format:
+Output the apt-get packages that need to be placed at {{apt_get_packages}} as json in the following format:
 **apt-get-packages.json**
 ```json
 {{"packages": ["<package1>", "<package2>"]}}
@@ -297,7 +297,9 @@ The output would be:
 **apt-get-packages.json**
 ```json
 {{"packages": []}}
-```'''
+```
+Note that you must not output any other files. Only output the apt-get-packages.json file.
+'''
 )
 
 

From e4c680d20c2b8ece60ac2e9aed52b0162a81c8ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 23:02:33 +0200
Subject: [PATCH 20/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20cleanup?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/constants.py                  |  5 ++++-
 src/options/generate/generator.py | 15 ++++++++++++---
 test/test_generator.py            |  2 +-
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/constants.py b/src/constants.py
index cc714ea..43f6b8b 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -42,7 +42,10 @@ BLACKLISTED_PACKAGES = [
     # 'Pyrender', 'Trimesh',
     'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d',  # because they need a screen,
     'tika',  # because it needs java
-    'fastapi', 'uvicorn', 'starlette',  # because we use jina executors
+    'fastapi', 'uvicorn', 'starlette',
+]
+UNNECESSARY_PACKAGES = [
+    'fastapi', 'uvicorn', 'starlette'  # because the wrappers are used instead
 ]
 
 LANGUAGE_PACKAGES = [
diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 3f9834b..ae02e65 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -18,7 +18,7 @@ from src.apis.pypi import is_package_on_pypi
 from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
     BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
     REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
-    IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES
+    IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES
 from src.options.generate.templates_system import  system_task_iteration, system_task_introduction, system_test_iteration
 from src.options.generate.templates_user import template_generate_microservice_name, \
     template_generate_possible_packages, \
@@ -297,7 +297,7 @@ pytest
             log_hubble = push_executor(previous_microservice_path)
             error = process_error_message(log_hubble)
             if error:
-                print('An error occurred during the build process. Feeding the error back to the assistent...')
+                print('An error occurred during the build process. Feeding the error back to the assistant...')
                 self.do_debug_iteration(error, next_microservice_path, previous_microservice_path)
                 if i == MAX_DEBUGGING_ITERATIONS - 1:
                     raise self.MaxDebugTimeReachedException('Could not debug the microservice.')
@@ -544,7 +544,16 @@ Test scenario:
 
     @staticmethod
     def filter_packages_list(packages_list):
+        # filter out complete package lists
         packages_list = [
-            [package for package in packages if package not in BLACKLISTED_PACKAGES and is_package_on_pypi(package)] for packages in packages_list
+            packages for packages in packages_list if all([
+                is_package_on_pypi(pkg)  # all packages must be on pypi
+                and pkg not in BLACKLISTED_PACKAGES  # no package is allowed to be blacklisted
+                for pkg in packages
+            ])
+        ]
+        # filter out single packages
+        packages_list = [
+            [package for package in packages if package not in UNNECESSARY_PACKAGES] for packages in packages_list
         ]
         return packages_list
diff --git a/test/test_generator.py b/test/test_generator.py
index b248bb7..f0b0d67 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -87,7 +87,7 @@ def test_generation_level_3(tmpdir):
 6. Return the summary as a string.
 Example input: 'AAPL'
 ''',
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-3.5-turbo'
     )
     assert generator.generate() == 0

From 0fdf5edd2cc46155bbaa1ed3a81e359351a88762 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Tue, 2 May 2023 23:24:24 +0200
Subject: [PATCH 21/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20gpt=20turbo=20stays?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/options/generate/generator.py | 7 +++++--
 test/test_generator.py            | 8 ++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index ae02e65..9dcadfe 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -547,8 +547,11 @@ Test scenario:
         # filter out complete package lists
         packages_list = [
             packages for packages in packages_list if all([
-                is_package_on_pypi(pkg)  # all packages must be on pypi
-                and pkg not in BLACKLISTED_PACKAGES  # no package is allowed to be blacklisted
+                pkg == 'gpt_3_5_turbo'
+                or (
+                    is_package_on_pypi(pkg)  # all packages must be on pypi or it is gpt_3_5_turbo
+                    and pkg not in BLACKLISTED_PACKAGES  # no package is allowed to be blacklisted
+                )
                 for pkg in packages
             ])
         ]
diff --git a/test/test_generator.py b/test/test_generator.py
index f0b0d67..7f7b022 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -71,8 +71,8 @@ def test_generation_level_2(tmpdir):
 def test_generation_level_3(tmpdir):
     """
     Requirements:
-    coding challenge: ❌
-    pip packages: ✅ (csv parser)
+    coding challenge: ✅ (calculate the average closing price)
+    pip packages: ❌
     environment: ❌
     GPT-3.5-turbo: ✅ (for processing the text)
     APIs: ✅ (financial data API)
@@ -83,8 +83,8 @@ def test_generation_level_3(tmpdir):
         f'''The input is a stock symbol (e.g., AAPL for Apple Inc.). 
 1. Fetch stock data (open, high, low, close, volume) for the past 30 days using a financial data API Yahoo Finance.
 2. Calculate the average closing price over the 30 days.
-5. Generate a brief summary of the company's stock performance over the past 30 days, including the average closing price and the company name.
-6. Return the summary as a string.
+3. Generate a brief summary of the company's stock performance over the past 30 days, including the average closing price and the company name.
+4. Return the summary as a string.
 Example input: 'AAPL'
 ''',
         str(tmpdir),

From 412c0a940307b67280c5cf738c38a74b43fdff17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Wed, 3 May 2023 00:01:02 +0200
Subject: [PATCH 22/22] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:?=
 =?UTF-8?q?=20level=204=20gpt=20fix?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/ci.yml                 | 28 +++++++-
 src/constants.py                         |  2 -
 src/options/generate/generator.py        | 90 +++++++++++++++---------
 test/integration/__init__.py             |  0
 test/{ => integration}/test_generator.py | 10 +--
 test/test_api.py                         | 15 ----
 test/unit/__init__.py                    |  0
 test/unit/test_api.py                    | 34 +++++++++
 test/{ => unit}/test_strings.py          |  0
 9 files changed, 122 insertions(+), 57 deletions(-)
 create mode 100644 test/integration/__init__.py
 rename test/{ => integration}/test_generator.py (96%)
 delete mode 100644 test/test_api.py
 create mode 100644 test/unit/__init__.py
 create mode 100644 test/unit/test_api.py
 rename test/{ => unit}/test_strings.py (100%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 97be0f4..a56cc2c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,9 +27,35 @@ jobs:
       - name: Test
         id: test
         run: |
-          pytest -vs test/test_generator.py::test_generation_level_${{ matrix.group }}
+          pytest -vs test/integration/test_generator.py::test_generation_level_${{ matrix.group }}
         timeout-minutes: 15
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
           WHISPER_API_KEY: ${{ secrets.WHISPER_API_KEY }}
+
+  test_unit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+      - name: Prepare environment
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install wheel
+          pip install --no-cache-dir ".[full,test]"
+          pip install pytest
+          pip install pytest-split
+      - name: Test
+        id: test
+        run: |
+          pytest -vs test/unit
+        timeout-minutes: 15
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
+          WHISPER_API_KEY: ${{ secrets.WHISPER_API_KEY }}
+
diff --git a/src/constants.py b/src/constants.py
index 43f6b8b..3119882 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -39,10 +39,8 @@ MAX_DEBUGGING_ITERATIONS = 10
 DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
 
 BLACKLISTED_PACKAGES = [
-    # 'Pyrender', 'Trimesh',
     'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d',  # because they need a screen,
     'tika',  # because it needs java
-    'fastapi', 'uvicorn', 'starlette',
 ]
 UNNECESSARY_PACKAGES = [
     'fastapi', 'uvicorn', 'starlette'  # because the wrappers are used instead
diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 9dcadfe..f59a1a6 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -4,7 +4,6 @@ import random
 import re
 import shutil
 from typing import Callable
-
 from typing import List, Text, Optional
 
 from langchain import PromptTemplate
@@ -19,16 +18,15 @@ from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX
     BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
     REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
     IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES
-from src.options.generate.templates_system import  system_task_iteration, system_task_introduction, system_test_iteration
+from src.options.generate.templates_system import system_task_iteration, system_task_introduction, system_test_iteration
 from src.options.generate.templates_user import template_generate_microservice_name, \
     template_generate_possible_packages, \
     template_solve_code_issue, \
     template_solve_pip_dependency_issue, template_is_dependency_issue, template_generate_playground, \
     template_generate_function, template_generate_test, template_generate_requirements, \
     template_chain_of_thought, template_summarize_error, \
-    template_generate_apt_get_install, template_solve_apt_get_dependency_issue, template_pm_task_iteration, \
+    template_solve_apt_get_dependency_issue, template_pm_task_iteration, \
     template_pm_test_iteration
-
 from src.options.generate.ui import get_random_employee
 from src.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
 from src.utils.string_tools import print_colored
@@ -39,6 +37,7 @@ class TaskSpecification:
     task: Optional[Text]
     test: Optional[Text]
 
+
 class Generator:
     def __init__(self, task_description, path, model='gpt-4'):
         self.gpt_session = gpt.GPTSession(task_description, model=model)
@@ -46,7 +45,7 @@ class Generator:
         self.microservice_root_path = path
 
     def extract_content_from_result(self, plain_text, file_name, match_single_block=False, can_contain_code_block=True):
-        optional_line_break = '\n' if can_contain_code_block else '' # the \n at the end makes sure that ``` within the generated code is not matched because it is not right before a line break
+        optional_line_break = '\n' if can_contain_code_block else ''  # the \n at the end makes sure that ``` within the generated code is not matched because it is not right before a line break
         pattern = fr"\*?\*?{file_name}\*?\*?\n```(?:\w+\n)?([\s\S]*?){optional_line_break}```"
         match = re.search(pattern, plain_text, re.MULTILINE)
         if match:
@@ -80,10 +79,11 @@ metas:
         def _default_parse_result_fn(x):
             _parsed_results = {}
             for _file_name in files_names:
-                _content = self.extract_content_from_result(x, _file_name, match_single_block=len(files_names)==1)
+                _content = self.extract_content_from_result(x, _file_name, match_single_block=len(files_names) == 1)
                 if _content != '':
                     _parsed_results[_file_name] = _content
             return _parsed_results
+
         return _default_parse_result_fn
 
     def generate_and_persist_file(
@@ -114,7 +114,9 @@ metas:
             parse_result_fn = self.get_default_parse_result_fn(file_name_s)
 
         print_colored('', f'\n\n############# {section_title} #############', 'blue')
-        system_introduction_message = _GPTConversation._create_system_message(self.microservice_specification.task, self.microservice_specification.test, system_definition_examples)
+        system_introduction_message = _GPTConversation._create_system_message(self.microservice_specification.task,
+                                                                              self.microservice_specification.test,
+                                                                              system_definition_examples)
         conversation = self.gpt_session.get_conversation(messages=[system_introduction_message])
         template_kwargs = {k: v for k, v in template_kwargs.items() if k in template.input_variables}
         if 'file_name' in template.input_variables and len(file_name_s) == 1:
@@ -126,7 +128,8 @@ metas:
         )
         content = parse_result_fn(content_raw)
         if content == {}:
-            content_raw = conversation.chat('You must add the content' + (f' for {file_name_s[0]}' if len(file_name_s) == 1 else ''))
+            content_raw = conversation.chat(
+                'You must add the content' + (f' for {file_name_s[0]}' if len(file_name_s) == 1 else ''))
             content = parse_result_fn(content_raw)
         for _file_name, _file_content in content.items():
             persist_file(_file_content, os.path.join(destination_folder, _file_name))
@@ -138,12 +141,14 @@ metas:
             packages,
             num_approach,
     ):
-        MICROSERVICE_FOLDER_v1 = get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, 1)
+        MICROSERVICE_FOLDER_v1 = get_microservice_path(self.microservice_root_path, microservice_name, packages,
+                                                       num_approach, 1)
         os.makedirs(MICROSERVICE_FOLDER_v1)
 
         with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'microservice.py'), 'r') as f:
             microservice_executor_boilerplate = f.read()
-        microservice_executor_code = microservice_executor_boilerplate.replace('class GPTDeployExecutor(Executor):', f'class {microservice_name}(Executor):')
+        microservice_executor_code = microservice_executor_boilerplate.replace('class GPTDeployExecutor(Executor):',
+                                                                               f'class {microservice_name}(Executor):')
         persist_file(microservice_executor_code, os.path.join(MICROSERVICE_FOLDER_v1, EXECUTOR_FILE_NAME))
 
         with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'apis.py'), 'r') as f:
@@ -202,14 +207,14 @@ metas:
         #     })
         # )
 
-
-        with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r', encoding='utf-8') as f:
+        with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r',
+                  encoding='utf-8') as f:
             docker_file_template_lines = f.readlines()
-        docker_file_template_lines = [line for line in docker_file_template_lines if not line.startswith('RUN apt-get update')]
+        docker_file_template_lines = [line for line in docker_file_template_lines if
+                                      not line.startswith('RUN apt-get update')]
         docker_file_content = '\n'.join(docker_file_template_lines)
         persist_file(docker_file_content, os.path.join(MICROSERVICE_FOLDER_v1, 'Dockerfile'))
 
-
         self.write_config_yml(microservice_name, MICROSERVICE_FOLDER_v1)
 
         print('\nFirst version of the microservice generated. Start iterating on it to make the tests pass...')
@@ -224,13 +229,15 @@ metas:
         packages = ' '.join(json.loads(json_string)['packages'])
 
         docker_file_template = self.read_docker_template()
-        return {DOCKER_FILE_NAME: docker_file_template.replace('{{apt_get_packages}}', '{apt_get_packages}').format(apt_get_packages=packages)}
+        return {DOCKER_FILE_NAME: docker_file_template.replace('{{apt_get_packages}}', '{apt_get_packages}').format(
+            apt_get_packages=packages)}
 
     def parse_result_fn_requirements(self, content_raw: str):
         content_parsed = self.extract_content_from_result(content_raw, 'requirements.txt', match_single_block=True)
 
         lines = content_parsed.split('\n')
-        lines = [line for line in lines if not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo']])]
+        lines = [line for line in lines if
+                 not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo']])]
         content_modified = f'''jina==3.15.1.dev14
 docarray==0.21.0
 openai==0.27.5
@@ -292,8 +299,10 @@ pytest
         for i in range(1, MAX_DEBUGGING_ITERATIONS):
             print('Debugging iteration', i)
             print('Trying to debug the microservice. Might take a while...')
-            previous_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, i)
-            next_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, i + 1)
+            previous_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
+                                                               num_approach, i)
+            next_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
+                                                           num_approach, i + 1)
             log_hubble = push_executor(previous_microservice_path)
             error = process_error_message(log_hubble)
             if error:
@@ -357,7 +366,8 @@ pytest
                     summarized_error=summarized_error,
                     task_description=self.microservice_specification.task,
                     test_description=self.microservice_specification.test,
-                    all_files_string=self.files_to_string({key: val for key, val in file_name_to_content.items() if key != EXECUTOR_FILE_NAME}),
+                    all_files_string=self.files_to_string(
+                        {key: val for key, val in file_name_to_content.items() if key != EXECUTOR_FILE_NAME}),
                 )
 
     class MaxDebugTimeReachedException(BaseException):
@@ -368,15 +378,19 @@ pytest
 
     def is_dependency_issue(self, summarized_error, dock_req_string: str, package_manager: str):
         # a few heuristics to quickly jump ahead
-        if any([error_message in summarized_error for error_message in ['AttributeError', 'NameError', 'AssertionError']]):
+        if any([error_message in summarized_error for error_message in
+                ['AttributeError', 'NameError', 'AssertionError']]):
             return False
-        if package_manager.lower() == 'pip' and any([em in summarized_error for em in ['ModuleNotFoundError', 'ImportError']]):
+        if package_manager.lower() == 'pip' and any(
+                [em in summarized_error for em in ['ModuleNotFoundError', 'ImportError']]):
             return True
 
         print_colored('', f'Is it a {package_manager} dependency issue?', 'blue')
         conversation = self.gpt_session.get_conversation()
         answer_raw = conversation.chat(
-            template_is_dependency_issue.format(summarized_error=summarized_error, all_files_string=dock_req_string).replace('PACKAGE_MANAGER', package_manager)
+            template_is_dependency_issue.format(summarized_error=summarized_error,
+                                                all_files_string=dock_req_string).replace('PACKAGE_MANAGER',
+                                                                                          package_manager)
         )
         answer_json_string = self.extract_content_from_result(answer_raw, 'response.json', match_single_block=True, )
         answer = json.loads(answer_json_string)['dependency_installation_failure']
@@ -403,7 +417,8 @@ pytest
             description=self.microservice_specification.task
         )['strategies.json']
         packages_list = [[pkg.strip().lower() for pkg in packages] for packages in json.loads(packages_json_string)]
-        packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in packages_list]
+        packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in
+                         packages_list]
 
         packages_list = self.filter_packages_list(packages_list)
         packages_list = packages_list[:NUM_IMPLEMENTATION_STRATEGIES]
@@ -497,11 +512,14 @@ Test scenario:
 {self.microservice_specification.test}
 ''')
 
-    def refine_requirements(self, pm, messages, refinement_type, custom_suffix, template_pm_iteration, micro_service_initial_description=None):
+    def refine_requirements(self, pm, messages, refinement_type, custom_suffix, template_pm_iteration,
+                            micro_service_initial_description=None):
         user_input = self.microservice_specification.task
         num_parsing_tries = 0
         while True:
-            conversation = self.gpt_session.get_conversation(messages, print_stream=os.environ['VERBOSE'].lower() == 'true', print_costs=False)
+            conversation = self.gpt_session.get_conversation(messages,
+                                                             print_stream=os.environ['VERBOSE'].lower() == 'true',
+                                                             print_costs=False)
             agent_response_raw = conversation.chat(
                 template_pm_iteration.format(
                     custom_suffix=custom_suffix,
@@ -510,7 +528,8 @@ Test scenario:
                 role='user'
             )
             messages.append(HumanMessage(content=user_input))
-            agent_question = self.extract_content_from_result(agent_response_raw, 'prompt.json', can_contain_code_block=False)
+            agent_question = self.extract_content_from_result(agent_response_raw, 'prompt.json',
+                                                              can_contain_code_block=False)
             final = self.extract_content_from_result(agent_response_raw, 'final.json', can_contain_code_block=False)
             if final:
                 messages.append(AIMessage(content=final))
@@ -525,8 +544,8 @@ Test scenario:
                     raise self.TaskRefinementException()
                 num_parsing_tries += 1
                 messages.append(AIMessage(content=agent_response_raw))
-                messages.append(SystemMessage(content='You did not put your answer into the right format using *** and ```.'))
-
+                messages.append(
+                    SystemMessage(content='You did not put your answer into the right format using *** and ```.'))
 
     @staticmethod
     def get_user_input(employee, prompt_to_user):
@@ -547,16 +566,19 @@ Test scenario:
         # filter out complete package lists
         packages_list = [
             packages for packages in packages_list if all([
-                pkg == 'gpt_3_5_turbo'
-                or (
-                    is_package_on_pypi(pkg)  # all packages must be on pypi or it is gpt_3_5_turbo
-                    and pkg not in BLACKLISTED_PACKAGES  # no package is allowed to be blacklisted
-                )
+                pkg not in BLACKLISTED_PACKAGES  # no package is allowed to be blacklisted
                 for pkg in packages
             ])
         ]
         # filter out single packages
         packages_list = [
-            [package for package in packages if package not in UNNECESSARY_PACKAGES] for packages in packages_list
+            [
+                package for package in packages
+                if (package not in UNNECESSARY_PACKAGES)
+                   and (  # keep a package only if it is on pypi or it is gpt_3_5_turbo
+                           is_package_on_pypi(package)
+                           or package == 'gpt_3_5_turbo'
+                   )
+            ] for packages in packages_list
         ]
         return packages_list
diff --git a/test/integration/__init__.py b/test/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/test_generator.py b/test/integration/test_generator.py
similarity index 96%
rename from test/test_generator.py
rename to test/integration/test_generator.py
index 7f7b022..fec9658 100644
--- a/test/test_generator.py
+++ b/test/integration/test_generator.py
@@ -20,7 +20,7 @@ def test_generation_level_0(tmpdir):
     os.environ['VERBOSE'] = 'true'
     generator = Generator(
         "The microservice is very simple, it does not take anything as input and only outputs the word 'test'",
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-3.5-turbo'
     )
     assert generator.generate() == 0
@@ -44,7 +44,7 @@ def test_generation_level_1(tmpdir):
 Example tweet: 
 \'When your coworker microwaves fish in the break room... AGAIN. 🐟🤢 
 But hey, at least SOMEONE's enjoying their lunch. #officelife\'''',
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-3.5-turbo'
     )
     assert generator.generate() == 0
@@ -63,7 +63,7 @@ def test_generation_level_2(tmpdir):
     os.environ['VERBOSE'] = 'true'
     generator = Generator(
         "The input is a PDF like https://www.africau.edu/images/default/sample.pdf and the output the summarized text (50 words).",
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-3.5-turbo'
     )
     assert generator.generate() == 0
@@ -123,7 +123,7 @@ print('This is the text from the audio file:', response.json()['text'])
 4. Return the the audio file as base64 encoded binary.
 Example input file: https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav
 ''',
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-4'
     )
     assert generator.generate() == 0
@@ -163,7 +163,7 @@ The joke is the put on the image.
 The output is the image with the joke on it.
 Example input image: https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/560px-PNG_transparency_demonstration_1.png
 ''',
-                          str(tmpdir) + 'microservice',
+                          str(tmpdir),
                           'gpt-3.5-turbo'
                           )
     assert generator.generate() == 0
diff --git a/test/test_api.py b/test/test_api.py
deleted file mode 100644
index 4a5e9e6..0000000
--- a/test/test_api.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from src.apis.jina_cloud import is_executor_in_hub
-from src.apis.pypi import is_package_on_pypi
-
-
-def test_is_microservice_in_hub():
-    assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False
-    assert is_executor_in_hub('CLIPImageEncoder') is True
-
-def test_is_package_on_pypi():
-    assert is_package_on_pypi('jina') is True
-    assert is_package_on_pypi('jina', '0.9.25') is True
-    assert is_package_on_pypi('jina', '10.10.10') is False
-    assert is_package_on_pypi('jina-jina-jina') is False
-    assert is_package_on_pypi('jina-jina-jina', '0.9.25') is False
-    assert is_package_on_pypi('jina-jina-jina', '10.10.10') is False
diff --git a/test/unit/__init__.py b/test/unit/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/unit/test_api.py b/test/unit/test_api.py
new file mode 100644
index 0000000..5202a89
--- /dev/null
+++ b/test/unit/test_api.py
@@ -0,0 +1,34 @@
+from src.apis.jina_cloud import is_executor_in_hub
+from src.apis.pypi import is_package_on_pypi
+from src.options.generate.generator import Generator
+
+
+def test_is_microservice_in_hub():
+    assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False
+    assert is_executor_in_hub('CLIPImageEncoder') is True
+
+
+def test_is_package_on_pypi():
+    assert is_package_on_pypi('jina') is True
+    assert is_package_on_pypi('jina', '0.9.25') is True
+    assert is_package_on_pypi('jina', '10.10.10') is False
+    assert is_package_on_pypi('jina-jina-jina') is False
+    assert is_package_on_pypi('jina-jina-jina', '0.9.25') is False
+    assert is_package_on_pypi('jina-jina-jina', '10.10.10') is False
+
+
+def test_filter_packages_list():
+    filtered_list = Generator.filter_packages_list([
+        ["gpt_3_5_turbo", "requests", "base64", "gtts", "pydub"],
+        ["requests", "base64", "gtts", "pydub"],
+        ["gpt_3_5_turbo", "requests", "base64", "gtts"],
+        ["gpt_3_5_turbo", "requests", "base64", "pydub"],
+        ["requests", "base64", "gtts"]
+    ])
+    assert filtered_list == [
+        ["gpt_3_5_turbo", "requests", "gtts", "pydub"],
+        ["requests", "gtts", "pydub"],
+        ["gpt_3_5_turbo", "requests", "gtts"],
+        ["gpt_3_5_turbo", "requests", "pydub"],
+        ["requests", "gtts"]
+    ]
diff --git a/test/test_strings.py b/test/unit/test_strings.py
similarity index 100%
rename from test/test_strings.py
rename to test/unit/test_strings.py