Merge pull request #74 from jina-ai/test_level_3

🧪4️⃣ test: level 4
Florian Hönicke authored on 2023-05-03 00:36:34 +02:00, committed by GitHub
15 changed files with 394 additions and 178 deletions

View File

@@ -5,12 +5,12 @@ on:
pull_request:
jobs:
test_level:
test_cognitive_level:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
group: [0, 1, 2]
group: [0, 1, 2, 3, 4]
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
@@ -27,9 +27,35 @@ jobs:
- name: Test
id: test
run: |
pytest -vs test/test_generator.py::test_generation_level_${{ matrix.group }}
timeout-minutes: 10
pytest -vs test/integration/test_generator.py::test_generation_level_${{ matrix.group }}
timeout-minutes: 15
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
WHISPER_API_KEY: ${{ secrets.WHISPER_API_KEY }}
test_unit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install wheel
pip install --no-cache-dir ".[full,test]"
pip install pytest
pip install pytest-split
- name: Test
id: test
run: |
pytest -vs test/unit
timeout-minutes: 15
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
WHISPER_API_KEY: ${{ secrets.WHISPER_API_KEY }}

View File

@@ -11,7 +11,7 @@ from langchain.chat_models import ChatOpenAI
from openai.error import RateLimitError
from langchain.schema import HumanMessage, SystemMessage, BaseMessage, AIMessage
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from requests.exceptions import ConnectionError
from requests.exceptions import ConnectionError, ChunkedEncodingError
from urllib3.exceptions import InvalidChunkLength
from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \
@@ -132,7 +132,7 @@ class _GPTConversation:
try:
response = self._chat(self.messages)
break
except (ConnectionError, InvalidChunkLength) as e:
except (ConnectionError, InvalidChunkLength, ChunkedEncodingError) as e:
print('There was a connection error. Retrying...')
if i == 9:
raise e
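For context, the loop this hunk touches retries transient streaming errors up to ten times before re-raising. A minimal, self-contained sketch of that pattern follows; call_chat and messages are placeholder names for illustration, not the project's actual attributes:

```python
# Hedged sketch of the retry behavior shown in the diff above: retry transient
# connection/streaming errors, re-raise on the last attempt.
from requests.exceptions import ConnectionError, ChunkedEncodingError
from urllib3.exceptions import InvalidChunkLength


def chat_with_retries(call_chat, messages, max_attempts=10):
    for i in range(max_attempts):
        try:
            return call_chat(messages)
        except (ConnectionError, InvalidChunkLength, ChunkedEncodingError) as e:
            print('There was a connection error. Retrying...')
            if i == max_attempts - 1:
                raise e
```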

src/apis/pypi.py (new file, 12 lines added)
View File

@@ -0,0 +1,12 @@
import requests
def is_package_on_pypi(package_name, version=None):
optional_version = f"/{version}" if version else ""
url = f"https://pypi.org/pypi/{package_name}{optional_version}/json"
response = requests.get(url)
if response.status_code == 200:
return True
elif response.status_code == 404:
return False
else:
return None
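A short usage sketch for the helper above, assuming pypi.org is reachable (return values as implemented: True if the package or the exact version exists, False on a 404, None for any other status code):

```python
# Usage sketch (assumes network access to pypi.org).
from src.apis.pypi import is_package_on_pypi

assert is_package_on_pypi('requests') is True            # package exists
assert is_package_on_pypi('requests', '2.31.0') is True  # exact version exists
assert is_package_on_pypi('requests', '0.0.0') is False  # version not found (404)
# Any other status code (e.g. a 5xx from PyPI) yields None, i.e. "unknown".
```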

View File

@@ -14,7 +14,6 @@ DOCKER_FILE_TAG = 'dockerfile'
CLIENT_FILE_TAG = 'python'
STREAMLIT_FILE_TAG = 'python'
FILE_AND_TAG_PAIRS = [
(EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG),
(IMPLEMENTATION_FILE_NAME, IMPLEMENTATION_FILE_TAG),
@@ -39,9 +38,16 @@ MAX_DEBUGGING_ITERATIONS = 10
DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
PROBLEMATIC_PACKAGES = [
# 'Pyrender', 'Trimesh',
'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d', # because they need a screen,
BLACKLISTED_PACKAGES = [
'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d', # because they need a screen,
'tika', # because it needs java
]
UNNECESSARY_PACKAGES = [
'fastapi', 'uvicorn', 'starlette' # because the wrappers are used instead
]
UNNECESSARY_PACKAGES = ['fastapi']
LANGUAGE_PACKAGES = [
'allennlp', 'bertopic', 'fasttext', 'flair', 'gensim', 'nltk',
'pattern', 'polyglot', 'pytorch-transformers', 'rasa', 'sentence-transformers',
'spacy', 'stanza', 'summarizer', 'sumy', 'textblob', 'textstat', 'transformers'
]

View File

@@ -4,7 +4,6 @@ import random
import re
import shutil
from typing import Callable
from typing import List, Text, Optional
from langchain import PromptTemplate
@@ -14,20 +13,20 @@ from pydantic.dataclasses import dataclass
from src.apis import gpt
from src.apis.gpt import _GPTConversation
from src.apis.jina_cloud import process_error_message, push_executor, is_executor_in_hub
from src.apis.pypi import is_package_on_pypi
from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
PROBLEMATIC_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, UNNECESSARY_PACKAGES, IMPLEMENTATION_FILE_NAME, \
IMPLEMENTATION_FILE_TAG
from src.options.generate.templates_system import system_task_iteration, system_task_introduction, system_test_iteration
BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES
from src.options.generate.templates_system import system_task_iteration, system_task_introduction, system_test_iteration
from src.options.generate.templates_user import template_generate_microservice_name, \
template_generate_possible_packages, \
template_solve_code_issue, \
template_solve_pip_dependency_issue, template_is_dependency_issue, template_generate_playground, \
template_generate_function, template_generate_test, template_generate_requirements, \
template_chain_of_thought, template_summarize_error, \
template_generate_apt_get_install, template_solve_apt_get_dependency_issue, template_pm_task_iteration, \
template_solve_apt_get_dependency_issue, template_pm_task_iteration, \
template_pm_test_iteration
from src.options.generate.ui import get_random_employee
from src.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
from src.utils.string_tools import print_colored
@@ -38,6 +37,7 @@ class TaskSpecification:
task: Optional[Text]
test: Optional[Text]
class Generator:
def __init__(self, task_description, path, model='gpt-4'):
self.gpt_session = gpt.GPTSession(task_description, model=model)
@@ -45,14 +45,14 @@ class Generator:
self.microservice_root_path = path
def extract_content_from_result(self, plain_text, file_name, match_single_block=False, can_contain_code_block=True):
optional_line_break = '\n' if can_contain_code_block else '' # the \n at the end makes sure that ``` within the generated code is not matched because it is not right before a line break
optional_line_break = '\n' if can_contain_code_block else '' # the \n at the end makes sure that ``` within the generated code is not matched because it is not right before a line break
pattern = fr"\*?\*?{file_name}\*?\*?\n```(?:\w+\n)?([\s\S]*?){optional_line_break}```"
match = re.search(pattern, plain_text, re.MULTILINE)
if match:
return match.group(1).strip()
elif match_single_block:
# Check for a single code block
single_code_block_pattern = r"^```(?:\w+\n)?([\s\S]*?)```"
single_code_block_pattern = r"```(?:\w+\n)?([\s\S]*?)```"
single_code_block_match = re.findall(single_code_block_pattern, plain_text, re.MULTILINE)
if len(single_code_block_match) == 1:
return single_code_block_match[0].strip()
@@ -79,10 +79,11 @@ metas:
def _default_parse_result_fn(x):
_parsed_results = {}
for _file_name in files_names:
_content = self.extract_content_from_result(x, _file_name, match_single_block=len(files_names)==1)
_content = self.extract_content_from_result(x, _file_name, match_single_block=len(files_names) == 1)
if _content != '':
_parsed_results[_file_name] = _content
return _parsed_results
return _default_parse_result_fn
def generate_and_persist_file(
@@ -113,7 +114,9 @@ metas:
parse_result_fn = self.get_default_parse_result_fn(file_name_s)
print_colored('', f'\n\n############# {section_title} #############', 'blue')
system_introduction_message = _GPTConversation._create_system_message(self.microservice_specification.task, self.microservice_specification.test, system_definition_examples)
system_introduction_message = _GPTConversation._create_system_message(self.microservice_specification.task,
self.microservice_specification.test,
system_definition_examples)
conversation = self.gpt_session.get_conversation(messages=[system_introduction_message])
template_kwargs = {k: v for k, v in template_kwargs.items() if k in template.input_variables}
if 'file_name' in template.input_variables and len(file_name_s) == 1:
@@ -125,7 +128,8 @@ metas:
)
content = parse_result_fn(content_raw)
if content == {}:
content_raw = conversation.chat('You must add the content' + (f' for {file_name_s[0]}' if len(file_name_s) == 1 else ''))
content_raw = conversation.chat(
'You must add the content' + (f' for {file_name_s[0]}' if len(file_name_s) == 1 else ''))
content = parse_result_fn(content_raw)
for _file_name, _file_content in content.items():
persist_file(_file_content, os.path.join(destination_folder, _file_name))
@@ -137,12 +141,14 @@ metas:
packages,
num_approach,
):
MICROSERVICE_FOLDER_v1 = get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, 1)
MICROSERVICE_FOLDER_v1 = get_microservice_path(self.microservice_root_path, microservice_name, packages,
num_approach, 1)
os.makedirs(MICROSERVICE_FOLDER_v1)
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'microservice.py'), 'r') as f:
microservice_executor_boilerplate = f.read()
microservice_executor_code = microservice_executor_boilerplate.replace('class GPTDeployExecutor(Executor):', f'class {microservice_name}(Executor):')
microservice_executor_code = microservice_executor_boilerplate.replace('class GPTDeployExecutor(Executor):',
f'class {microservice_name}(Executor):')
persist_file(microservice_executor_code, os.path.join(MICROSERVICE_FOLDER_v1, EXECUTOR_FILE_NAME))
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'apis.py'), 'r') as f:
@@ -201,14 +207,14 @@ metas:
# })
# )
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r', encoding='utf-8') as f:
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r',
encoding='utf-8') as f:
docker_file_template_lines = f.readlines()
docker_file_template_lines = [line for line in docker_file_template_lines if not line.startswith('RUN apt-get update')]
docker_file_template_lines = [line for line in docker_file_template_lines if
not line.startswith('RUN apt-get update')]
docker_file_content = '\n'.join(docker_file_template_lines)
persist_file(docker_file_content, os.path.join(MICROSERVICE_FOLDER_v1, 'Dockerfile'))
self.write_config_yml(microservice_name, MICROSERVICE_FOLDER_v1)
print('\nFirst version of the microservice generated. Start iterating on it to make the tests pass...')
@@ -223,13 +229,15 @@ metas:
packages = ' '.join(json.loads(json_string)['packages'])
docker_file_template = self.read_docker_template()
return {DOCKER_FILE_NAME: docker_file_template.replace('{{apt_get_packages}}', '{apt_get_packages}').format(apt_get_packages=packages)}
return {DOCKER_FILE_NAME: docker_file_template.replace('{{apt_get_packages}}', '{apt_get_packages}').format(
apt_get_packages=packages)}
def parse_result_fn_requirements(self, content_raw: str):
content_parsed = self.extract_content_from_result(content_raw, 'requirements.txt', match_single_block=True)
lines = content_parsed.split('\n')
lines = [line for line in lines if not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo_api']])]
lines = [line for line in lines if
not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo']])]
content_modified = f'''jina==3.15.1.dev14
docarray==0.21.0
openai==0.27.5
@@ -291,12 +299,14 @@ pytest
for i in range(1, MAX_DEBUGGING_ITERATIONS):
print('Debugging iteration', i)
print('Trying to debug the microservice. Might take a while...')
previous_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, i)
next_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, i + 1)
previous_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
num_approach, i)
next_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
num_approach, i + 1)
log_hubble = push_executor(previous_microservice_path)
error = process_error_message(log_hubble)
if error:
print('An error occurred during the build process. Feeding the error back to the assistent...')
print('An error occurred during the build process. Feeding the error back to the assistant...')
self.do_debug_iteration(error, next_microservice_path, previous_microservice_path)
if i == MAX_DEBUGGING_ITERATIONS - 1:
raise self.MaxDebugTimeReachedException('Could not debug the microservice.')
@@ -356,7 +366,8 @@ pytest
summarized_error=summarized_error,
task_description=self.microservice_specification.task,
test_description=self.microservice_specification.test,
all_files_string=self.files_to_string({key: val for key, val in file_name_to_content.items() if key != EXECUTOR_FILE_NAME}),
all_files_string=self.files_to_string(
{key: val for key, val in file_name_to_content.items() if key != EXECUTOR_FILE_NAME}),
)
class MaxDebugTimeReachedException(BaseException):
@@ -367,16 +378,22 @@ pytest
def is_dependency_issue(self, summarized_error, dock_req_string: str, package_manager: str):
# a few heuristics to quickly jump ahead
if any([error_message in summarized_error for error_message in ['AttributeError', 'NameError', 'AssertionError']]):
if any([error_message in summarized_error for error_message in
['AttributeError', 'NameError', 'AssertionError']]):
return False
if package_manager.lower() == 'pip' and any([em in summarized_error for em in ['ModuleNotFoundError', 'ImportError']]):
if package_manager.lower() == 'pip' and any(
[em in summarized_error for em in ['ModuleNotFoundError', 'ImportError']]):
return True
print_colored('', f'Is it a {package_manager} dependency issue?', 'blue')
conversation = self.gpt_session.get_conversation()
answer = conversation.chat(
template_is_dependency_issue.format(summarized_error=summarized_error, all_files_string=dock_req_string).replace('PACKAGE_MANAGER', package_manager)
answer_raw = conversation.chat(
template_is_dependency_issue.format(summarized_error=summarized_error,
all_files_string=dock_req_string).replace('PACKAGE_MANAGER',
package_manager)
)
answer_json_string = self.extract_content_from_result(answer_raw, 'response.json', match_single_block=True, )
answer = json.loads(answer_json_string)['dependency_installation_failure']
return 'yes' in answer.lower()
def generate_microservice_name(self, description):
@@ -400,14 +417,10 @@ pytest
description=self.microservice_specification.task
)['strategies.json']
packages_list = [[pkg.strip().lower() for pkg in packages] for packages in json.loads(packages_json_string)]
packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in packages_list]
packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in
packages_list]
packages_list = [
packages for packages in packages_list if len(set(packages).intersection(set(PROBLEMATIC_PACKAGES))) == 0
]
packages_list = [
[package for package in packages if package not in UNNECESSARY_PACKAGES] for packages in packages_list
]
packages_list = self.filter_packages_list(packages_list)
packages_list = packages_list[:NUM_IMPLEMENTATION_STRATEGIES]
return packages_list
@@ -461,8 +474,10 @@ gptdeploy deploy --path {self.microservice_root_path}
'task',
'',
template_pm_task_iteration,
micro_service_initial_description=f'''Microservice description:
micro_service_initial_description=f'''Microservice description:
```
{self.microservice_specification.task}
```
''',
)
self.refine_requirements(
@@ -497,11 +512,14 @@ Test scenario:
{self.microservice_specification.test}
''')
def refine_requirements(self, pm, messages, refinement_type, custom_suffix, template_pm_iteration, micro_service_initial_description=None):
def refine_requirements(self, pm, messages, refinement_type, custom_suffix, template_pm_iteration,
micro_service_initial_description=None):
user_input = self.microservice_specification.task
num_parsing_tries = 0
while True:
conversation = self.gpt_session.get_conversation(messages, print_stream=os.environ['VERBOSE'].lower() == 'true', print_costs=False)
conversation = self.gpt_session.get_conversation(messages,
print_stream=os.environ['VERBOSE'].lower() == 'true',
print_costs=False)
agent_response_raw = conversation.chat(
template_pm_iteration.format(
custom_suffix=custom_suffix,
@@ -510,22 +528,24 @@ Test scenario:
role='user'
)
messages.append(HumanMessage(content=user_input))
agent_question = self.extract_content_from_result(agent_response_raw, 'prompt.txt', can_contain_code_block=False)
final = self.extract_content_from_result(agent_response_raw, 'final.txt', can_contain_code_block=False)
agent_question = self.extract_content_from_result(agent_response_raw, 'prompt.json',
can_contain_code_block=False)
final = self.extract_content_from_result(agent_response_raw, 'final.json', can_contain_code_block=False)
if final:
messages.append(AIMessage(content=final))
setattr(self.microservice_specification, refinement_type, final)
break
elif agent_question:
messages.append(AIMessage(content=agent_question))
user_input = self.get_user_input(pm, agent_question)
question_parsed = json.loads(agent_question)['question']
messages.append(AIMessage(content=question_parsed))
user_input = self.get_user_input(pm, question_parsed)
else:
if num_parsing_tries > 2:
raise self.TaskRefinementException()
num_parsing_tries += 1
messages.append(AIMessage(content=agent_response_raw))
messages.append(SystemMessage(content='You did not put your answer into the right format using *** and ```.'))
messages.append(
SystemMessage(content='You did not put your answer into the right format using *** and ```.'))
@staticmethod
def get_user_input(employee, prompt_to_user):
@@ -537,9 +557,28 @@ Test scenario:
@staticmethod
def replace_with_gpt_3_5_turbo_if_possible(pkg):
if pkg in ['allennlp', 'bertopic', 'fasttext', 'flair', 'gensim', 'nltk',
'pattern', 'polyglot', 'pytorch-transformers', 'rasa', 'sentence-transformers',
'spacy', 'stanza', 'textblob', 'textstat', 'transformers']:
return 'gpt_3_5_turbo_api'
if pkg in LANGUAGE_PACKAGES:
return 'gpt_3_5_turbo'
return pkg
@staticmethod
def filter_packages_list(packages_list):
# filter out complete package lists
packages_list = [
packages for packages in packages_list if all([
pkg not in BLACKLISTED_PACKAGES # no package is allowed to be blacklisted
for pkg in packages
])
]
# filter out single packages
packages_list = [
[
package for package in packages
if (package not in UNNECESSARY_PACKAGES)
and ( # all packages must be on pypi or it is gpt_3_5_turbo
is_package_on_pypi(package)
or package == 'gpt_3_5_turbo'
)
] for packages in packages_list
]
return packages_list
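To make the new filtering step concrete, here is a hedged, self-contained sketch of the same two-stage logic: a whole package combination is dropped if it contains any blacklisted package, then, within the surviving combinations, unnecessary packages and packages that are neither on PyPI nor the gpt_3_5_turbo placeholder are removed. The lists and the PyPI check below are illustrative stand-ins, not the project's constants:

```python
# Illustrative stand-ins; the real code uses BLACKLISTED_PACKAGES,
# UNNECESSARY_PACKAGES and is_package_on_pypi from src/.
BLACKLISTED = {'moderngl', 'tika'}
UNNECESSARY = {'fastapi'}


def on_pypi(pkg):
    # stand-in for the real PyPI lookup; e.g. stdlib modules are not on PyPI
    return pkg not in {'base64'}


def filter_packages_list(packages_list):
    # 1) drop every combination that contains a blacklisted package
    packages_list = [
        pkgs for pkgs in packages_list
        if all(p not in BLACKLISTED for p in pkgs)
    ]
    # 2) within each combination, keep only packages that are not unnecessary
    #    and are either on PyPI or the gpt_3_5_turbo placeholder
    return [
        [p for p in pkgs
         if p not in UNNECESSARY and (on_pypi(p) or p == 'gpt_3_5_turbo')]
        for pkgs in packages_list
    ]


assert filter_packages_list([['gpt_3_5_turbo', 'requests', 'base64']]) == \
       [['gpt_3_5_turbo', 'requests']]
```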

View File

@@ -6,7 +6,7 @@ RUN apt-get update && apt-get install --no-install-recommends -y {{apt_get_packa
## install requirements for the executor
COPY requirements.txt .
RUN pip install --compile -r requirements.txt
RUN pip -v install --compile -r requirements.txt
# setup the workspace
COPY . /workdir/

View File

@@ -5,7 +5,7 @@ import openai
openai.api_key = os.getenv("OPENAI_API_KEY")
class GPT_3_5_Turbo_API:
class GPT_3_5_Turbo:
def __init__(self, system: str = ''):
self.system = system

View File

@@ -40,19 +40,26 @@ a)
If the description is not sufficiently specified, then ask for the missing information.
Your response must exactly match the following block code format (double asterisks for the file name and triple backticks for the file block):
**prompt.txt**
```text
<prompt to the client here>
**prompt.json**
```json
{{
"question": "<prompt to the client here>"
}}
```
b)
Otherwise you respond with the summarized description.
The summarized description must contain all the information mentioned by the client.
Otherwise you respond with the detailed description.
The detailed description must contain all the information mentioned by the client.
Your response must exactly match the following block code format (double asterisks for the file name and triple backticks for the file block):
**final.txt**
```text
<task here>
**final.json**
```json
{{
"description": "<microservice description here>",
"code_samples": "<code samples from the client here>",
"documentation_info": "<documentation info here>",
"credentials: "<credentials here>"
}}
```
The character sequence ``` must always be at the beginning of the line.
@@ -64,9 +71,11 @@ output: defined
api access: not defined
database access: n/a
**prompt.txt**
```text
Please provide the url of the weather api and a valid api key or some other way accessing the api. Or let our engineers try to find a free api.
**prompt.json**
```json
{{
"question": "Please provide the url of the weather api and a valid api key or some other way accessing the api. Or let our engineers try to find a free api."
}}
```
Example for the description "convert png to svg":
@@ -75,9 +84,14 @@ output: defined
api access: n/a
database access: n/a
**final.txt**
```text
The user inserts a png and gets an svg as response.
**final.json**
```json
{{
"description": "The user inserts a png and gets an svg as response.",
"code_samples": "n/a",
"documentation_info": "n/a",
"credentials: "n/a"
}}
```
Example for the description "parser":
@@ -86,9 +100,11 @@ output: not defined
api access: n/a
database access: n/a
**prompt.txt**
```text
Please provide the input and output format.
**prompt.json**
```json
{{
"question": "Please provide the input and output format."
}}
```
'''
@@ -104,9 +120,11 @@ Your response must exactly match the following block code format (double asteris
1.
contains example: no
2.
**prompt.txt**
```text
<prompt to the client here>
**prompt.json**
```json
{{
"question": "<prompt to the client here>"
}}
```
If you did a, you must not do b.
@@ -117,10 +135,12 @@ Your response must exactly match the following block code format (double asteris
1.
contains example: yes (<insert example here>)
2.
**final.txt**
```text
input: "<input here>"
assertion: the output is of type <type here>
**final.json**
```json
{{
"input": "<input here>",
"assertion": "the output contains the result that is of type <type here>"
}}
```
If you did b, you must not do a.
@@ -129,37 +149,46 @@ Example for: "given a city like "Berlin", get the weather report for the next 5
1.
contains example: yes (Berlin)
2.
**final.txt**
```text
input: "Berlin"
assertion: the output is of type string
**final.json**
```json
{{
"input": "Berlin",
"assertion": "the output is of type string"
}}
```
Example for "The user inserts a png and gets an svg as response.":
1.
contains example: no
2.
**prompt.txt**
```text
Please provide a png example input file as url.
**prompt.json**
```json
{{
"question": "Please provide a png example input file as url."
}}
```
Example for "The user inserts a png like https://aquasecurity.github.io/kube-bench/v0.6.5/images/kube-bench-logo-only.png and gets an svg as response.":
1.
contains example: yes (https://aquasecurity.github.io/kube-bench/v0.6.5/images/kube-bench-logo-only.png)
2.
**final.txt**
```text
input: "https://aquasecurity.github.io/kube-bench/v0.6.5/images/kube-bench-logo-only.png"
assertion: the output is of type svg
**final.json**
```json
{{
"input": "https://aquasecurity.github.io/kube-bench/v0.6.5/images/kube-bench-logo-only.png",
"assertion": "the output is of type svg"
}}
```
Example for "The microservice takes nothing as input and returns the current time.":
1.
contains example: n/a
**final.txt**
```text
input: "nothing"
assertion: the output is of type string
**final.json**
```json
{{
"input": "nothing",
"assertion": "the output is of type string"
}}
```
'''

View File

@@ -24,7 +24,8 @@ The implemented function and the test must not contain prototype or placeholder
The implemented function and the test must run in a docker container based on debian.
The implemented function and the test must not use libraries like Flask.
The implemented function and the test must not have a __main__ function.
The implemented function and the test must use gpt_3_5_turbo_api if the task requires understanding, generation, summarization, transformation, translation or classification of natural language or using any language model. Other language models are not allowed.'''
The implemented function and the test must use gpt_3_5_turbo if the task requires the processing of language (e.g., understanding, generation, summarization, transformation, translation, modification or classification). Other language models are not allowed.
The implemented function must follow a clean and elegant coding style.'''
template_generate_microservice_name = PromptTemplate.from_template(
@@ -49,7 +50,7 @@ list of lists wrapped into ``` and name it **strategies.json**. \
Do not use quotation marks around package names in the output. \
Separate packages in a combination by comma. \
Note that you can also leave a line empty to indicate that one of the strategies does not require any package and can be done in plain python.
The output looks like this:
Write the output using double asterisks and triple backticks like this:
**strategies.json**
```
[
@@ -67,8 +68,8 @@ template_generate_possible_packages = PromptTemplate.from_template(
"{description}"
1. Write down up to 3 different strategies to solve the task. For each strategy write down all the non-trivial subtasks you need to solve. If there is a natural language understanding or generation strategy, write it down.
2. Find out what is the core problem to solve.
3. List up to 10 Python packages that are specifically designed or have functionalities to solve the complete core problem with one of the defined strategies. You must add gpt_3_5_turbo_api if the task involves generating or understanding natural language or using a (pre-trained) language model.
4. Exclude any package that can generate or understand natural language or enables using any language model, but you must not exclude gpt_3_5_turbo_api. Print the cleaned list of packages and give a brief reason for keeping it after its name.
3. List up to 10 Python packages that are specifically designed or have functionalities to solve the complete core problem with one of the defined strategies. You must add gpt_3_5_turbo if the task involves generating or understanding natural language or using a (pre-trained) language model.
4. Exclude any package that can generate or understand natural language or enables using any language model, but you must not exclude gpt_3_5_turbo. Print the cleaned list of packages and give a brief reason for keeping it after its name.
5. For each cleaned package think if it fulfills the following requirements:
a) specifically designed or have functionalities to solve the complete core problem.
b) has a stable api among different versions
@@ -81,7 +82,7 @@ When answering, just write "yes" or "no".
6. Determine the 5 most suitable python package combinations, ordered from the best to the least suitable. Combine the packages to achieve a comprehensive solution.
If the package is mentioned in the description, then it is automatically the best one.
If you listed gpt_3_5_turbo_api earlier, you must use it. gpt_3_5_turbo_api is the best package for handling text-based tasks. Also, gpt_3_5_turbo_api doesn't need any other packages processing text or using language models. It can handle any text-based task alone.
If you listed gpt_3_5_turbo earlier, you must use it. gpt_3_5_turbo is the best package for handling text-based tasks. Also, gpt_3_5_turbo doesn't need any other packages processing text or using language models. It can handle any text-based task alone.
''' + template_generate_possible_packages_output_format_string)
@@ -94,11 +95,11 @@ template_code_wrapping_string = '''The code will go into {file_name_purpose}. Ma
You must provide the complete file with the exact same syntax to wrap the code.'''
gpt_35_turbo_usage_string = """If need to use gpt_3_5_turbo_api, then this is an example on how to use it:
gpt_35_turbo_usage_string = """If need to use gpt_3_5_turbo, then this is an example on how to use it:
```
from .apis import GPT_3_5_Turbo_API
from .apis import GPT_3_5_Turbo
gpt_3_5_turbo_api = GPT_3_5_Turbo_API(
gpt_3_5_turbo = GPT_3_5_Turbo(
system=\'\'\'
You are a tv-reporter who is specialized in C-list celebrities.
When you get asked something like 'Who was having a date with <X>?', then you answer with a json like '{{"dates": ["<Y>", "<Z>"]}}'.
@@ -118,12 +119,16 @@ The function must full-fill: '{microservice_description}'.
It will be tested with the following scenario: '{test_description}'.
For the implementation use the following package(s): '{packages}'.
The code must start with the following import:
```
from .apis import GPT_3_5_Turbo
```
Obey the following rules:
''' + not_allowed_function_string + '''
Your approach:
1. Identify the core challenge when implementing the function.
2. Think about solutions for these challenges. If gpt_3_5_turbo_api is mentioned in the above list of packages, then you must use it.
2. Think about solutions for these challenges. If gpt_3_5_turbo is mentioned in the above list of packages, then you must use it.
3. Decide for one of the solutions.
4. Write the code for the function. Don't write code for the test.
''' + gpt_35_turbo_usage_string + '\n' + template_code_wrapping_string
@@ -136,13 +141,12 @@ template_generate_test = PromptTemplate.from_template(
{code_files_wrapped}
Write a single pytest case that tests the following scenario: '{test_description}'. In case the test scenario is not precise enough, test a general case without any assumptions.
Start the test with an extensive comment about the test case. If gpt_3_5_turbo_api is used in the executor, then the test must not check the exact output of the executor as it is not deterministic.
Start the test with an extensive comment about the test case. If gpt_3_5_turbo is used in the executor, then the test must not check the exact output of the executor as it is not deterministic.
Use the following import to import the function:
The test must start with the following import:
```
from .implementation import func
```
''' + not_allowed_function_string + '''
The test must not open local files.
The test must not mock a function of the executor.
@@ -163,7 +167,7 @@ Write the content of the requirements.txt file like this:
...
```
Add any more packages that are needed to run the code.
You must not add gpt_3_5_turbo_api to the requirements.txt file.
You must not add gpt_3_5_turbo to the requirements.txt file.
All versions are fixed using ~=, ==, <, >, <=, >=. The package versions must not have conflicts. Output only the requirements.txt file.
''' + '\n' + template_code_wrapping_string
@@ -221,7 +225,17 @@ You are given the following files:
{all_files_string}
Is this a PACKAGE_MANAGER dependency installation failure? Answer with "yes" or "no".'''
Is this error happening because a PACKAGE_MANAGER package is missing or failed to install?
1. Write down one bullet point on why the error might happen because a PACKAGE_MANAGER package is missing or failed to install.
2. Write down one bullet point on why it is unlikely that the error happens because a PACKAGE_MANAGER package is missing or failed to install.
3. Write down your final answer.
4. Write down your final answer as json in the following format:
**response.json**
```json
{{"dependency_installation_failure": "<yes/no>"}}
```
Note that you must obey the double asterisk and triple backtick syntax from above.
'''
)
@@ -259,16 +273,33 @@ jina==2.0.0
template_solve_apt_get_dependency_issue = PromptTemplate.from_template(
'''Your task is to provide guidance on how to solve an error that occurred during the Docker build process.
Here is the summary of the error that occurred:
{summarized_error}
'''Your task is to provide guidance on how to solve an error that occurred during the Docker build process.
You are given the following files:
{all_files_string}
Here is the summary of the error that occurred:
{summarized_error}
To solve this error, you should determine the list of packages that need to be installed via `apt-get install` in the Dockerfile.
Output them as a white space separated list:'''
Output the apt-get packages that need to be placed at {{apt_get_packages}} as json in the following format:
**apt-get-packages.json**
```json
{{"packages": ["<package1>", "<package2>"]}}
```
Example for the following requirements.txt file:
**requirements.txt**
```
numpy==1.19.5
fitz
```
The output would be:
**apt-get-packages.json**
```json
{{"packages": []}}
```
Note that you must not output any other files. Only output the apt-get-packages.json file.
'''
)
@@ -374,22 +405,30 @@ The playground (app.py) must not import the executor.
template_pm_task_iteration = PromptTemplate.from_template(
'''{micro_service_initial_description}
1.Quickly go through the checklist (input/output well defined? api or db access needed?) and think about if you should ask something to the client or if you should write the final description.
2.Either write the prompt.txt or the final.txt file.
2.Either write the prompt.json or the final.json file.
Either ask for clarification like this:
**prompt.txt**
```text
<prompt to the client here (must be only one question)>
**prompt.json**
```json
{{
"question": "<prompt to the client here (must be only one question)>"
}}
```
Or write the summarized microservice description like this:
**final.txt**
```text
<microservice description here>
Or write the detailed microservice description all mentioned code samples, documentation info and credentials like this:
**final.json**
```json
{{
"description": "<microservice description here>",
"example_input": "<example input file or string here if mentioned before otherwise n/a>",
"code_samples": "<code samples from the client here>",
"documentation_info": "<documentation info here>",
"credentials: "<credentials here>"
}}
```
Note that your response must be either prompt.txt or final.txt. You must not write both.
Note that your response must be either prompt.json or final.json. You must not write both.
Note that you must obey the double asterisk and triple backtick syntax from above.
Note that the last sequence of characters in your response must be ``` (triple backtick).
Note that prompt.txt must not only contain one question.
Note that prompt.json must not only contain one question.
Note that if urls, secrets, database names, etc. are mentioned, they must be part of the summary.
{custom_suffix}
'''
@@ -397,36 +436,44 @@ Note that if urls, secrets, database names, etc. are mentioned, they must be par
template_pm_test_iteration = PromptTemplate.from_template(
'''{micro_service_initial_description}
1. write down if the original description and the refined description contain an example input for the microservice.
2. write down either prompt.txt or final.txt.
If the example input for the microservice is mentioned in the refined description or the original description, then output final.txt.
Otherwise, output prompt.txt where you ask for the example input file as URL or the example string.
1. write down if the microservice requires input.
2. if it requires input, then write down if the original description or the refined description contain an example input for the microservice.
3. write down either prompt.json or final.json.
If the example input for the microservice is mentioned in the refined description or the original description, then output final.json.
Otherwise, output prompt.json where you ask for the example input file as URL or the example string.
Except for urls, you should come up with your own example input that makes sense for the microservice description.
Example for the case where an example input file is required and was not mentioned before:
**prompt.txt**
```text
Can you please provide an example input file as URL?
**prompt.json**
```json
{{
"question": "Can you please provide an example input file as URL?"
}}
```
Example for the case where the example input string is required and was not mentioned before:
**prompt.txt**
```text
Can you please provide an example input string?
**prompt.json**
```json
{{
"question": "Can you please provide an example input string?"
}}
```
Note that you must not ask for an example input in case the example input is already mentioned in the refined description or the original description.
Note that you must not ask for an example input in case the microservice does not require input.
Example for the case where the example is already mentioned in the refined description or the original description:
**final.txt**
```text
input: <input here>
assertion: the output is of type <type here>
```
Note that your response must be either prompt.txt or final.txt. You must not write both.
**final.json**
```json
{{
"input": "<input here>",
"assertion": "the output contains the result that is of type <type here>"
}}
```
Note that your response must be either prompt.json or final.json. You must not write both.
Note that you must obey the double asterisk and triple backtick syntax from above.
Note that the last sequence of characters in your response must be ``` (triple backtick).
Note that your response must start with the character sequence ** (double asterisk).
Note that prompt.txt must only contain one question.
Note that prompt.json must only contain one question.
{custom_suffix}
'''
)

View File

View File

@@ -20,16 +20,13 @@ def test_generation_level_0(tmpdir):
os.environ['VERBOSE'] = 'true'
generator = Generator(
"The microservice is very simple, it does not take anything as input and only outputs the word 'test'",
str(tmpdir) + 'microservice',
str(tmpdir),
'gpt-3.5-turbo'
)
assert generator.generate() == 0
# fixture
@pytest.fixture
def tmpdir():
return 'microservice'
def test_generation_level_1(tmpdir):
"""
@@ -47,7 +44,7 @@ def test_generation_level_1(tmpdir):
Example tweet:
\'When your coworker microwaves fish in the break room... AGAIN. 🐟🤢
But hey, at least SOMEONE's enjoying their lunch. #officelife\'''',
str(tmpdir) + 'microservice',
str(tmpdir),
'gpt-3.5-turbo'
)
assert generator.generate() == 0
@@ -66,27 +63,50 @@ def test_generation_level_2(tmpdir):
os.environ['VERBOSE'] = 'true'
generator = Generator(
"The input is a PDF like https://www.africau.edu/images/default/sample.pdf and the output the summarized text (50 words).",
str(tmpdir) + 'microservice',
str(tmpdir),
'gpt-3.5-turbo'
)
assert generator.generate() == 0
@pytest.mark.skip(reason="not possible")
def test_generation_level_3(tmpdir):
"""
Requirements:
coding challenge: (calculate the average closing price)
pip packages:
environment:
GPT-3.5-turbo: (for processing the text)
APIs: (financial data API)
Databases:
"""
os.environ['VERBOSE'] = 'true'
generator = Generator(
f'''The input is a stock symbol (e.g., AAPL for Apple Inc.).
1. Fetch stock data (open, high, low, close, volume) for the past 30 days using a financial data API Yahoo Finance.
2. Calculate the average closing price over the 30 days.
3. Generate a brief summary of the company's stock performance over the past 30 days, including the average closing price and the company name.
4. Return the summary as a string.
Example input: 'AAPL'
''',
str(tmpdir),
'gpt-3.5-turbo'
)
assert generator.generate() == 0
def test_generation_level_4(tmpdir):
"""
Requirements:
coding challenge:
pip packages: (text to speech)
environment:
environment: (tts library)
GPT-3.5-turbo: (summarizing the text)
APIs: (whisper for speech to text)
Databases:
"""
os.environ['VERBOSE'] = 'true'
generator = Generator(
f'''Given an audio file of speech like https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav,
get convert it to text using the following api:
f'''Given an audio file (1min wav) of speech,
1. convert it to text using the Whisper API.
Here is the documentation on how to use the API:
import requests
url = "https://transcribe.whisperapi.com"
headers = {{
@@ -95,30 +115,33 @@ headers = {{
data = {{
"url": "URL_OF_STORED_AUDIO_FILE"
}}
response = requests.post(url, headers=headers, files=file, data=data)
print(response.text)
Summarize the text.
Create an audio file of the summarized text.
response = requests.post(url, headers=headers, data=data)
assert response.status_code == 200
print('This is the text from the audio file:', response.json()['text'])
2. Summarize the text (~50 words) while still maintaining the key facts.
3. Create an audio file of the summarized text using a tts library.
4. Return the audio file as base64 encoded binary.
Example input file: https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav
''',
str(tmpdir) + 'microservice',
'gpt-3.5-turbo'
str(tmpdir),
'gpt-4'
)
assert generator.generate() == 0
@pytest.mark.skip(reason="not possible")
def test_generation_level_4(tmpdir):
def test_generation_level_5(tmpdir):
"""
Requirements:
coding challenge: (putting text on the image)
pip packages: (Pillow for image processing)
environment:
environment: (image library)
GPT-3.5-turbo: (for writing the joke)
APIs: (scenex for image description)
Databases:
"""
os.environ['VERBOSE'] = 'true'
generator = Generator(f'''
The input is an image like this: https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/560px-PNG_transparency_demonstration_1.png.
The input is an image.
Use the following api to get the description of the image:
Request:
curl "https://us-central1-causal-diffusion.cloudfunctions.net/describe" \\
@@ -137,12 +160,18 @@ Result format:
}}
The description is then used to generate a joke.
The joke is then put on the image.
The output is the image with the joke on it.''',
str(tmpdir) + 'microservice',
The output is the image with the joke on it.
Example input image: https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/560px-PNG_transparency_demonstration_1.png
''',
str(tmpdir),
'gpt-3.5-turbo'
)
assert generator.generate() == 0
@pytest.fixture
def tmpdir():
return 'microservice'
# further ideas:
# Create a wrapper around google called Joogle. It modifies the page summary preview text of the search results to insert the word Jina as much as possible.
# Create a wrapper around google called Joogle. It modifies the page summary preview text of the search results to insert the word Jina as much as possible.

View File

@@ -1,6 +0,0 @@
from src.apis.jina_cloud import is_executor_in_hub
def test_is_microservice_in_hub():
assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False
assert is_executor_in_hub('CLIPImageEncoder') is True

test/unit/__init__.py (new file, 0 lines)
View File

test/unit/test_api.py (new file, 34 lines added)
View File

@@ -0,0 +1,34 @@
from src.apis.jina_cloud import is_executor_in_hub
from src.apis.pypi import is_package_on_pypi
from src.options.generate.generator import Generator
def test_is_microservice_in_hub():
assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False
assert is_executor_in_hub('CLIPImageEncoder') is True
def test_is_package_on_pypi():
assert is_package_on_pypi('jina') is True
assert is_package_on_pypi('jina', '0.9.25') is True
assert is_package_on_pypi('jina', '10.10.10') is False
assert is_package_on_pypi('jina-jina-jina') is False
assert is_package_on_pypi('jina-jina-jina', '0.9.25') is False
assert is_package_on_pypi('jina-jina-jina', '10.10.10') is False
def test_filter_packages_list():
filtered_list = Generator.filter_packages_list([
["gpt_3_5_turbo", "requests", "base64", "gtts", "pydub"],
["requests", "base64", "gtts", "pydub"],
["gpt_3_5_turbo", "requests", "base64", "gtts"],
["gpt_3_5_turbo", "requests", "base64", "pydub"],
["requests", "base64", "gtts"]
])
assert filtered_list == [
["gpt_3_5_turbo", "requests", "gtts", "pydub"],
["requests", "gtts", "pydub"],
["gpt_3_5_turbo", "requests", "gtts"],
["gpt_3_5_turbo", "requests", "pydub"],
["requests", "gtts"]
]