feat: enhance package generation (#34)

* feat: enhance package generation

* fix: exclude unnecessary packages

* fix: use custom gpt with gpt turbo

* fix: fix curl command

* fix: fix usage of gpt

* fix: fix typo

* refactor: refactor package creation flow for
This commit is contained in:
Joschka Braun
2023-04-21 17:25:47 +02:00
committed by GitHub
parent ef2e6a2b2c
commit 0361f67ea7
3 changed files with 60 additions and 41 deletions


@@ -38,5 +38,7 @@ DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
PROBLEMATIC_PACKAGES = [
# 'Pyrender', 'Trimesh',
'ModernGL', 'PyOpenGL', 'Pyglet', 'pythreejs', 'panda3d' # because they need a screen
'ModernGL', 'PyOpenGL', 'Pyglet', 'pythreejs', 'panda3d', # because they need a screen,
]
UNNECESSARY_PACKAGES = ['FastAPI']


@@ -2,17 +2,19 @@ import os
import random
import re
import shutil
from typing import List
from src.apis import gpt
from src.apis.jina_cloud import process_error_message, push_executor
from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
PROBLEMATIC_PACKAGES, EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, DOCKER_FILE_TAG
from src.options.generate.templates_user import template_generate_microservice_name, template_generate_possible_packages, \
REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, DOCKER_FILE_TAG, UNNECESSARY_PACKAGES
from src.options.generate.templates_user import template_generate_microservice_name, \
template_generate_possible_packages, \
template_solve_code_issue, \
template_solve_dependency_issue, template_is_dependency_issue, template_generate_playground, \
template_generate_executor, template_generate_test, template_generate_requirements, template_generate_dockerfile, \
template_chain_of_thought, template_summarize_error
template_chain_of_thought, template_summarize_error, template_generate_possible_packages_output_format_string
from src.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
from src.utils.string_tools import print_colored
@@ -54,9 +56,9 @@ metas:
return all_microservice_files_string.strip()
def generate_and_persist_file(self, section_title, template, destination_folder, file_name, **template_kwargs):
def generate_and_persist_file(self, section_title, template, destination_folder=None, file_name=None, system_definition_examples: List[str] = ['gpt', 'executor', 'docarray', 'client'], **template_kwargs):
print_colored('', f'\n\n############# {section_title} #############', 'blue')
conversation = self.gpt_session.get_conversation()
conversation = self.gpt_session.get_conversation(system_definition_examples=system_definition_examples)
template_kwargs = {k: v for k, v in template_kwargs.items() if k in template.input_variables}
content_raw = conversation.chat(
template.format(
@@ -66,11 +68,12 @@ metas:
)
content = self.extract_content_from_result(content_raw, file_name, match_single_block=True)
if content == '':
content_raw = conversation.chat(f'You must add the {file_name} code.')
content_raw = conversation.chat(f'You must add the content for {file_name}.')
content = self.extract_content_from_result(
content_raw, file_name, match_single_block=True
)
persist_file(content, os.path.join(destination_folder, file_name))
if destination_folder:
persist_file(content, os.path.join(destination_folder, file_name))
return content
def generate_microservice(
@@ -258,11 +261,15 @@ metas:
def get_possible_packages(self):
print_colored('', '\n\n############# What packages to use? #############', 'blue')
conversation = self.gpt_session.get_conversation()
packages_raw = conversation.chat(
template_generate_possible_packages.format(description=self.task_description)
packages_csv_string = self.generate_and_persist_file(
'packages to use',
template_generate_possible_packages,
None,
file_name='packages.csv',
system_definition_examples=['gpt'],
description=self.task_description
)
packages_csv_string = self.extract_content_from_result(packages_raw, 'packages.csv')
packages_list = [[pkg.strip() for pkg in packages_string.split(',')] for packages_string in packages_csv_string.split('\n')]
packages_list = packages_list[:NUM_IMPLEMENTATION_STRATEGIES]
return packages_list
@@ -274,6 +281,9 @@ metas:
packages_list = [
packages for packages in packages_list if len(set(packages).intersection(set(PROBLEMATIC_PACKAGES))) == 0
]
packages_list = [
[package for package in packages if package not in UNNECESSARY_PACKAGES] for packages in packages_list
]
for num_approach, packages in enumerate(packages_list):
try:
self.generate_microservice(microservice_path, microservice_name, packages, num_approach)
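For readers skimming the hunk above, here is a minimal, self-contained sketch of how the two filters behave on a hypothetical packages_list; the sample package combinations are illustrative, not taken from a real model response.

```python
# Illustrative only: the same filtering applied to a made-up packages_list.
PROBLEMATIC_PACKAGES = ['ModernGL', 'PyOpenGL', 'Pyglet', 'pythreejs', 'panda3d']
UNNECESSARY_PACKAGES = ['FastAPI']

packages_list = [
    ['ModernGL', 'numpy'],       # dropped: contains a problematic package
    ['FastAPI', 'pdfplumber'],   # kept, but FastAPI is stripped out
    ['gpt_3_5_turbo_api'],       # kept unchanged
]

packages_list = [
    packages for packages in packages_list
    if len(set(packages).intersection(set(PROBLEMATIC_PACKAGES))) == 0
]
packages_list = [
    [package for package in packages if package not in UNNECESSARY_PACKAGES]
    for packages in packages_list
]
print(packages_list)  # [['pdfplumber'], ['gpt_3_5_turbo_api']]
```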


@@ -26,7 +26,7 @@ The executor and the test must not train a model.
The executor and the test must not use any attribute of Document except Document.text.
The executor and the test must not contain prototype or placeholder implementations.
The executor and the test must run in a docker container based on debian.
The executor and the test must use gpt_3_5_turbo_api if the task requires a language model. Other language models are not allowed.'''
The executor and the test must use gpt_3_5_turbo_api if the task requires understanding or generating natural language or using any language model. Other language models are not allowed.'''
@@ -47,13 +47,26 @@ PDFParserExecutor
)
template_generate_possible_packages_output_format_string = '''You must output the package combinations as a list of lists wrapped into ``` and name it **packages.csv**. Do not use quotation marks around package names in the output. Separate packages in a combination by commas. The output looks like this:
**{file_name}**
```
package1a, package1b ...
package2a, package2b, package2c
package3a ...
package4a ...
package5a ...
...
```'''
template_generate_possible_packages = PromptTemplate.from_template(
'''Here is the task description of the problem you need to solve:
"{description}"
1. Write down all the non-trivial subtasks you need to solve.
1. Write down the different strategies to solve the task. For each strategy, write down all the non-trivial subtasks you need to solve. If there is a natural language understanding or generation strategy, write it down.
2. Find out what is the core problem to solve.
3. List up to 15 Python packages that are specifically designed or have functionalities to solve the complete core problem.
4. For each of the 15 package think if it fulfills the following requirements:
3. List up to 15 Python packages that are specifically designed or have functionalities to solve the complete core problem with one of the defined strategies. You must add gpt_3_5_turbo_api if the task involves generating or understanding natural language or using a (pre-trained) language model.
4. Exclude any package that can generate or understand natural language or enables using any language model, but you must not exclude gpt_3_5_turbo_api. Print the cleaned list of packages and give a brief reason for keeping it after its name.
5. For each cleaned package think if it fulfills the following requirements:
a) is specifically designed or has functionalities to solve the complete core problem.
b) has a stable api among different versions
c) does not have system requirements
@@ -63,20 +76,11 @@ e) the implementation of the core problem using the package would obey the follo
When answering, just write "yes" or "no".
5. Output the most suitable 5 python packages starting with the best one.
6. Determine the 5 most suitable python package combinations, ordered from the best to the least suitable. Combine the packages to achieve a comprehensive solution.
If the package is mentioned in the description, then it is automatically the best one.
If you listed gpt_3_5_turbo_api earlier, you must use it. gpt_3_5_turbo_api is the best package for handling text-based tasks, and it doesn't need any other text-processing or language-model packages alongside it. It can handle any text-based task alone.
The output must be a list of lists wrapped into ``` and starting with **packages.csv** like this:
**packages.csv**
```
package1a, package1b ...
package2a, package2b, package2c
package3a ...
package4a ...
package5a ...
...
```
''')
''' + template_generate_possible_packages_output_format_string)
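As a side note, here is a minimal sketch of how a reply in the **packages.csv** format described above could be parsed back into package combinations, mirroring the split logic used in get_possible_packages; the helper name and the sample reply are hypothetical.

```python
# Hypothetical helper: parse a reply that follows the packages.csv format above.
def parse_packages_csv(packages_csv_string: str, max_strategies: int = 5) -> list:
    packages_list = [
        [pkg.strip() for pkg in line.split(',')]
        for line in packages_csv_string.strip().split('\n')
        if line.strip()
    ]
    return packages_list[:max_strategies]

reply = 'pdfplumber, gpt_3_5_turbo_api\ngpt_3_5_turbo_api\nPyPDF2, gpt_3_5_turbo_api'
print(parse_packages_csv(reply))
# [['pdfplumber', 'gpt_3_5_turbo_api'], ['gpt_3_5_turbo_api'], ['PyPDF2', 'gpt_3_5_turbo_api']]
```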
template_code_wrapping_string = '''The code will go into {file_name_purpose}. Make sure to wrap the code into ``` marks even if you only output code:
@@ -93,7 +97,7 @@ template_generate_executor = PromptTemplate.from_template(
Write the executor called '{microservice_name}'. The name is very important to keep.
It matches the following description: '{microservice_description}'.
It will be tested with the following scenario: '{test_description}'.
For the implementation use the following package: '{packages}'.
For the implementation use the following package(s): '{packages}'.
Obey the following rules:
Have in mind that d.uri is never a path to a local file. It is always a url.
@@ -101,10 +105,11 @@ Have in mind that d.uri is never a path to a local file. It is always a url.
Your approach:
1. Identify the core challenge when implementing the executor.
2. Think about solutions for these challenges including the usage of gpt via "gpt_3_5_turbo_api"
2. Think about solutions for these challenges. Use gpt_3_5_turbo_api if it is mentioned in the above list of packages.
3. Decide for one of the solutions.
4. Write the code for the executor. Don't write code for the test.
If you decided to use gpt, then the executor must include the following code:
If and only if gpt_3_5_turbo_api is in the package list, then you must always include the following code in microservice.py:
```
import os
import openai
openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -125,9 +130,10 @@ class GPT_3_5_Turbo_API:
}}]
)
return response.choices[0]['message']['content']
```
''' + '\n' + template_code_wrapping_string
''' + template_code_wrapping_string
)
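Since the hunk above only shows fragments of the snippet the template asks the model to embed (the imports, the api_key line, and the final return), here is a rough sketch of what the complete GPT_3_5_Turbo_API wrapper might look like with the pre-1.0 openai client; the class body is an assumption inferred from those fragments, not the exact code in the repository.

```python
# Assumed reconstruction of the wrapper referenced by the template; details may differ from the repo.
import os
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

class GPT_3_5_Turbo_API:
    def __init__(self, system: str = ''):
        self.system = system  # optional system prompt

    def __call__(self, prompt: str) -> str:
        # Legacy (pre-1.0) openai client call, matching the 2023-era fragments in the diff.
        response = openai.ChatCompletion.create(
            model='gpt-3.5-turbo',
            messages=[
                {'role': 'system', 'content': self.system},
                {'role': 'user', 'content': prompt},
            ]
        )
        return response.choices[0]['message']['content']
```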
@@ -137,7 +143,7 @@ template_generate_test = PromptTemplate.from_template(
{code_files_wrapped}
Write a single test case that tests the following scenario: '{test_description}'. In case the test scenario is not precise enough, test a general case without any assumptions.
Start the test with an extensive comment about the test case.
Start the test with an extensive comment about the test case. If gpt_3_5_turbo_api is used in the executor, then the test must not check the exact output of the executor as it is not deterministic.
Use the following import to import the executor:
```
@@ -148,6 +154,7 @@ from microservice import {microservice_name}
The test must not open local files.
The test must not mock a function of the executor.
The test must not use other data than the one provided in the test scenario.
The test must not set any environment variables which require a key.
''' + '\n' + template_code_wrapping_string
)
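To illustrate the non-determinism rule added above, here is a hedged sketch of what a compliant generated test could look like: it only checks that some text comes back, never the exact wording. The microservice name, endpoint method, and input text are hypothetical.

```python
# Hypothetical generated test: gpt_3_5_turbo_api output is non-deterministic,
# so the test asserts on structure, not on exact output text.
from docarray import Document, DocumentArray
from microservice import SummarizerExecutor  # hypothetical microservice name

def test_summarizer_returns_text():
    executor = SummarizerExecutor()
    docs = DocumentArray([Document(text='GPT deploy turns a description into a microservice.')])
    executor.summarize(docs, parameters={})  # hypothetical endpoint; assumed to fill docs[0].text in place
    assert isinstance(docs[0].text, str)
    assert docs[0].text.strip() != ''  # non-empty, but no exact-match assertion
```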
@@ -227,9 +234,8 @@ You are given the following files:
{all_files_string}
Output all the files that need change.
Don't output files that don't need change. If you output a file, then write the
complete file. Use the exact following syntax to wrap the code:
Output all the files that need change. Don't output files that don't need change.
If you output a file, then write the complete file. Use the exact following syntax to wrap the code:
**...**
```
@@ -270,8 +276,9 @@ Obey the following rules:
''' + f'{not_allowed_executor_string}\n{not_allowed_docker_string}' + '''
Output all the files that need change.
Don't output files that don't need change. If you output a file, then write the
complete file. Use the exact following syntax to wrap the code:
Don't output files that don't need change. If you output a file, then write the complete file.
If you change microservice.py and it uses gpt_3_5_turbo_api, then you must keep the code for gpt_3_5_turbo_api in the microservice.py file.
Use the exact following syntax to wrap the code:
**...**
```...
@@ -307,8 +314,8 @@ print(response[0].text) # can also be blob in case of image/audio..., this shoul
```
Note that the response will always be in response[0].text
The playground displays a code block containing the microservice-specific curl command that can be used to send a request to the microservice.
Example:
While the exact payload in the curl might change, the host and deployment ID always stay the same. Example:
```
deployment_id = os.environ.get("K8S_NAMESPACE_NAME", "")
host = f'https://gptdeploy-{{deployment_id.split("-")[1]}}.wolf.jina.ai/post' if deployment_id else "http://localhost:8080/post"
with st.expander("See curl command"):
@@ -316,7 +323,7 @@ with st.expander("See curl command"):
f'curl -X \\'POST\\' \\'host\\' -H \\'accept: application/json\\' -H \\'Content-Type: application/json\\' -d \\'{{{{"data": [{{{{"text": "hello, world!"}}}}]}}}}\\'',
language='bash'
)
```
You must provide the complete app.py file using the following syntax to wrap the code:
**app.py**
```python