From e4facfc7b5da76d095239f31ace54e12de977b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= Date: Tue, 2 May 2023 01:50:28 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=AA3=EF=B8=8F=E2=83=A3=20test:=20level?= =?UTF-8?q?=203=20refinement?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/apis/pypi.py | 12 ++++++++++++ src/constants.py | 4 ++-- src/options/generate/generator.py | 27 +++++++++++++++----------- src/options/generate/templates_user.py | 12 +++++++++++- test/test_api.py | 15 ++++++++++++++ test/test_generator.py | 2 +- test/test_hub.py | 6 ------ 7 files changed, 57 insertions(+), 21 deletions(-) create mode 100644 src/apis/pypi.py create mode 100644 test/test_api.py delete mode 100644 test/test_hub.py diff --git a/src/apis/pypi.py b/src/apis/pypi.py new file mode 100644 index 0000000..7b0456f --- /dev/null +++ b/src/apis/pypi.py @@ -0,0 +1,12 @@ +import requests + +def is_package_on_pypi(package_name, version=None): + optional_version = f"/{version}" if version else "" + url = f"https://pypi.org/pypi/{package_name}{optional_version}/json" + response = requests.get(url) + if response.status_code == 200: + return True + elif response.status_code == 404: + return False + else: + return None \ No newline at end of file diff --git a/src/constants.py b/src/constants.py index 8deda8f..1715861 100644 --- a/src/constants.py +++ b/src/constants.py @@ -39,9 +39,9 @@ MAX_DEBUGGING_ITERATIONS = 10 DEMO_TOKEN = '45372338e04f5a41af949024db929d46' -PROBLEMATIC_PACKAGES = [ +BLACKLISTED_PACKAGES = [ # 'Pyrender', 'Trimesh', 'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d', # because they need a screen, + 'fastapi', 'uvicorn', 'starlette', # because we use jina executors ] -UNNECESSARY_PACKAGES = ['fastapi'] diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py index 9a62e34..021dc34 100644 --- a/src/options/generate/generator.py +++ b/src/options/generate/generator.py @@ -14,9 +14,10 @@ from pydantic.dataclasses import dataclass from src.apis import gpt from src.apis.gpt import _GPTConversation from src.apis.jina_cloud import process_error_message, push_executor, is_executor_in_hub +from src.apis.pypi import is_package_on_pypi from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \ - PROBLEMATIC_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \ - REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, UNNECESSARY_PACKAGES, IMPLEMENTATION_FILE_NAME, \ + BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \ + REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \ IMPLEMENTATION_FILE_TAG from src.options.generate.templates_system import system_task_iteration, system_task_introduction, system_test_iteration from src.options.generate.templates_user import template_generate_microservice_name, \ @@ -52,7 +53,7 @@ class Generator: return match.group(1).strip() elif match_single_block: # Check for a single code block - single_code_block_pattern = r"^```(?:\w+\n)?([\s\S]*?)```" + single_code_block_pattern = r"```(?:\w+\n)?([\s\S]*?)```" single_code_block_match = re.findall(single_code_block_pattern, plain_text, re.MULTILINE) if len(single_code_block_match) == 1: return single_code_block_match[0].strip() @@ -374,9 +375,11 @@ pytest print_colored('', f'Is it a {package_manager} dependency issue?', 'blue') conversation = self.gpt_session.get_conversation() - answer = conversation.chat( + answer_raw = conversation.chat( template_is_dependency_issue.format(summarized_error=summarized_error, all_files_string=dock_req_string).replace('PACKAGE_MANAGER', package_manager) ) + answer_json_string = self.extract_content_from_result(answer_raw, 'response.json', match_single_block=True, ) + answer = json.loads(answer_json_string)['dependency_installation_failure'] return 'yes' in answer.lower() def generate_microservice_name(self, description): @@ -402,12 +405,7 @@ pytest packages_list = [[pkg.strip().lower() for pkg in packages] for packages in json.loads(packages_json_string)] packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in packages_list] - packages_list = [ - packages for packages in packages_list if len(set(packages).intersection(set(PROBLEMATIC_PACKAGES))) == 0 - ] - packages_list = [ - [package for package in packages if package not in UNNECESSARY_PACKAGES] for packages in packages_list - ] + packages_list = self.filter_packages_list(packages_list) packages_list = packages_list[:NUM_IMPLEMENTATION_STRATEGIES] return packages_list @@ -542,7 +540,14 @@ Test scenario: def replace_with_gpt_3_5_turbo_if_possible(pkg): if pkg in ['allennlp', 'bertopic', 'fasttext', 'flair', 'gensim', 'nltk', 'pattern', 'polyglot', 'pytorch-transformers', 'rasa', 'sentence-transformers', - 'spacy', 'stanza', 'summarizer', 'textblob', 'textstat', 'transformers']: + 'spacy', 'stanza', 'summarizer', 'sumy', 'textblob', 'textstat', 'transformers']: return 'gpt_3_5_turbo_api' return pkg + + @staticmethod + def filter_packages_list(packages_list): + packages_list = [ + [package for package in packages if package not in BLACKLISTED_PACKAGES and is_package_on_pypi(package)] for packages in packages_list + ] + return packages_list diff --git a/src/options/generate/templates_user.py b/src/options/generate/templates_user.py index 9f3aeb9..ff4d71a 100644 --- a/src/options/generate/templates_user.py +++ b/src/options/generate/templates_user.py @@ -221,7 +221,17 @@ You are given the following files: {all_files_string} -Is this a PACKAGE_MANAGER dependency installation failure? Answer with "yes" or "no".''' +Is this a PACKAGE_MANAGER dependency installation failure? +1. Write down one bullet point on why it could be a PACKAGE_MANAGER dependency installation failure. +2. Write down one bullet point on why it is unlikely that it is a PACKAGE_MANAGER dependency installation failure. +3. Write down your final answer. +4. Write down your final answer as json in the following format: +**response.json** +```json +{{"dependency_installation_failure": ""}} +``` +Note that you must obey the double asterisk and tripple backtick syntax from above. +''' ) diff --git a/test/test_api.py b/test/test_api.py new file mode 100644 index 0000000..4a5e9e6 --- /dev/null +++ b/test/test_api.py @@ -0,0 +1,15 @@ +from src.apis.jina_cloud import is_executor_in_hub +from src.apis.pypi import is_package_on_pypi + + +def test_is_microservice_in_hub(): + assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False + assert is_executor_in_hub('CLIPImageEncoder') is True + +def test_is_package_on_pypi(): + assert is_package_on_pypi('jina') is True + assert is_package_on_pypi('jina', '0.9.25') is True + assert is_package_on_pypi('jina', '10.10.10') is False + assert is_package_on_pypi('jina-jina-jina') is False + assert is_package_on_pypi('jina-jina-jina', '0.9.25') is False + assert is_package_on_pypi('jina-jina-jina', '10.10.10') is False diff --git a/test/test_generator.py b/test/test_generator.py index 251509b..29dbbea 100644 --- a/test/test_generator.py +++ b/test/test_generator.py @@ -96,7 +96,7 @@ headers = {{ data = {{ "url": "URL_OF_STORED_AUDIO_FILE" }} -response = requests.post(url, headers=headers, files=file, data=data) +response = requests.post(url, headers=headers, data=data) print(response.text) 2. Summarize the text (~50 words) while still maintaining the key facts. 3. Create an audio file of the summarized text using a tts library. diff --git a/test/test_hub.py b/test/test_hub.py deleted file mode 100644 index 0aa6d1b..0000000 --- a/test/test_hub.py +++ /dev/null @@ -1,6 +0,0 @@ -from src.apis.jina_cloud import is_executor_in_hub - - -def test_is_microservice_in_hub(): - assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False - assert is_executor_in_hub('CLIPImageEncoder') is True