From 3d8b478e53f1476273b06d8cc09e93ec7abf9db5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?=
Date: Wed, 3 May 2023 10:29:27 +0200
Subject: [PATCH] =?UTF-8?q?=E2=9C=85=20feat:=20requirements=20txt=20valida?=
 =?UTF-8?q?tion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/apis/pypi.py                  | 85 ++++++++++++++++++++++++++++++-
 src/options/generate/generator.py |  3 +-
 test/unit/test_api.py             | 31 ++++++++++++++-
 3 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/src/apis/pypi.py b/src/apis/pypi.py
index 7b0456f..4351e2a 100644
--- a/src/apis/pypi.py
+++ b/src/apis/pypi.py
@@ -1,4 +1,10 @@
+import os
+import re
+from datetime import datetime
+
 import requests
+from packaging import version
+
 
 def is_package_on_pypi(package_name, version=None):
     optional_version = f"/{version}" if version else ""
@@ -9,4 +15,81 @@ def is_package_on_pypi(package_name, version=None):
     elif response.status_code == 404:
         return False
     else:
-        return None
\ No newline at end of file
+        return None
+
+
+def get_latest_package_version(package_name):
+    """Return the highest version of a package uploaded to PyPI in 2021 or earlier.
+
+    Returns None if the PyPI request does not answer with status 200 or if no
+    release qualifies.
+    """
+    url = f'https://pypi.org/pypi/{package_name}/json'
+    response = requests.get(url)
+    if response.status_code != 200:
+        return None
+    data = response.json()
+    releases = data['releases']
+
+    # Get package versions not newer than 2021
+    valid_versions = []
+    for v, release_info in releases.items():
+        # Skip releases without uploaded files, they have no upload time to check
+        if not release_info:
+            continue
+        upload_time = datetime.strptime(release_info[0]['upload_time'], '%Y-%m-%dT%H:%M:%S')
+        if upload_time.year <= 2021:
+            valid_versions.append(v)
+
+    v = max(valid_versions, key=version.parse) if valid_versions else None
+    return v
+
+
+def clean_requirements_txt(previous_microservice_path):
+    """Rewrite requirements.txt in the given folder, keeping only resolvable packages.
+
+    Blank lines and comment lines are dropped. Lines for jina, docarray, openai and
+    pytest are kept unchanged. Every other package is kept only if
+    is_package_on_pypi confirms it; when it has no version, or a version that is
+    not confirmed on PyPI, it is pinned with ~= to the result of
+    get_latest_package_version.
+
+    Raises ValueError if a line has more than one version operator or if no
+    version can be found for a package that is on PyPI.
+    """
+    requirements_txt_path = os.path.join(previous_microservice_path, 'requirements.txt')
+    with open(requirements_txt_path, 'r', encoding='utf-8') as f:
+        requirements_txt = f.read()
+
+    updated_requirements = []
+
+    for line in requirements_txt.split('\n'):
+        line = line.strip()
+        if not line or line.startswith('#'):
+            continue
+
+        # Strip the parts so that 'requests >= 2.0' yields 'requests' and '2.0'
+        split = [part.strip() for part in re.split(r'==|>=|<=|>|<|~=', line)]
+        if len(split) == 1:
+            package_version = None
+            package_name = split[0]
+        elif len(split) == 2:
+            package_name, package_version = split
+        else:
+            raise ValueError(f'Could not parse line {line} in requirements.txt')
+
+        # Keep lines with jina, docarray, openai, pytest unchanged
+        if package_name in {'jina', 'docarray', 'openai', 'pytest'}:
+            updated_requirements.append(line)
+            continue
+        if is_package_on_pypi(package_name):
+            if package_version is None or not is_package_on_pypi(package_name, package_version):
+                latest_version = get_latest_package_version(package_name)
+                if latest_version is None:
+                    raise ValueError(f'Package {package_name} not found on PyPI')
+                updated_requirements.append(f'{package_name}~={latest_version}')
+            else:
+                updated_requirements.append(line)
+
+    with open(requirements_txt_path, 'w', encoding='utf-8') as f:
+        f.write('\n'.join(updated_requirements))
diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 7467d5e..c463db2 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -13,7 +13,7 @@ from pydantic.dataclasses import dataclass
 from src.apis import gpt
 from src.apis.gpt import _GPTConversation
 from src.apis.jina_cloud import process_error_message, push_executor, is_executor_in_hub
-from src.apis.pypi import is_package_on_pypi
+from src.apis.pypi import is_package_on_pypi, get_latest_package_version, clean_requirements_txt
 from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
     BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
     REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
@@ -303,6 +303,7 @@ pytest
                                                                num_approach, i)
             next_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
                                                            num_approach, i + 1)
+            clean_requirements_txt(previous_microservice_path)
             log_hubble = push_executor(previous_microservice_path)
             error = process_error_message(log_hubble)
             if error:
diff --git a/test/unit/test_api.py b/test/unit/test_api.py
index 5202a89..8ebb124 100644
--- a/test/unit/test_api.py
+++ b/test/unit/test_api.py
@@ -1,5 +1,7 @@
+import os
+
 from src.apis.jina_cloud import is_executor_in_hub
-from src.apis.pypi import is_package_on_pypi
+from src.apis.pypi import is_package_on_pypi, clean_requirements_txt
 from src.options.generate.generator import Generator
 
 
@@ -32,3 +34,30 @@ def test_filter_packages_list():
         ["gpt_3_5_turbo", "requests", "pydub"],
         ["requests", "gtts"]
     ]
+
+
+def test_precheck_requirements_txt(tmpdir):
+    requirements_content = """\
+jina==1.2.3
+docarray==1.2.3
+requests~=2.26.0
+gtts~=2.2.3
+pydub~=123.123.123
+base64~=3.3.0
+"""
+    requirements_clean = """\
+jina==1.2.3
+docarray==1.2.3
+requests~=2.26.0
+gtts~=2.2.3
+pydub~=0.25.1"""
+    requirements_txt_path = os.path.join(tmpdir, "requirements.txt")
+    with open(requirements_txt_path, "w", encoding="utf-8") as f:
+        f.write(requirements_content)
+
+    clean_requirements_txt(tmpdir)
+
+    with open(requirements_txt_path, "r", encoding="utf-8") as f:
+        updated_requirements = f.read()
+
+    assert updated_requirements == requirements_clean