feat: requirements txt validation

This commit is contained in:
Florian Hönicke
2023-05-03 10:29:27 +02:00
parent 51f2e4b096
commit 3d8b478e53
3 changed files with 96 additions and 3 deletions

View File

@@ -1,4 +1,10 @@
import os
import re
from datetime import datetime
import requests
from packaging import version
def is_package_on_pypi(package_name, version=None):
optional_version = f"/{version}" if version else ""
@@ -9,4 +15,61 @@ def is_package_on_pypi(package_name, version=None):
elif response.status_code == 404:
return False
else:
return None
return None
def get_latest_package_version(package_name, max_upload_year=2021):
    """Return the highest version of a package uploaded to PyPI up to a given year.

    The release list is fetched from the PyPI JSON endpoint of ``package_name``.
    Only releases whose first file was uploaded in ``max_upload_year`` or earlier
    are considered.

    :param package_name: name of the package as it appears on PyPI
    :param max_upload_year: latest upload year (inclusive) a release may have
    :return: the highest matching version string, or ``None`` when the request
        does not return status 200 or no release matches
    """
    url = f'https://pypi.org/pypi/{package_name}/json'
    response = requests.get(url)
    if response.status_code != 200:
        return None
    releases = response.json()['releases']

    # Collect the versions uploaded in max_upload_year or earlier
    valid_versions = []
    for release_version, release_files in releases.items():
        if not release_files:
            # A release can be listed without any uploaded file; there is no
            # upload time to inspect, so it cannot be selected.
            continue
        try:
            version.parse(release_version)
        except version.InvalidVersion:
            # Skip version strings that cannot be ordered instead of letting
            # the max() call below fail on them.
            continue
        upload_time = datetime.strptime(release_files[0]['upload_time'], '%Y-%m-%dT%H:%M:%S')
        if upload_time.year <= max_upload_year:
            valid_versions.append(release_version)

    return max(valid_versions, key=version.parse) if valid_versions else None
def clean_requirements_txt(previous_microservice_path):
    """Rewrite ``requirements.txt`` so that it only contains resolvable entries.

    The file ``requirements.txt`` inside ``previous_microservice_path`` is read,
    every requirement line is checked and the file is overwritten in place:

    * blank lines and comment lines are removed,
    * lines for jina, docarray, openai and pytest are kept unchanged,
    * packages for which ``is_package_on_pypi`` returns a falsy value are dropped,
    * packages without a version, or with a version that is not on PyPI, are
      rewritten as ``<package>~=<version from get_latest_package_version>``,
    * all other lines are kept unchanged.

    :param previous_microservice_path: directory containing ``requirements.txt``
    :raises ValueError: if a line contains more than one version operator, or if
        no replacement version can be determined for a package
    """
    requirements_txt_path = os.path.join(previous_microservice_path, 'requirements.txt')
    with open(requirements_txt_path, 'r', encoding='utf-8') as f:
        requirements_txt = f.read()

    updated_requirements = []
    for line in requirements_txt.split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            continue

        # '===' must come before '==' so that it is matched as one operator.
        # Surrounding whitespace is stripped so 'pkg >= 1.0' parses like 'pkg>=1.0'.
        parts = [part.strip() for part in re.split(r'===|==|!=|>=|<=|~=|>|<', line)]
        if len(parts) == 1:
            package_name, pinned_version = parts[0], None
        elif len(parts) == 2:
            package_name, pinned_version = parts
        else:
            raise ValueError(f'Could not parse line {line} in requirements.txt')
        if not pinned_version:
            # An operator without a version (e.g. 'pkg==') carries no usable pin
            pinned_version = None

        # Keep lines with jina, docarray, openai, pytest unchanged
        if package_name in {'jina', 'docarray', 'openai', 'pytest'}:
            updated_requirements.append(line)
            continue

        if not is_package_on_pypi(package_name):
            # Unknown packages are dropped from the file
            continue

        if pinned_version is None or not is_package_on_pypi(package_name, pinned_version):
            latest_version = get_latest_package_version(package_name)
            if latest_version is None:
                raise ValueError(f'Package {package_name} not found on PyPI')
            updated_requirements.append(f'{package_name}~={latest_version}')
        else:
            updated_requirements.append(line)

    with open(requirements_txt_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(updated_requirements))

View File

@@ -13,7 +13,7 @@ from pydantic.dataclasses import dataclass
from src.apis import gpt
from src.apis.gpt import _GPTConversation
from src.apis.jina_cloud import process_error_message, push_executor, is_executor_in_hub
from src.apis.pypi import is_package_on_pypi
from src.apis.pypi import is_package_on_pypi, get_latest_package_version, clean_requirements_txt
from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
@@ -303,6 +303,7 @@ pytest
num_approach, i)
next_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
num_approach, i + 1)
clean_requirements_txt(previous_microservice_path)
log_hubble = push_executor(previous_microservice_path)
error = process_error_message(log_hubble)
if error:

View File

@@ -1,5 +1,7 @@
import os
from src.apis.jina_cloud import is_executor_in_hub
from src.apis.pypi import is_package_on_pypi
from src.apis.pypi import is_package_on_pypi, clean_requirements_txt
from src.options.generate.generator import Generator
@@ -32,3 +34,30 @@ def test_filter_packages_list():
["gpt_3_5_turbo", "requests", "pydub"],
["requests", "gtts"]
]
def test_precheck_requirements_txt(tmpdir):
    """Check that clean_requirements_txt rewrites a requirements.txt as expected.

    The input contains two protected packages, two entries expected to stay
    unchanged, one entry whose version is expected to be replaced, and one
    entry expected to be removed.
    NOTE(review): clean_requirements_txt presumably queries PyPI, so this test
    likely needs network access - confirm.
    """
    input_lines = [
        "jina==1.2.3",
        "docarray==1.2.3",
        "requests~=2.26.0",
        "gtts~=2.2.3",
        "pydub~=123.123.123",
        "base64~=3.3.0",
        "",  # the input file ends with a newline
    ]
    expected_lines = [
        "jina==1.2.3",
        "docarray==1.2.3",
        "requests~=2.26.0",
        "gtts~=2.2.3",
        "pydub~=0.25.1",
    ]

    target_file = os.path.join(tmpdir, "requirements.txt")
    with open(target_file, "w", encoding="utf-8") as handle:
        handle.write("\n".join(input_lines))

    clean_requirements_txt(tmpdir)

    with open(target_file, "r", encoding="utf-8") as handle:
        rewritten = handle.read()
    assert rewritten == "\n".join(expected_lines)