# Auditor/theauditor/deps.py
"""Dependency parser for multiple ecosystems."""
import glob
import http.client
import json
import platform
import re
import shutil
import time
import urllib.error
import yaml
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from theauditor.security import sanitize_path, sanitize_url_component, validate_package_name, SecurityError
# Detect if running on Windows for character encoding
IS_WINDOWS = platform.system() == "Windows"
# Rate limiting configuration - optimized for minimal runtime
# Based on actual API rate limits and industry standards
RATE_LIMIT_NPM = 0.1 # npm registry: 600 req/min (well under any limit)
RATE_LIMIT_PYPI = 0.2 # PyPI: 300 req/min (safe margin)
RATE_LIMIT_DOCKER = 0.2 # Docker Hub: 300 req/min for tag checks
RATE_LIMIT_BACKOFF = 15 # Backoff on 429/disconnect (15s gives APIs time to reset)


def parse_dependencies(root_path: str = ".") -> List[Dict[str, Any]]:
    """
    Parse dependencies from various package managers.

    Returns a list of dependency objects with the structure:
        {
            "name": str,
            "version": str,
            "manager": "npm" | "py" | "docker",
            "files": [paths that import it],
            "source": "package.json" | "pyproject.toml" | "requirements*.txt"
                      | "docker-compose*.yml" | "Dockerfile"
        }
    """
    import os

    root = Path(root_path)
    deps = []

    # Debug mode
    debug = os.environ.get("THEAUDITOR_DEBUG")

    # Parse Node dependencies
    try:
        package_json = sanitize_path("package.json", root_path)
        if package_json.exists():
            if debug:
                print(f"Debug: Found {package_json}")
            deps.extend(_parse_package_json(package_json))
    except SecurityError as e:
        if debug:
            print(f"Debug: Security error checking package.json: {e}")

    # Parse Python dependencies
    try:
        pyproject = sanitize_path("pyproject.toml", root_path)
        if pyproject.exists():
            if debug:
                print(f"Debug: Found {pyproject}")
            deps.extend(_parse_pyproject_toml(pyproject))
    except SecurityError as e:
        if debug:
            print(f"Debug: Security error checking pyproject.toml: {e}")

    # Parse requirements files
    req_files = list(root.glob("requirements*.txt"))
    if debug and req_files:
        print(f"Debug: Found requirements files: {req_files}")
    for req_file in req_files:
        try:
            # Validate the path is within project root
            safe_req_file = sanitize_path(str(req_file), root_path)
            deps.extend(_parse_requirements_txt(safe_req_file))
        except SecurityError as e:
            if debug:
                print(f"Debug: Security error with {req_file}: {e}")

    # Parse Docker Compose files
    docker_compose_files = list(root.glob("docker-compose*.yml")) + list(root.glob("docker-compose*.yaml"))
    if debug and docker_compose_files:
        print(f"Debug: Found Docker Compose files: {docker_compose_files}")
    for compose_file in docker_compose_files:
        try:
            safe_compose_file = sanitize_path(str(compose_file), root_path)
            deps.extend(_parse_docker_compose(safe_compose_file))
        except SecurityError as e:
            if debug:
                print(f"Debug: Security error with {compose_file}: {e}")

    # Parse Dockerfiles
    dockerfiles = list(root.glob("**/Dockerfile"))
    if debug and dockerfiles:
        print(f"Debug: Found Dockerfiles: {dockerfiles}")
    for dockerfile in dockerfiles:
        try:
            safe_dockerfile = sanitize_path(str(dockerfile), root_path)
            deps.extend(_parse_dockerfile(safe_dockerfile))
        except SecurityError as e:
            if debug:
                print(f"Debug: Security error with {dockerfile}: {e}")

    if debug:
        print(f"Debug: Total dependencies found: {len(deps)}")
    return deps
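
# Illustrative usage sketch (comment only, not executed on import): how parse_dependencies()
# and write_deps_json() are typically combined. The project path below is an assumption,
# not something this module requires.
#
#     deps = parse_dependencies("/path/to/project")
#     # Each entry looks like:
#     # {"name": "flask", "version": "3.0.0", "manager": "py",
#     #  "files": [], "source": "requirements.txt"}
#     write_deps_json(deps, "./.pf/deps.json")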


def _parse_package_json(path: Path) -> List[Dict[str, Any]]:
    """Parse dependencies from package.json, with monorepo support."""
    deps = []
    processed_packages = set()  # Track processed packages to avoid duplicates

    def parse_single_package(pkg_path: Path, workspace_path: str = "package.json") -> List[Dict[str, Any]]:
        """Parse a single package.json file."""
        local_deps = []
        try:
            with open(pkg_path, encoding="utf-8") as f:
                data = json.load(f)
            # Combine dependencies and devDependencies
            all_deps = {}
            if "dependencies" in data:
                all_deps.update(data["dependencies"])
            if "devDependencies" in data:
                all_deps.update(data["devDependencies"])
            for name, version_spec in all_deps.items():
                # Clean version spec (remove ^, ~, >=, etc.)
                version = _clean_version(version_spec)
                local_deps.append({
                    "name": name,
                    "version": version,
                    "manager": "npm",
                    "files": [],  # Will be populated by workset scan
                    "source": "package.json",
                    "workspace_package": workspace_path  # Track which package.json this came from
                })
        except (json.JSONDecodeError, KeyError) as e:
            # Log but don't fail - package.json might be malformed
            print(f"Warning: Could not parse {pkg_path}: {e}")
        return local_deps

    # Parse the root package.json first
    root_dir = path.parent
    deps.extend(parse_single_package(path, "package.json"))
    processed_packages.add(str(path.resolve()))

    # Check for monorepo workspaces
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        # Check for workspaces field (Yarn/npm workspaces)
        workspaces = data.get("workspaces", [])
        # Handle different workspace formats
        if isinstance(workspaces, dict):
            # npm 7+ format: {"packages": ["packages/*"]}
            workspaces = workspaces.get("packages", [])

        if workspaces and isinstance(workspaces, list):
            # This is a monorepo - expand workspace patterns
            for pattern in workspaces:
                # Convert workspace pattern to absolute path pattern
                abs_pattern = str(root_dir / pattern)
                # Handle glob patterns like "packages/*" or "apps/**"
                if "*" in abs_pattern:
                    # Use glob to find matching directories
                    matched_paths = glob.glob(abs_pattern)
                    for matched_path in matched_paths:
                        matched_dir = Path(matched_path)
                        if matched_dir.is_dir():
                            # Look for package.json in this directory
                            workspace_pkg = matched_dir / "package.json"
                            if workspace_pkg.exists():
                                # Skip if already processed
                                if str(workspace_pkg.resolve()) in processed_packages:
                                    continue
                                # Calculate relative path for workspace_package field
                                try:
                                    rel_path = workspace_pkg.relative_to(root_dir)
                                    workspace_path = str(rel_path).replace("\\", "/")
                                except ValueError:
                                    # If relative path fails, use absolute path
                                    workspace_path = str(workspace_pkg)
                                # Parse this workspace package
                                workspace_deps = parse_single_package(workspace_pkg, workspace_path)
                                deps.extend(workspace_deps)
                                processed_packages.add(str(workspace_pkg.resolve()))
                else:
                    # Direct path without glob
                    workspace_dir = root_dir / pattern
                    if workspace_dir.is_dir():
                        workspace_pkg = workspace_dir / "package.json"
                        if workspace_pkg.exists():
                            # Skip if already processed
                            if str(workspace_pkg.resolve()) in processed_packages:
                                continue
                            # Calculate relative path for workspace_package field
                            try:
                                rel_path = workspace_pkg.relative_to(root_dir)
                                workspace_path = str(rel_path).replace("\\", "/")
                            except ValueError:
                                workspace_path = str(workspace_pkg)
                            # Parse this workspace package
                            workspace_deps = parse_single_package(workspace_pkg, workspace_path)
                            deps.extend(workspace_deps)
                            processed_packages.add(str(workspace_pkg.resolve()))

        # Also check for Lerna configuration (lerna.json)
        lerna_json = root_dir / "lerna.json"
        if lerna_json.exists():
            try:
                with open(lerna_json, encoding="utf-8") as f:
                    lerna_data = json.load(f)
                lerna_packages = lerna_data.get("packages", [])
                for pattern in lerna_packages:
                    abs_pattern = str(root_dir / pattern)
                    if "*" in abs_pattern:
                        matched_paths = glob.glob(abs_pattern)
                        for matched_path in matched_paths:
                            matched_dir = Path(matched_path)
                            if matched_dir.is_dir():
                                workspace_pkg = matched_dir / "package.json"
                                if workspace_pkg.exists() and str(workspace_pkg.resolve()) not in processed_packages:
                                    try:
                                        rel_path = workspace_pkg.relative_to(root_dir)
                                        workspace_path = str(rel_path).replace("\\", "/")
                                    except ValueError:
                                        workspace_path = str(workspace_pkg)
                                    workspace_deps = parse_single_package(workspace_pkg, workspace_path)
                                    deps.extend(workspace_deps)
                                    processed_packages.add(str(workspace_pkg.resolve()))
            except (json.JSONDecodeError, KeyError):
                # lerna.json parsing failed, continue without it
                pass

        # Check for pnpm-workspace.yaml
        pnpm_workspace = root_dir / "pnpm-workspace.yaml"
        if pnpm_workspace.exists():
            try:
                with open(pnpm_workspace, encoding="utf-8") as f:
                    pnpm_data = yaml.safe_load(f)
                # safe_load returns None for an empty file, so guard before .get()
                pnpm_packages = (pnpm_data or {}).get("packages", [])
                for pattern in pnpm_packages:
                    abs_pattern = str(root_dir / pattern)
                    if "*" in abs_pattern:
                        matched_paths = glob.glob(abs_pattern)
                        for matched_path in matched_paths:
                            matched_dir = Path(matched_path)
                            if matched_dir.is_dir():
                                workspace_pkg = matched_dir / "package.json"
                                if workspace_pkg.exists() and str(workspace_pkg.resolve()) not in processed_packages:
                                    try:
                                        rel_path = workspace_pkg.relative_to(root_dir)
                                        workspace_path = str(rel_path).replace("\\", "/")
                                    except ValueError:
                                        workspace_path = str(workspace_pkg)
                                    workspace_deps = parse_single_package(workspace_pkg, workspace_path)
                                    deps.extend(workspace_deps)
                                    processed_packages.add(str(workspace_pkg.resolve()))
            except (yaml.YAMLError, KeyError, AttributeError):
                # pnpm-workspace.yaml parsing failed, continue without it
                pass
    except (json.JSONDecodeError, KeyError):
        # Root package.json parsing for workspaces failed, but we already have the root deps
        pass

    return deps
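
# Monorepo sketch (assumed layout, for illustration only): given a root package.json with
# {"workspaces": ["packages/*"]} and a packages/api/package.json declaring
# {"dependencies": {"express": "^4.18.2"}}, _parse_package_json() yields an entry like
# {"name": "express", "version": "4.18.2", "manager": "npm",
#  "source": "package.json", "workspace_package": "packages/api/package.json"}.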


def _parse_pyproject_toml(path: Path) -> List[Dict[str, Any]]:
    """Parse dependencies from pyproject.toml."""
    deps = []
    try:
        import tomllib
    except ImportError:
        # Python < 3.11
        try:
            import tomli as tomllib
        except ImportError:
            # Can't parse TOML without a library
            print(f"Warning: Cannot parse {path} - tomllib not available")
            return deps

    try:
        with open(path, "rb") as f:
            data = tomllib.load(f)

        # Get project dependencies
        project_deps = data.get("project", {}).get("dependencies", [])
        for dep_spec in project_deps:
            name, version = _parse_python_dep_spec(dep_spec)
            if name:
                deps.append({
                    "name": name,
                    "version": version or "latest",
                    "manager": "py",
                    "files": [],
                    "source": "pyproject.toml"
                })

        # Also check optional dependencies
        optional = data.get("project", {}).get("optional-dependencies", {})
        for group_deps in optional.values():
            for dep_spec in group_deps:
                name, version = _parse_python_dep_spec(dep_spec)
                if name:
                    deps.append({
                        "name": name,
                        "version": version or "latest",
                        "manager": "py",
                        "files": [],
                        "source": "pyproject.toml"
                    })
    except Exception as e:
        print(f"Warning: Could not parse {path}: {e}")
    return deps


def _parse_requirements_txt(path: Path) -> List[Dict[str, Any]]:
    """Parse dependencies from requirements.txt."""
    deps = []
    try:
        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Skip comments and empty lines
                if not line or line.startswith("#"):
                    continue
                # Skip special directives
                if line.startswith("-"):
                    continue
                # Strip inline comments and trailing whitespace
                if "#" in line:
                    line = line.split("#")[0].strip()
                name, version = _parse_python_dep_spec(line)
                if name:
                    deps.append({
                        "name": name,
                        "version": version or "latest",
                        "manager": "py",
                        "files": [],
                        "source": path.name
                    })
    except Exception as e:
        print(f"Warning: Could not parse {path}: {e}")
    return deps


def _parse_python_dep_spec(spec: str) -> tuple[str, Optional[str]]:
    """
    Parse a Python dependency specification.
    Returns (name, version) tuple.

    Handles various formats:
        package==1.2.3
        package>=1.2.3
        package~=1.2.3
        package[extra]==1.2.3
        package @ git+https://...
    """
    # Remove extras
    spec = re.sub(r'\[.*?\]', '', spec)

    # Handle git URLs
    if "@" in spec and ("git+" in spec or "https://" in spec):
        name = spec.split("@")[0].strip()
        return (name, "git")

    # Parse version specs (allow dots, underscores, hyphens in package names)
    match = re.match(r'^([a-zA-Z0-9._-]+)\s*([><=~!]+)\s*(.+)$', spec)
    if match:
        name, _op, version = match.groups()
        # Exact for pinned (==) specs; for other operators the version serves as a hint
        return (name, version)

    # No version specified
    return (spec.strip(), None)
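
# Expected behaviour of _parse_python_dep_spec(), shown as illustrative pairs
# (the package names are examples, not requirements of this module):
#     "requests==2.31.0"                 -> ("requests", "2.31.0")
#     "flask[async]>=2.0"                -> ("flask", "2.0")
#     "mypkg @ git+https://example/repo" -> ("mypkg", "git")
#     "uvicorn"                          -> ("uvicorn", None)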


def _clean_version(version_spec: str) -> str:
    """
    Clean version specification to get the actual version.
        ^1.2.3  -> 1.2.3
        ~1.2.3  -> 1.2.3
        >=1.2.3 -> 1.2.3
    """
    # Remove common prefixes
    version = re.sub(r'^[~^>=<]+', '', version_spec)
    # Handle ranges (use first version)
    if " " in version:
        version = version.split()[0]
    return version.strip()


def _parse_docker_compose(path: Path) -> List[Dict[str, Any]]:
    """Parse Docker base images from docker-compose.yml files."""
    deps = []
    try:
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)

        # Check if services key exists
        if not data or "services" not in data:
            return deps

        # Iterate through services
        for service_config in data["services"].values():
            if not isinstance(service_config, dict):
                continue
            # Extract image if present
            if "image" in service_config:
                image_spec = service_config["image"]
                # Parse image:tag format
                if ":" in image_spec:
                    name, tag = image_spec.rsplit(":", 1)
                else:
                    name = image_spec
                    tag = "latest"
                # Handle registry prefixes (e.g., docker.io/library/postgres)
                if "/" in name:
                    # Take the last part as the image name
                    name_parts = name.split("/")
                    if len(name_parts) >= 2:
                        # If it's library/image, use just image
                        if name_parts[-2] == "library":
                            name = name_parts[-1]
                        else:
                            # Keep org/image format
                            name = "/".join(name_parts[-2:])
                deps.append({
                    "name": name,
                    "version": tag,
                    "manager": "docker",
                    "files": [],
                    "source": path.name
                })
    except (yaml.YAMLError, KeyError, AttributeError) as e:
        print(f"Warning: Could not parse {path}: {e}")
    return deps


def _parse_dockerfile(path: Path) -> List[Dict[str, Any]]:
    """Parse Docker base images from Dockerfile."""
    deps = []
    try:
        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Look for FROM instructions
                if line.upper().startswith("FROM "):
                    # Extract image spec after FROM
                    image_spec = line[5:].strip()
                    # Handle multi-stage builds (FROM image AS stage), case-insensitively
                    image_spec = re.split(r"\s+as\s+", image_spec, flags=re.IGNORECASE)[0].strip()
                    # Skip scratch and named build stages
                    if image_spec.lower() in ["scratch", "builder"]:
                        continue
                    # Parse image:tag format
                    if ":" in image_spec:
                        name, tag = image_spec.rsplit(":", 1)
                    else:
                        name = image_spec
                        tag = "latest"
                    # Handle registry prefixes
                    if "/" in name:
                        name_parts = name.split("/")
                        if len(name_parts) >= 2:
                            if name_parts[-2] == "library":
                                name = name_parts[-1]
                            else:
                                name = "/".join(name_parts[-2:])
                    deps.append({
                        "name": name,
                        "version": tag,
                        "manager": "docker",
                        "files": [],
                        "source": str(path.relative_to(Path.cwd()))
                    })
    except Exception as e:
        print(f"Warning: Could not parse {path}: {e}")
    return deps
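
# Dockerfile sketch (assumed content, for illustration): a multi-stage build such as
#     FROM node:20-alpine AS build
#     FROM nginx:1.25
# is recorded as two docker entries, e.g.
# {"name": "node", "version": "20-alpine", "manager": "docker", ...} and
# {"name": "nginx", "version": "1.25", "manager": "docker", ...}.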


def write_deps_json(deps: List[Dict[str, Any]], output_path: str = "./.pf/deps.json") -> None:
    """Write dependencies to JSON file."""
    try:
        output = sanitize_path(output_path, ".")
        output.parent.mkdir(parents=True, exist_ok=True)
        with open(output, "w", encoding="utf-8") as f:
            json.dump(deps, f, indent=2, sort_keys=True)
    except SecurityError as e:
        raise SecurityError(f"Invalid output path: {e}")


def check_latest_versions(
    deps: List[Dict[str, Any]],
    allow_net: bool = True,
    offline: bool = False,
    cache_file: str = "./.pf/deps_cache.json"
) -> Dict[str, Dict[str, Any]]:
    """
    Check latest versions from registries with caching.

    Returns dict keyed by "manager:name" with:
        {
            "locked": str,
            "latest": str,
            "delta": str,
            "is_outdated": bool,
            "last_checked": str (ISO timestamp)
        }
    """
    if offline or not allow_net:
        # Try to load from cache in offline mode
        cached_data = _load_deps_cache(cache_file)
        if cached_data:
            # Update locked versions from current deps
            for dep in deps:
                key = f"{dep['manager']}:{dep['name']}"
                if key in cached_data:
                    cached_data[key]["locked"] = dep["version"]
                    cached_data[key]["is_outdated"] = cached_data[key]["latest"] != dep["version"]
                    cached_data[key]["delta"] = _calculate_version_delta(dep["version"], cached_data[key]["latest"])
        return cached_data or {}

    # Load existing cache
    cache = _load_deps_cache(cache_file)
    latest_info = {}
    needs_check = []

    # FIRST PASS: Check what's in cache and still valid
    for dep in deps:
        key = f"{dep['manager']}:{dep['name']}"
        if key in latest_info:
            continue  # Already processed
        # Check if we have valid cached data (24 hours for deps)
        if key in cache and _is_cache_valid(cache[key], hours=24):
            # Update locked version from current deps
            cache[key]["locked"] = dep["version"]
            cache[key]["is_outdated"] = cache[key]["latest"] != dep["version"]
            cache[key]["delta"] = _calculate_version_delta(dep["version"], cache[key]["latest"])
            latest_info[key] = cache[key]
        else:
            needs_check.append(dep)

    # Early exit if everything is cached
    if not needs_check:
        return latest_info

    # SECOND PASS: Check only what needs updating, with per-service rate limiting
    npm_rate_limited_until = 0
    pypi_rate_limited_until = 0
    docker_rate_limited_until = 0

    for dep in needs_check:
        key = f"{dep['manager']}:{dep['name']}"
        current_time = time.time()

        # Skip if this service is rate limited
        if dep["manager"] == "npm" and current_time < npm_rate_limited_until:
            # Use cached data if available, even if expired
            if key in cache:
                latest_info[key] = cache[key]
            continue
        elif dep["manager"] == "py" and current_time < pypi_rate_limited_until:
            if key in cache:
                latest_info[key] = cache[key]
            continue
        elif dep["manager"] == "docker" and current_time < docker_rate_limited_until:
            if key in cache:
                latest_info[key] = cache[key]
            continue

        try:
            if dep["manager"] == "npm":
                latest = _check_npm_latest(dep["name"])
            elif dep["manager"] == "py":
                latest = _check_pypi_latest(dep["name"])
            elif dep["manager"] == "docker":
                latest = _check_dockerhub_latest(dep["name"])
            else:
                continue

            if latest:
                locked = dep["version"]
                delta = _calculate_version_delta(locked, latest)
                latest_info[key] = {
                    "locked": locked,
                    "latest": latest,
                    "delta": delta,
                    "is_outdated": locked != latest,
                    "last_checked": datetime.now().isoformat()
                }

            # Rate limiting: service-specific delays for optimal performance
            if dep["manager"] == "npm":
                time.sleep(RATE_LIMIT_NPM)  # 0.1s for npm
            elif dep["manager"] == "py":
                time.sleep(RATE_LIMIT_PYPI)  # 0.2s for PyPI
            elif dep["manager"] == "docker":
                time.sleep(RATE_LIMIT_DOCKER)  # 0.2s for Docker Hub
        except (urllib.error.URLError, urllib.error.HTTPError, http.client.RemoteDisconnected,
                TimeoutError, json.JSONDecodeError, KeyError, ValueError) as e:
            error_msg = f"{type(e).__name__}: {str(e)[:50]}"
            # Handle rate limiting and connection errors specifically
            if ("429" in str(e) or "rate" in str(e).lower() or
                    "RemoteDisconnected" in str(e) or "closed connection" in str(e).lower()):
                # Set rate limit expiry for this service
                if dep["manager"] == "npm":
                    npm_rate_limited_until = current_time + RATE_LIMIT_BACKOFF
                elif dep["manager"] == "py":
                    pypi_rate_limited_until = current_time + RATE_LIMIT_BACKOFF
                elif dep["manager"] == "docker":
                    docker_rate_limited_until = current_time + RATE_LIMIT_BACKOFF
            # Use cached data if available, even if expired
            if key in cache:
                latest_info[key] = cache[key]
                latest_info[key]["error"] = error_msg
            else:
                latest_info[key] = {
                    "locked": dep["version"],
                    "latest": None,
                    "delta": None,
                    "is_outdated": False,
                    "error": error_msg,
                    "last_checked": datetime.now().isoformat()
                }
            continue

    # Save updated cache
    _save_deps_cache(latest_info, cache_file)
    return latest_info
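
# Usage sketch for version checking (network access and the cache path are assumptions;
# the default cache lives under ./.pf/):
#
#     deps = parse_dependencies(".")
#     info = check_latest_versions(deps, allow_net=True, offline=False)
#     outdated = {k: v for k, v in info.items() if v.get("is_outdated")}
#     write_deps_latest_json(info)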


def _load_deps_cache(cache_file: str) -> Dict[str, Dict[str, Any]]:
    """
    Load the dependency cache from disk.
    Returns empty dict if cache doesn't exist or is invalid.
    """
    try:
        cache_path = Path(cache_file)
        if cache_path.exists():
            with open(cache_path, 'r', encoding='utf-8') as f:
                return json.load(f)
    except (json.JSONDecodeError, OSError):
        pass
    return {}


def _save_deps_cache(latest_info: Dict[str, Dict[str, Any]], cache_file: str) -> None:
    """
    Save the dependency cache to disk.
    Merges with existing cache to preserve data for packages not in current check.
    """
    try:
        cache_path = Path(cache_file)
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        # Load existing cache to merge
        existing = _load_deps_cache(cache_file)
        # Merge new data into existing (new data takes precedence)
        existing.update(latest_info)
        # Write merged cache
        with open(cache_path, 'w', encoding='utf-8') as f:
            json.dump(existing, f, indent=2, sort_keys=True)
    except OSError:
        pass  # Fail silently if we can't write the cache


def _is_cache_valid(cached_item: Dict[str, Any], hours: int = 24) -> bool:
    """
    Check if a cached item is still valid based on age.
    Default is 24 hours for dependency version checks.
    """
    try:
        if "last_checked" not in cached_item:
            return False
        last_checked = datetime.fromisoformat(cached_item["last_checked"])
        age = datetime.now() - last_checked
        return age.total_seconds() < (hours * 3600)
    except (ValueError, KeyError):
        return False
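
# Shape of one cache entry in the deps cache file (values are illustrative only):
#
#     "npm:express": {
#         "locked": "4.18.2",
#         "latest": "4.19.2",
#         "delta": "minor",
#         "is_outdated": true,
#         "last_checked": "2024-01-01T12:00:00"
#     }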


def _check_npm_latest(package_name: str) -> Optional[str]:
    """Fetch latest version from the npm registry."""
    import urllib.request
    import urllib.error

    # Validate and sanitize package name
    if not validate_package_name(package_name, "npm"):
        return None
    # URL-encode the package name for safety
    safe_package_name = sanitize_url_component(package_name)
    url = f"https://registry.npmjs.org/{safe_package_name}"
    try:
        with urllib.request.urlopen(url, timeout=10) as response:
            data = json.loads(response.read())
            return data.get("dist-tags", {}).get("latest")
    except (urllib.error.URLError, http.client.RemoteDisconnected, json.JSONDecodeError, KeyError):
        return None


def _check_pypi_latest(package_name: str) -> Optional[str]:
    """Fetch latest version from PyPI."""
    import urllib.request
    import urllib.error

    # Validate package name
    if not validate_package_name(package_name, "py"):
        return None
    # Normalize package name for PyPI (replace underscores with hyphens)
    normalized_name = package_name.replace('_', '-')
    # Sanitize for URL
    safe_package_name = sanitize_url_component(normalized_name)
    url = f"https://pypi.org/pypi/{safe_package_name}/json"
    try:
        with urllib.request.urlopen(url, timeout=10) as response:
            data = json.loads(response.read())
            return data.get("info", {}).get("version")
    except (urllib.error.URLError, http.client.RemoteDisconnected, json.JSONDecodeError, KeyError):
        return None


def _check_dockerhub_latest(image_name: str) -> Optional[str]:
    """Fetch latest version from Docker Hub."""
    import urllib.request
    import urllib.error

    # Validate image name
    if not validate_package_name(image_name, "docker"):
        return None
    # For official images, use library/ prefix
    if "/" not in image_name:
        image_name = f"library/{image_name}"
    # Sanitize image name for URL
    safe_image_name = sanitize_url_component(image_name)
    # Docker Hub API endpoint for tags
    url = f"https://hub.docker.com/v2/repositories/{safe_image_name}/tags"
    try:
        # Create request with proper headers
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'TheAuditor/0.1.0')
        with urllib.request.urlopen(req, timeout=10) as response:
            data = json.loads(response.read())

        # Parse the results to find the latest stable version
        tags = data.get("results", [])
        if not tags:
            return None

        # Filter and sort tags to find the best "latest" version
        version_tags = []
        for tag in tags:
            tag_name = tag.get("name", "")
            # Skip non-version tags
            if tag_name in ["latest", "alpine", "slim", "bullseye", "bookworm"]:
                continue
            # Look for semantic version-like tags
            if re.match(r'^\d+(\.\d+)*', tag_name):
                version_tags.append(tag_name)

        if version_tags:
            # Sort versions (simple string sort for now)
            # More sophisticated version comparison could be added
            version_tags.sort(reverse=True)
            return version_tags[0]

        # Fall back to "latest" if no version tags are found
        for tag in tags:
            if tag.get("name") == "latest":
                return "latest"
        return None
    except (urllib.error.URLError, http.client.RemoteDisconnected, json.JSONDecodeError, KeyError):
        # Docker Hub API might require auth or have rate limits
        return None


def _calculate_version_delta(locked: str, latest: str) -> str:
    """
    Calculate semantic version delta.
    Returns: "major", "minor", "patch", "equal", or "unknown"
    """
    try:
        locked_parts = [int(x) for x in locked.split(".")[:3]]
        latest_parts = [int(x) for x in latest.split(".")[:3]]
        # Pad with zeros if needed
        while len(locked_parts) < 3:
            locked_parts.append(0)
        while len(latest_parts) < 3:
            latest_parts.append(0)

        if locked_parts == latest_parts:
            return "equal"
        elif latest_parts[0] > locked_parts[0]:
            return "major"
        elif latest_parts[0] == locked_parts[0] and latest_parts[1] > locked_parts[1]:
            return "minor"
        elif latest_parts[:2] == locked_parts[:2] and latest_parts[2] > locked_parts[2]:
            return "patch"
        else:
            return "unknown"  # locked is newer than latest
    except (ValueError, IndexError):
        return "unknown"


def write_deps_latest_json(
    latest_info: Dict[str, Dict[str, Any]],
    output_path: str = "./.pf/deps_latest.json"
) -> None:
    """Write latest version info to JSON file."""
    try:
        output = sanitize_path(output_path, ".")
        output.parent.mkdir(parents=True, exist_ok=True)
        with open(output, "w", encoding="utf-8") as f:
            json.dump(latest_info, f, indent=2, sort_keys=True)
    except SecurityError as e:
        raise SecurityError(f"Invalid output path: {e}")


def upgrade_all_deps(
    root_path: str,
    latest_info: Dict[str, Dict[str, Any]],
    deps_list: List[Dict[str, Any]]
) -> Dict[str, int]:
    """
    YOLO MODE: Upgrade all dependencies to latest versions.
    Rewrites requirements.txt, package.json, and pyproject.toml with latest versions.
    Returns dict with counts of upgraded packages per file type.
    """
    root = Path(root_path)
    upgraded = {
        "requirements.txt": 0,
        "package.json": 0,
        "pyproject.toml": 0
    }

    # Group deps by source file
    deps_by_source = {}
    for dep in deps_list:
        source = dep.get("source", "")
        if source not in deps_by_source:
            deps_by_source[source] = []
        deps_by_source[source].append(dep)

    # Upgrade requirements*.txt files
    for req_file in root.glob("requirements*.txt"):
        if req_file.name in deps_by_source:
            count = _upgrade_requirements_txt(req_file, latest_info, deps_by_source[req_file.name])
            upgraded["requirements.txt"] += count

    # Upgrade package.json
    package_json = root / "package.json"
    if package_json.exists() and "package.json" in deps_by_source:
        count = _upgrade_package_json(package_json, latest_info, deps_by_source["package.json"])
        upgraded["package.json"] = count

    # Upgrade pyproject.toml
    pyproject = root / "pyproject.toml"
    if pyproject.exists() and "pyproject.toml" in deps_by_source:
        count = _upgrade_pyproject_toml(pyproject, latest_info, deps_by_source["pyproject.toml"])
        upgraded["pyproject.toml"] = count

    return upgraded


def _upgrade_requirements_txt(
    path: Path,
    latest_info: Dict[str, Dict[str, Any]],
    deps: List[Dict[str, Any]]
) -> int:
    """Upgrade a requirements.txt file to latest versions."""
    # Sanitize path
    try:
        safe_path = sanitize_path(str(path), ".")
    except SecurityError:
        return 0  # Skip files outside project root

    # Create backup
    backup_path = safe_path.with_suffix(safe_path.suffix + ".bak")
    shutil.copy2(safe_path, backup_path)

    # Read current file
    with open(safe_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Build package name to latest version map (skip entries with no known latest)
    latest_versions = {}
    for dep in deps:
        key = f"py:{dep['name']}"
        if key in latest_info and latest_info[key].get("latest"):
            latest_versions[dep['name']] = latest_info[key]['latest']

    # Rewrite lines with latest versions
    updated_lines = []
    count = 0
    for line in lines:
        original_line = line
        line = line.strip()
        # Skip comments and empty lines
        if not line or line.startswith("#") or line.startswith("-"):
            updated_lines.append(original_line)
            continue
        # Parse package name
        name, _ = _parse_python_dep_spec(line)
        if name and name in latest_versions:
            # Replace with latest version
            updated_lines.append(f"{name}=={latest_versions[name]}\n")
            count += 1
        else:
            updated_lines.append(original_line)

    # Write updated file
    with open(safe_path, "w", encoding="utf-8") as f:
        f.writelines(updated_lines)

    return count


def _upgrade_package_json(
    path: Path,
    latest_info: Dict[str, Dict[str, Any]],
    deps: List[Dict[str, Any]]
) -> int:
    """Upgrade package.json to latest versions."""
    # Sanitize path
    try:
        safe_path = sanitize_path(str(path), ".")
    except SecurityError:
        return 0  # Skip files outside project root

    # Create backup
    backup_path = safe_path.with_suffix(safe_path.suffix + ".bak")
    shutil.copy2(safe_path, backup_path)

    # Read current file
    with open(safe_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    count = 0

    # Update dependencies (skip entries with no known latest version)
    if "dependencies" in data:
        for name in data["dependencies"]:
            key = f"npm:{name}"
            if key in latest_info and latest_info[key].get("latest"):
                data["dependencies"][name] = latest_info[key]["latest"]
                count += 1

    # Update devDependencies
    if "devDependencies" in data:
        for name in data["devDependencies"]:
            key = f"npm:{name}"
            if key in latest_info and latest_info[key].get("latest"):
                data["devDependencies"][name] = latest_info[key]["latest"]
                count += 1

    # Write updated file
    with open(safe_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
        f.write("\n")  # Add trailing newline

    return count


def _upgrade_pyproject_toml(
    path: Path,
    latest_info: Dict[str, Dict[str, Any]],
    deps: List[Dict[str, Any]]
) -> int:
    """Upgrade pyproject.toml to latest versions - handles ALL sections."""
    # Sanitize path
    try:
        safe_path = sanitize_path(str(path), ".")
    except SecurityError:
        return 0  # Skip files outside project root

    # Create backup
    backup_path = safe_path.with_suffix(safe_path.suffix + ".bak")
    shutil.copy2(safe_path, backup_path)

    # Read entire file as string for regex replacement
    with open(safe_path, "r", encoding="utf-8") as f:
        content = f.read()

    count = 0
    updated_packages = {}  # Track all updates: package -> [(old, new)]

    # For each package in latest_info
    for key, info in latest_info.items():
        if not key.startswith("py:"):
            continue
        package_name = key[3:]  # Remove "py:" prefix
        latest_version = info.get("latest")
        if not latest_version:
            continue

        # Pattern to match this package anywhere in the file
        # Matches: "package==X.Y.Z" with any version number
        pattern = rf'"{re.escape(package_name)}==([^"]+)"'

        # Replace ALL occurrences at once using re.sub with a function
        def replacer(match):
            old_version = match.group(1)
            if old_version != latest_version:
                # Track the update
                if package_name not in updated_packages:
                    updated_packages[package_name] = []
                updated_packages[package_name].append((old_version, latest_version))
                return f'"{package_name}=={latest_version}"'
            return match.group(0)  # No change

        # Replace all occurrences in one pass
        new_content = re.sub(pattern, replacer, content)
        # Update count only if the package was actually updated
        if package_name in updated_packages and content != new_content:
            count += 1
            content = new_content

    # Write updated content
    with open(safe_path, "w", encoding="utf-8") as f:
        f.write(content)

    # Report what was updated
    total_occurrences = 0
    # Use ASCII markers on Windows, Unicode check mark / arrow elsewhere
    check_mark = "[OK]" if IS_WINDOWS else "\u2713"
    arrow = "->" if IS_WINDOWS else "\u2192"
    for package, updates in updated_packages.items():
        total_occurrences += len(updates)
        if len(updates) == 1:
            print(f" {check_mark} {package}: {updates[0][0]} {arrow} {updates[0][1]}")
        else:
            print(f" {check_mark} {package}: {updates[0][0]} {arrow} {updates[0][1]} ({len(updates)} occurrences)")

    # Return total occurrences updated, not just unique packages
    return total_occurrences
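

# Minimal self-check sketch (assumes the script is run from a project root; it stays
# offline since allow_net=False only reads the local cache).
if __name__ == "__main__":
    found = parse_dependencies(".")
    print(f"Found {len(found)} dependencies")
    offline_info = check_latest_versions(found, allow_net=False, offline=True)
    print(f"Cached version info for {len(offline_info)} packages")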