# Mirror of https://github.com/aljazceru/Auditor.git
# Synced 2025-12-17 03:24:18 +01:00
# 150 lines, 4.8 KiB, Python
"""Security utilities for input sanitization and validation."""
|
|
|
|
import shlex
|
|
import urllib.parse
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
class SecurityError(Exception):
    """Signal that an input failed one of this module's security checks.

    The sanitizers in this module raise it, for example, when a path
    resolves outside the permitted project root or when a configured path
    value is empty.
    """
|
|
|
|
|
|
def sanitize_path(path_str: str, project_root: Optional[str] = None) -> Path:
    """
    Sanitize a file path to prevent path traversal attacks.

    The path is resolved to an absolute location and must end up inside the
    resolved project root. Relative paths are interpreted against the project
    root, not against the process working directory.

    Args:
        path_str: The path string to sanitize
        project_root: The root directory to restrict paths within (default: current directory)

    Returns:
        A resolved, absolute Path object located within the project root

    Raises:
        SecurityError: If the path contains a NUL byte or resolves outside
            the project root
    """
    # A NUL byte can never be part of a real file name. Reject it up front so
    # the caller gets the documented SecurityError instead of a ValueError
    # leaking out of the OS layer during resolve().
    if "\0" in str(path_str):
        raise SecurityError(f"Invalid path: {path_str!r} contains a NUL byte")

    root = Path("." if project_root is None else project_root).resolve()

    # Relative inputs are anchored at the project root; absolute inputs are
    # taken as given and simply have to land inside the root.
    candidate = Path(path_str)
    if not candidate.is_absolute():
        candidate = root / candidate
    target = candidate.resolve()

    # relative_to() raises ValueError when target is not located under root.
    try:
        target.relative_to(root)
    except ValueError:
        # The ValueError carries no extra information, so it is not chained.
        raise SecurityError(
            f"Path traversal attempt detected: {path_str} resolves outside project root"
        ) from None

    return target
|
|
|
|
|
|
def sanitize_shell_arg(arg: str) -> str:
    """
    Quote a string so a shell treats it as a single literal argument.

    Delegates entirely to shlex.quote, which wraps the value in single quotes
    whenever it contains characters the shell would otherwise interpret.

    Args:
        arg: The argument string to sanitize

    Returns:
        The quoted string; values made up only of safe characters are
        returned unchanged
    """
    quoted = shlex.quote(arg)
    return quoted
|
|
|
|
|
|
def sanitize_url_component(component: str) -> str:
    """
    Percent-encode a string for safe use as a single URL component.

    Encoding reserved characters keeps a value such as a package name from
    injecting extra path segments, query strings or fragments into a URL.

    Args:
        component: The URL component to sanitize (e.g., package name)

    Returns:
        The percent-encoded string
    """
    # safe="" removes "/" from the default safe set, so slashes are encoded
    # too. Only letters, digits and "_.-~" are left untouched.
    encoded = urllib.parse.quote(component, safe="")
    return encoded
|
|
|
|
|
|
def validate_package_name(name: str, manager: str) -> bool:
    """
    Validate that a package name follows the expected format for its package manager.

    The whole name must match the manager's pattern. Matching is ASCII-only,
    so non-ASCII letters or digits and a trailing newline are rejected.

    Args:
        name: The package name to validate
        manager: The package manager type ("npm", "py", "docker")

    Returns:
        True if the name is valid, False otherwise (including when the
        manager is not one of the supported types)
    """
    import re

    # 214 is npm's maximum name length; it is applied to every manager as a
    # general upper bound.
    if not name or len(name) > 214:
        return False

    # Every pattern requires an alphanumeric first character. With re.ASCII,
    # \w is exactly [a-zA-Z0-9_].
    patterns = {
        # Optional "@scope/" prefix, then the package name. Scope and name
        # must each start with a lowercase letter or a digit.
        "npm": r'(@[a-z0-9][\w.-]*/)?[a-z0-9][\w.-]*',
        # PyPI names are case-insensitive, so an uppercase first letter is fine.
        "py": r'[a-zA-Z0-9][\w.-]*',
        # "/" and ":" allow registry, namespace and tag parts.
        "docker": r'[a-z0-9][\w./:-]*',
    }

    pattern = patterns.get(manager)
    if pattern is None:
        return False

    # fullmatch is used instead of match with a "$" anchor because "$" also
    # matches just before a trailing newline, which would let "name\n" pass.
    return re.fullmatch(pattern, name, flags=re.ASCII) is not None
|
|
|
|
|
|
def sanitize_config_path(config_value: str, config_section: str, config_key: str, project_root: str = ".") -> Path:
    """
    Sanitize a path value from configuration.

    This is specifically for paths that come from config files or environment variables,
    which are common sources of tainted input.

    For the "paths" section keys "manifest", "db", "workset" and "pf_dir" the
    value is pinned to the .pf/ directory. A value that does not already point
    there is replaced by ./.pf/<final path component>, and the resolved result
    must still lie inside .pf/.

    Args:
        config_value: The path value from config
        config_section: The config section (e.g., "paths")
        config_key: The config key (e.g., "manifest")
        project_root: The root directory to restrict paths within

    Returns:
        A sanitized, resolved Path object

    Raises:
        SecurityError: If the path is empty, resolves outside the project
            root, or (for the pinned keys) resolves outside .pf/
    """
    if not config_value:
        raise SecurityError(f"Empty path in config[{config_section}][{config_key}]")

    pinned_to_pf = config_section == "paths" and config_key in ("manifest", "db", "workset", "pf_dir")
    if not pinned_to_pf:
        return sanitize_path(config_value, project_root)

    # "pf_dir" may name the .pf directory itself. Without this exemption a
    # value of ".pf" or "./.pf" fails the prefix test below and is rewritten
    # to "./.pf/.pf".
    is_pf_dir_itself = config_key == "pf_dir" and config_value.rstrip("/") in (".pf", "./.pf")
    if not is_pf_dir_itself and not config_value.startswith(("./.pf/", ".pf/")):
        # Force it to be under .pf/ for safety, keeping only the final component.
        config_value = f"./.pf/{Path(config_value).name}"

    resolved = sanitize_path(config_value, project_root)

    # The prefix test above is purely textual. ".pf/../x", or a value of ".."
    # rewritten to "./.pf/..", still resolves inside the project root but
    # outside .pf/, so containment is checked on the resolved paths.
    pf_root = sanitize_path(".pf", project_root)
    try:
        resolved.relative_to(pf_root)
    except ValueError:
        raise SecurityError(
            f"Path in config[{config_section}][{config_key}] resolves outside .pf directory: {config_value}"
        ) from None

    return resolved