Files
Auditor/theauditor/security.py

150 lines
4.8 KiB
Python

"""Security utilities for input sanitization and validation."""
import shlex
import urllib.parse
from pathlib import Path
from typing import Optional
class SecurityError(Exception):
"""Raised when a security violation is detected."""
pass
def sanitize_path(path_str: str, project_root: Optional[str] = None) -> Path:
"""
Sanitize a file path to prevent path traversal attacks.
Args:
path_str: The path string to sanitize
project_root: The root directory to restrict paths within (default: current directory)
Returns:
A resolved Path object that is safe to use
Raises:
SecurityError: If the path attempts to escape the project root
"""
if project_root is None:
project_root = "."
# Resolve both paths to absolute
root = Path(project_root).resolve()
# Handle relative paths - make them relative to project root
if not Path(path_str).is_absolute():
target = (root / path_str).resolve()
else:
target = Path(path_str).resolve()
# Check if the resolved path is within the project root
try:
# This will raise ValueError if target is not relative to root
target.relative_to(root)
except ValueError:
# Path is outside project root - this is a security violation
raise SecurityError(f"Path traversal attempt detected: {path_str} resolves outside project root")
return target
def sanitize_shell_arg(arg: str) -> str:
"""
Sanitize a string for safe use as a shell argument.
Uses shlex.quote to properly escape special characters and prevent command injection.
Args:
arg: The argument string to sanitize
Returns:
A properly quoted/escaped string safe for shell use
"""
# shlex.quote properly escapes shell metacharacters
return shlex.quote(arg)
def sanitize_url_component(component: str) -> str:
"""
Sanitize a string for safe use in URL construction.
Properly encodes special characters to prevent URL injection.
Args:
component: The URL component to sanitize (e.g., package name)
Returns:
A properly URL-encoded string
"""
# URL-encode the component to handle special characters safely
# safe='' means encode everything except alphanumerics
return urllib.parse.quote(component, safe='')
def validate_package_name(name: str, manager: str) -> bool:
"""
Validate that a package name follows the expected format for its package manager.
Args:
name: The package name to validate
manager: The package manager type ("npm", "py", "docker")
Returns:
True if the name is valid, False otherwise
"""
import re
if not name or len(name) > 214: # npm max length is 214
return False
if manager == "npm":
# npm package names: lowercase, alphanumeric, hyphens, underscores, dots
# Can be scoped: @scope/package
pattern = r'^(@[a-z0-9][\w.-]*\/)?[a-z0-9][\w.-]*$'
return bool(re.match(pattern, name))
elif manager == "py":
# PyPI package names: alphanumeric, hyphens, underscores, dots
# Case insensitive but typically lowercase
pattern = r'^[a-zA-Z0-9][\w.-]*$'
return bool(re.match(pattern, name))
elif manager == "docker":
# Docker image names: lowercase, alphanumeric, hyphens, underscores, dots, slashes
# Can include registry and namespace
pattern = r'^[a-z0-9][\w./:-]*$'
return bool(re.match(pattern, name))
return False
def sanitize_config_path(config_value: str, config_section: str, config_key: str, project_root: str = ".") -> Path:
"""
Sanitize a path value from configuration.
This is specifically for paths that come from config files or environment variables,
which are common sources of tainted input.
Args:
config_value: The path value from config
config_section: The config section (e.g., "paths")
config_key: The config key (e.g., "manifest")
project_root: The root directory to restrict paths within
Returns:
A sanitized Path object
Raises:
SecurityError: If the path is invalid or attempts traversal
"""
if not config_value:
raise SecurityError(f"Empty path in config[{config_section}][{config_key}]")
# Special handling for known config paths that should always be under .pf/
if config_section == "paths" and config_key in ["manifest", "db", "workset", "pf_dir"]:
# These should always be under .pf/ directory
if not config_value.startswith("./.pf/") and not config_value.startswith(".pf/"):
# Force it to be under .pf/ for safety
config_value = f"./.pf/{Path(config_value).name}"
return sanitize_path(config_value, project_root)