# Auditor/theauditor/utils/finding_priority.py

"""Centralized finding prioritization for internal organization.

This module provides consistent sorting of findings to ensure
critical security issues appear before style warnings in reports.
This is NOT severity mapping for tools, but internal organization
for optimal AI context utilization.
"""

# Severity ranking used for internal organization: a smaller rank sorts first.
# This is our SINGLE SOURCE OF TRUTH for severity ranking. Each severity's
# rank is its position in the tuple below, so reordering the tuple reorders
# the ranking.
PRIORITY_ORDER = {
    severity: rank
    for rank, severity in enumerate((
        "critical",  # 0 - immediate security threats
        "high",      # 1 - serious bugs/vulnerabilities
        "medium",    # 2 - should fix soon
        "low",       # 3 - minor issues
        "warning",   # 4 - potential problems
        "info",      # 5 - informational
        "style",     # 6 - code style/formatting
        "unknown",   # 7 - unrecognized severity
    ))
}

# Secondary sort key: how much weight a finding's originating tool carries
# (lower = more important). Tools are grouped into tiers; every tool in a
# tier shares the same rank.
TOOL_IMPORTANCE = {
    # Tier 0 - security tools (highest importance)
    **dict.fromkeys(
        (
            "taint-analyzer",
            "vulnerability-scanner",
            "security-rules",
            "sql-injection",
            "xss-detector",
            "docker-analyzer",  # Docker security findings
        ),
        0,
    ),
    # Tier 1 - pattern detection
    **dict.fromkeys(("pattern-detector", "orm", "database-rules"), 1),
    # Tier 2 - testing and validation
    **dict.fromkeys(("fce", "test", "pytest", "jest"), 2),
    # Tier 3 - analysis tools
    **dict.fromkeys(("ml", "graph", "dependency", "deps"), 3),
    # Tier 4 - code quality
    **dict.fromkeys(("ruff", "mypy", "bandit", "pylint"), 4),
    # Tiers 5-7 - style tools (lowest importance)
    "eslint": 5,
    "prettier": 6,
    **dict.fromkeys(("format", "beautifier"), 7),
}

# Lookup table translating the severity vocabularies of various tools
# (integer levels and string labels) into our normalized severity names.
#
# NOTE: normalize_severity() returns any string that is already a key of
# PRIORITY_ORDER before it consults this table, so the "warning", "info",
# "style" and pass-through entries below only take effect for code that
# reads this table directly.
SEVERITY_MAPPINGS = {
    # Integer levels (Docker uses 4=critical, CVE uses 1-4 scale)
    4: "critical",  # Docker's highest
    3: "high",
    2: "medium",
    1: "low",
    0: "info",      # Sometimes used for informational

    # String labels used by various tools, grouped by the severity they map to
    "error": "high",  # ESLint, many linters
    **dict.fromkeys(
        (
            "warning",  # Standard warning
            "warn",     # Prettier variant
        ),
        "medium",
    ),
    **dict.fromkeys(
        (
            "info",   # Informational
            "note",   # Ruff uses this
            "debug",  # Debug-level issues
        ),
        "low",
    ),
    **dict.fromkeys(
        (
            "fatal",    # Some tools use fatal
            "blocker",  # Severity naming from bug trackers
        ),
        "critical",
    ),
    "major": "high",
    **dict.fromkeys(("minor", "trivial"), "low"),

    # Already-normalized names map to themselves
    **{name: name for name in ("critical", "high", "medium", "low")},

    # Style-specific labels (for prettier/eslint)
    **dict.fromkeys(("style", "formatting"), "style"),
}

def normalize_severity(severity_value):
    """Normalize severity from various formats to a standard string.

    Handles integers (Docker), floats (ML confidence), strings (ESLint),
    and missing values (test failures). Any value that cannot be
    recognized falls back to "warning".

    Args:
        severity_value: Can be int, float, string, or None. Other types
            are converted with str() and treated as string labels.

    Returns:
        Normalized severity string from PRIORITY_ORDER keys
    """
    if severity_value is None:
        return "warning"  # Default for missing severity

    # Handle numeric types
    if isinstance(severity_value, (int, float)):
        # Floats within [0.0, 1.0] are treated as ML confidence scores.
        # NaN fails both comparisons, so it falls through to the integer path.
        if isinstance(severity_value, float) and 0.0 <= severity_value <= 1.0:
            if severity_value >= 0.9:
                return "critical"
            elif severity_value >= 0.7:
                return "high"
            elif severity_value >= 0.4:
                return "medium"
            else:
                return "low"

        # Integer severity (Docker style, CVE scores). int() raises
        # ValueError for NaN and OverflowError for +/-infinity; treat such
        # values as unrecognized instead of crashing the whole report.
        try:
            level = int(severity_value)
        except (ValueError, OverflowError):
            return "warning"
        return SEVERITY_MAPPINGS.get(level, "warning")

    # Handle string types (case- and whitespace-insensitive)
    severity_str = str(severity_value).lower().strip()

    # Already a valid normalized severity: return it unchanged
    if severity_str in PRIORITY_ORDER:
        return severity_str

    # Otherwise translate tool-specific labels, defaulting to "warning"
    return SEVERITY_MAPPINGS.get(severity_str, "warning")

def get_sort_key(finding):
    """Generate sort key for a finding.

    Multi-level sort: severity -> tool -> file -> line

    The file and line components are coerced to a single type each (str and
    int) so that keys of different findings are always comparable. Without
    this, a finding carrying ``"file": None`` or a string line number would
    raise TypeError during sorting whenever the earlier components tie.

    Args:
        finding: Dictionary with severity, tool, file, line fields. Any of
            them may be missing or None.

    Returns:
        Tuple ``(severity_rank, tool_rank, file, line)`` for sorting
        (lower values = higher priority)
    """
    # Normalize severity to handle all formats
    normalized_severity = normalize_severity(finding.get("severity"))

    # Get tool name, handle missing
    tool_name = str(finding.get("tool", "unknown")).lower()

    # Missing or None file paths sort under the "zzz" placeholder; anything
    # else is stringified so mixed value types cannot break the comparison.
    file_value = finding.get("file")
    file_key = "zzz" if file_value is None else str(file_value)

    # Missing, None, or non-numeric line numbers sort after real ones.
    try:
        line_key = int(finding.get("line"))
    except (TypeError, ValueError, OverflowError):
        line_key = 999999

    return (
        PRIORITY_ORDER.get(normalized_severity, 7),      # Severity priority
        TOOL_IMPORTANCE.get(tool_name, 8),               # Tool priority
        file_key,                                        # File path
        line_key,                                        # Line number
    )

def sort_findings(findings):
    """Sort findings by priority for optimal report organization.

    Critical security issues will appear first, style issues last.
    This ensures AI sees the most important issues within its
    limited context window.

    Args:
        findings: Iterable of finding dictionaries; may be empty or None

    Returns:
        New sorted list (original unchanged). Empty or None input yields
        a fresh empty list, never the caller's own object.
    """
    # Return a new list even for empty input so callers can mutate the
    # result without touching what they passed in; also covers None.
    if not findings:
        return []

    return sorted(findings, key=get_sort_key)