Files
Auditor/theauditor/utils/finding_priority.py

178 lines
5.3 KiB
Python

"""Centralized finding prioritization for internal organization.
This module provides consistent sorting of findings to ensure
critical security issues appear before style warnings in reports.
This is NOT severity mapping for tools, but internal organization
for optimal AI context utilization.
"""
# Severity ranking used for internal ordering of findings: a smaller rank
# sorts earlier. The order of this tuple is the SINGLE SOURCE OF TRUTH for
# how severities compare to one another.
PRIORITY_ORDER = {
    label: rank
    for rank, label in enumerate(
        (
            "critical",  # 0 - immediate security threats
            "high",      # 1 - serious bugs / vulnerabilities
            "medium",    # 2 - should be fixed soon
            "low",       # 3 - minor issues
            "warning",   # 4 - potential problems
            "info",      # 5 - informational only
            "style",     # 6 - code style / formatting
            "unknown",   # 7 - unrecognized severity
        )
    )
}
# Secondary sort rank per reporting tool (smaller = more important).
# Used to order findings that share the same severity, so that output from
# security tools is listed ahead of output from style tools.
TOOL_IMPORTANCE = {
    # Tier 0: security tools
    "taint-analyzer": 0,
    "vulnerability-scanner": 0,
    "security-rules": 0,
    "sql-injection": 0,
    "xss-detector": 0,
    "docker-analyzer": 0,  # Docker security findings
    # Tier 1: pattern detection
    "pattern-detector": 1,
    "orm": 1,
    "database-rules": 1,
    # Tier 2: testing and validation
    "fce": 2,
    "test": 2,
    "pytest": 2,
    "jest": 2,
    # Tier 3: analysis tools
    "ml": 3,
    "graph": 3,
    "dependency": 3,
    "deps": 3,
    # Tier 4: code quality
    "ruff": 4,
    "mypy": 4,
    "bandit": 4,
    "pylint": 4,
    # Tiers 5-7: style tools, least important
    "eslint": 5,
    "prettier": 6,
    "format": 7,
    "beautifier": 7,
}
# Lookup table translating tool-specific severity values (integer levels and
# alternative string spellings) into the normalized severity names.
# NOTE: normalize_severity() checks PRIORITY_ORDER before this table for
# strings, so the string keys "warning" and "info" below are not reached
# through that function; "warning" and "info" pass through unchanged there.
SEVERITY_MAPPINGS = {
    # Integer levels (Docker uses 4=critical, CVE uses 1-4 scale)
    4: "critical",  # Docker's highest
    3: "high",
    2: "medium",
    1: "low",
    0: "info",  # sometimes used for informational
    # Alternative string spellings emitted by various tools
    "error": "high",        # ESLint, many linters
    "warning": "medium",    # standard warning
    "warn": "medium",       # Prettier variant
    "info": "low",          # informational
    "note": "low",          # Ruff uses this
    "debug": "low",         # debug-level issues
    "fatal": "critical",    # some tools use fatal
    "blocker": "critical",  # severity naming from bug trackers
    "major": "high",
    "minor": "low",
    "trivial": "low",
    # Identity entries for values that are already normalized
    "critical": "critical",
    "high": "high",
    "medium": "medium",
    "low": "low",
    # Style-specific (for prettier/eslint)
    "style": "style",
    "formatting": "style",
}
def normalize_severity(severity_value):
    """Normalize a severity value from various formats to a standard string.

    Handles integers (Docker), floats (ML confidence), strings (ESLint),
    and missing values (test failures).

    Args:
        severity_value: Can be int, float, string, or None. Any other type
            is converted with str() and treated as a string.

    Returns:
        Normalized severity string from PRIORITY_ORDER keys. Missing,
        unrecognized, and non-finite (NaN / infinity) values yield
        "warning".
    """
    if severity_value is None:
        return "warning"  # Default for missing severity

    # Handle numeric types
    if isinstance(severity_value, (int, float)):
        # Floats within [0.0, 1.0] are treated as ML confidence scores and
        # bucketed by threshold; ints never take this branch.
        if isinstance(severity_value, float) and 0.0 <= severity_value <= 1.0:
            if severity_value >= 0.9:
                return "critical"
            if severity_value >= 0.7:
                return "high"
            if severity_value >= 0.4:
                return "medium"
            return "low"

        # Integer severity (Docker style, CVE scores). int() raises
        # ValueError for NaN and OverflowError for +/-infinity, so treat
        # those like any other unrecognized value instead of crashing.
        try:
            level = int(severity_value)
        except (ValueError, OverflowError):
            return "warning"
        return SEVERITY_MAPPINGS.get(level, "warning")

    # Handle string types (and anything else, via its str() form)
    severity_str = str(severity_value).lower().strip()

    # Already a valid normalized severity: pass through unchanged
    if severity_str in PRIORITY_ORDER:
        return severity_str

    # Otherwise translate tool-specific spellings, defaulting to "warning"
    return SEVERITY_MAPPINGS.get(severity_str, "warning")
def get_sort_key(finding):
    """Generate the sort key for a finding.

    Multi-level sort: severity -> tool -> file -> line.

    The file and line components are coerced to a single type each (str and
    int) so that keys from different findings are always comparable; a
    finding whose "file" or "line" is present but None (or a line that is
    not a number) gets the same default as a finding that lacks the field.

    Args:
        finding: Dictionary with severity, tool, file, line fields. All
            fields are optional.

    Returns:
        Tuple (severity_rank, tool_rank, file, line) for sorting
        (lower values = higher priority).
    """
    # Normalize severity to handle all formats
    normalized_severity = normalize_severity(finding.get("severity"))

    # Get tool name, handle missing
    tool_name = str(finding.get("tool", "unknown")).lower()

    # File path: missing or None sorts after typical paths via "zzz".
    # str() keeps the tuple comparable if a tool supplies a non-string path.
    file_path = finding.get("file")
    file_key = "zzz" if file_path is None else str(file_path)

    # Line number: missing, None, or non-numeric values sort last within
    # the file. int() also accepts numeric strings such as "12".
    try:
        line_key = int(finding.get("line"))
    except (TypeError, ValueError, OverflowError):
        line_key = 999999

    return (
        PRIORITY_ORDER.get(normalized_severity, 7),  # Severity priority
        TOOL_IMPORTANCE.get(tool_name, 8),           # Tool priority
        file_key,                                    # File path
        line_key,                                    # Line number
    )
def sort_findings(findings):
    """Sort findings by priority for optimal report organization.

    Critical security issues will appear first, style issues last.
    This ensures AI sees the most important issues within its
    limited context window.

    Args:
        findings: List of finding dictionaries. May be empty or None.

    Returns:
        New sorted list (original unchanged). Empty or None input yields
        a new empty list, never the caller's own object.
    """
    if not findings:
        # Return a fresh list so the "new list" contract also holds for
        # empty input and callers can safely mutate the result.
        return []
    return sorted(findings, key=get_sort_key)