"""Linter output parsers - converts various linter outputs to normalized format.
|
|
|
|
COURIER PHILOSOPHY:
|
|
- We translate tool output keys to standard keys
|
|
- We preserve exact messages and severities
|
|
- We perform direct data access without interpretation
|
|
- We validate translation, not content
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
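# Shape of a normalized finding, for reference (values are illustrative):
#
#   {
#       "tool": "ruff",            # which linter produced the finding
#       "file": "src/app.py",      # workset-relative path, forward slashes
#       "line": 12,                # 0 indicates a file-level or configuration issue
#       "column": 4,
#       "rule": "F401",            # tool-specific rule/code ("" if the tool has none)
#       "message": "'os' imported but unused",
#       "severity": "warning",     # normally "error" or "warning"
#       "category": "lint",        # "lint", "type", "style", or "security"
#   }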


def parse_eslint_output(output: str, workset_files: set[str]) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Parse ESLint JSON output.

    Returns:
        Tuple of (findings, ast_data) where ast_data maps file paths to AST objects
    """
    findings = []
    ast_data = {}

    try:
        results = json.loads(output)
        for file_result in results:
            file = Path(file_result["filePath"])
            # Normalize path to forward slashes for cross-platform compatibility
            file_str = str(file).replace("\\", "/")

            # Try to match against the workset in various forms
            matched = False
            for workset_file in workset_files:
                # Check if the absolute path ends with the relative workset path
                # This handles both Windows absolute paths and Unix paths
                if file_str.endswith(workset_file):
                    matched = True
                    file_str = workset_file
                    break
                # Also check if the workset file is contained in the path (with proper separators)
                elif "/" + workset_file in file_str or file_str.startswith(workset_file):
                    matched = True
                    file_str = workset_file
                    break

            if not matched:
                continue

            # Extract AST if present
            if "ast" in file_result:
                ast_data[file_str] = file_result["ast"]

            for message in file_result.get("messages", []):
                # ESLint severity is numeric (2=error, 1=warning); translate to standard strings
                eslint_severity = message.get("severity", 1)
                if eslint_severity == 2:
                    standard_severity = "error"
                else:
                    standard_severity = "warning"  # 1 and any unknown value map to warning

                # The "or" guards convert explicit nulls (not just missing keys) to safe
                # defaults; ESLint emits a null ruleId for fatal parse errors
                translated = {
                    "tool": "eslint",
                    "file": file_str,
                    "line": int(message.get("line") or 0),
                    "column": int(message.get("column") or 0),
                    "rule": message.get("ruleId") or "",  # Empty, not "unknown"
                    "message": message.get("message", ""),
                    "severity": standard_severity,
                    "category": "lint",
                }
                findings.append(translated)
    except json.JSONDecodeError:
        # Fall back to regex parsing of line-oriented text output
        pattern = r"([^:]+):(\d+):(\d+):\s+(error|warning)\s+(.+?)\s+([a-z0-9\-\/]+)\s*$"
        for line in output.strip().split("\n"):
            match = re.match(pattern, line.strip())
            if match:
                file = match.group(1).strip()
                if file in workset_files:
                    findings.append(
                        {
                            "tool": "eslint",
                            "file": file,
                            "line": int(match.group(2)),
                            "column": int(match.group(3)),
                            "rule": match.group(6),
                            "message": match.group(5),
                            "severity": match.group(4),  # Keep original for /raw/
                            "category": "lint",
                        }
                    )

    return findings, ast_data
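# Illustrative sketch (paths and rule ids are hypothetical):
#
#   payload = json.dumps([{
#       "filePath": "/repo/src/app.js",
#       "messages": [{"line": 3, "column": 5, "severity": 2,
#                     "ruleId": "no-unused-vars",
#                     "message": "'x' is defined but never used."}],
#   }])
#   findings, ast = parse_eslint_output(payload, {"src/app.js"})
#   # -> findings[0] == {"tool": "eslint", "file": "src/app.js", "line": 3,
#   #                    "column": 5, "rule": "no-unused-vars",
#   #                    "severity": "error", "category": "lint", ...}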


def parse_ruff_output(output: str, workset_files: set[str]) -> list[dict[str, Any]]:
    """Parse ruff output - translate to standard keys."""
    findings = []

    # Format: path:line:col: code message
    # Codes may have multi-letter prefixes (e.g. E501, RUF100), hence [A-Z]+
    pattern = r"([^:]+):(\d+):(\d+):\s+([A-Z]+\d+)\s+(.+)$"
    for line in output.strip().split("\n"):
        match = re.match(pattern, line.strip())
        if match:
            file = match.group(1).strip()
            # Normalize Windows backslashes to forward slashes for comparison
            normalized_file = file.replace("\\", "/")

            # Check if file is in workset (comparing normalized paths)
            if normalized_file in workset_files or file in workset_files:
                # COURIER: Translate to standard keys
                translated = {
                    "tool": "ruff",
                    "file": normalized_file,  # Use normalized path
                    "line": int(match.group(2)),
                    "column": int(match.group(3)),
                    "rule": match.group(4),  # Preserve the original code in the rule field
                    "message": match.group(5),  # Preserve exactly
                    "severity": "warning",  # ruff's concise format reports no severity
                    "category": "lint",
                }
                findings.append(translated)

    return findings
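# Illustrative sketch (path and code are hypothetical):
#
#   out = "src/app.py:12:1: F401 'os' imported but unused"
#   parse_ruff_output(out, {"src/app.py"})
#   # -> [{"tool": "ruff", "file": "src/app.py", "line": 12, "column": 1,
#   #      "rule": "F401", "message": "'os' imported but unused",
#   #      "severity": "warning", "category": "lint"}]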


def parse_mypy_output(output: str, workset_files: set[str]) -> list[dict[str, Any]]:
    """Parse mypy output."""
    findings = []

    # Format: path:line: error: message [type-code]
    pattern = r"([^:]+):(\d+):\s+(error|warning|note):\s+(.+?)(?:\s+\[([^\]]+)\])?$"
    for line in output.strip().split("\n"):
        match = re.match(pattern, line.strip())
        if match:
            file = match.group(1).strip()
            # Normalize Windows backslashes to forward slashes for comparison
            normalized_file = file.replace("\\", "/")

            # Check if file is in workset (comparing normalized paths)
            if normalized_file in workset_files or file in workset_files:
                translated = {
                    "tool": "mypy",
                    "file": normalized_file,  # Use normalized path
                    "line": int(match.group(2)),
                    "column": 0,  # mypy omits the column in this format
                    "rule": match.group(5) or "type-error",  # The [code] suffix is optional
                    "message": match.group(4),
                    "severity": match.group(3),  # Keep original mypy severity for /raw/
                    "category": "type",
                }
                findings.append(translated)

    return findings
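# Illustrative sketch (path and message are hypothetical):
#
#   out = "src/app.py:7: error: Incompatible return value type [return-value]"
#   parse_mypy_output(out, {"src/app.py"})
#   # -> [{"tool": "mypy", "file": "src/app.py", "line": 7, "column": 0,
#   #      "rule": "return-value", "severity": "error", "category": "type", ...}]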


def parse_tsc_output(output: str, workset_files: set[str]) -> list[dict[str, Any]]:
    """Parse TypeScript compiler output."""
    findings = []

    # Format: path(line,col): error TS1234: message
    pattern = r"([^(]+)\((\d+),(\d+)\):\s+(error|warning)\s+(TS\d+):\s+(.+)$"
    for line in output.strip().split("\n"):
        match = re.match(pattern, line.strip())
        if match:
            file = match.group(1).strip()
            if file in workset_files:
                translated = {
                    "tool": "tsc",
                    "file": file,
                    "line": int(match.group(2)),
                    "column": int(match.group(3)),
                    "rule": match.group(5),
                    "message": match.group(6),
                    "severity": match.group(4),  # Keep original tsc severity for /raw/
                    "category": "type",
                }
                findings.append(translated)

    return findings
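# Illustrative sketch (path and message are hypothetical):
#
#   out = "src/app.ts(10,2): error TS2322: Type 'string' is not assignable to type 'number'."
#   parse_tsc_output(out, {"src/app.ts"})
#   # -> [{"tool": "tsc", "file": "src/app.ts", "line": 10, "column": 2,
#   #      "rule": "TS2322", "severity": "error", "category": "type", ...}]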


def parse_prettier_output(
    stdout: str, stderr: str, workset_files: set[str]
) -> list[dict[str, Any]]:
    """Parse Prettier output."""
    findings = []

    # When run with --check, Prettier lists unformatted files on stderr with a [warn] prefix
    # Example: "[warn] backend/src/app.ts" or with ANSI codes: "\x1b[33m[warn]\x1b[39m backend/src/app.ts"

    # Pattern to extract the file path after the [warn] prefix
    pattern = r"\[warn\]\s+(.+?)$"

    for line in stderr.strip().split("\n"):
        if line and not line.startswith("Checking"):
            # Remove ANSI color codes first
            clean_line = re.sub(r"\x1b\[[0-9;]*m", "", line)
            clean_line = re.sub(r"\[\d+m", "", clean_line)  # Also handle bare [33m remnants

            # Extract file path after [warn]
            match = re.search(pattern, clean_line)
            if match:
                file = match.group(1).strip()
            else:
                # Fallback: if no [warn] prefix, use the whole line
                file = clean_line.strip()

            # Normalize path for comparison
            normalized_file = file.replace("\\", "/")

            # Check if file is in workset
            if normalized_file in workset_files or file in workset_files:
                translated = {
                    "tool": "prettier",
                    "file": normalized_file,  # Use normalized path
                    "line": 0,  # File-level finding
                    "column": 0,
                    "rule": "format",
                    "message": "File needs formatting",
                    "severity": "warning",  # Prettier reports no severity; default to warning
                    "category": "style",
                }
                findings.append(translated)

    return findings
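# Illustrative sketch (stderr line is hypothetical; ANSI codes are stripped by the parser):
#
#   parse_prettier_output("", "[warn] backend/src/app.ts", {"backend/src/app.ts"})
#   # -> [{"tool": "prettier", "file": "backend/src/app.ts", "line": 0,
#   #      "rule": "format", "severity": "warning", "category": "style", ...}]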


def parse_black_output(stdout: str, stderr: str, workset_files: set[str]) -> list[dict[str, Any]]:
    """Parse Black output."""
    findings = []

    # Black lists files that would be reformatted on stderr
    pattern = r"would reformat (.+)$"
    for match in re.finditer(pattern, stderr, re.MULTILINE):
        file = match.group(1)
        # Normalize Windows backslashes to forward slashes for comparison
        normalized_file = file.replace("\\", "/")

        # Check if file is in workset (comparing normalized paths)
        if normalized_file in workset_files or file in workset_files:
            translated = {
                "tool": "black",
                "file": normalized_file,  # Use normalized path
                "line": 0,  # File-level finding
                "column": 0,
                "rule": "format",
                "message": "File needs formatting",
                "severity": "warning",  # Black reports no severity; default to warning
                "category": "style",
            }
            findings.append(translated)

    # Also check for --diff output in stdout
    # When --diff is used, Black outputs unified diff format to stdout
    if stdout and stdout.startswith("---"):
        # Extract filenames from diff headers
        diff_pattern = r"^---\s+(.+?)\s+\d{4}-\d{2}-\d{2}"
        for match in re.finditer(diff_pattern, stdout, re.MULTILINE):
            file = match.group(1)
            # Normalize Windows backslashes to forward slashes for comparison
            normalized_file = file.replace("\\", "/")

            # Check if file is in workset (comparing normalized paths)
            if normalized_file in workset_files or file in workset_files:
                # Skip files already recorded from stderr
                if not any(f["file"] == normalized_file for f in findings):
                    translated = {
                        "tool": "black",
                        "file": normalized_file,  # Use normalized path
                        "line": 0,
                        "column": 0,
                        "rule": "format",
                        "message": "File needs formatting",
                        "severity": "warning",
                        "category": "style",
                    }
                    findings.append(translated)

    return findings
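# Illustrative sketch (stderr line is hypothetical):
#
#   parse_black_output("", "would reformat src/app.py", {"src/app.py"})
#   # -> [{"tool": "black", "file": "src/app.py", "line": 0, "rule": "format",
#   #      "severity": "warning", "category": "style", ...}]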


def parse_golangci_output(output: str, workset_files: set[str]) -> list[dict[str, Any]]:
    """Parse golangci-lint output."""
    findings = []

    # Format: path:line:col: message (linter)
    pattern = r"([^:]+):(\d+):(\d+):\s+(.+?)\s+\(([^)]+)\)$"
    for match in re.finditer(pattern, output, re.MULTILINE):
        file = match.group(1)
        if file in workset_files:
            translated = {
                "tool": "golangci-lint",
                "file": file,
                "line": int(match.group(2)),
                "column": int(match.group(3)),
                "rule": match.group(5),  # The reporting linter's name
                "message": match.group(4),
                "severity": "warning",  # golangci-lint reports no severity here; default to warning
                "category": "lint",
            }
            findings.append(translated)

    return findings
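# Illustrative sketch (path and message are hypothetical):
#
#   out = "main.go:15:2: ineffectual assignment to err (ineffassign)"
#   parse_golangci_output(out, {"main.go"})
#   # -> [{"tool": "golangci-lint", "file": "main.go", "line": 15, "column": 2,
#   #      "rule": "ineffassign", "severity": "warning", "category": "lint", ...}]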


def parse_go_vet_output(output: str, workset_files: set[str]) -> list[dict[str, Any]]:
    """Parse go vet output."""
    findings = []

    # Format: path:line:col: message
    pattern = r"([^:]+):(\d+):(\d+):\s+(.+)$"
    for match in re.finditer(pattern, output, re.MULTILINE):
        file = match.group(1)
        if file in workset_files:
            translated = {
                "tool": "go-vet",
                "file": file,
                "line": int(match.group(2)),
                "column": int(match.group(3)),
                "rule": "vet",  # go vet has no per-finding rule id
                "message": match.group(4),
                "severity": "warning",  # go vet reports no severity; default to warning
                "category": "lint",
            }
            findings.append(translated)

    return findings
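# Illustrative sketch (path and message are hypothetical):
#
#   parse_go_vet_output("main.go:23:6: unreachable code", {"main.go"})
#   # -> [{"tool": "go-vet", "file": "main.go", "line": 23, "column": 6,
#   #      "rule": "vet", "severity": "warning", "category": "lint", ...}]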


def parse_maven_output(tool: str, output: str, workset_files: set[str]) -> list[dict[str, Any]]:
    """Parse Maven-based tool output (SpotBugs/Checkstyle)."""
    findings = []

    # Simple pattern matching for Maven output: [ERROR] path:line: message
    pattern = r"\[(ERROR|WARNING)\]\s+([^:]+):(\d+):\s+(.+)$"
    for match in re.finditer(pattern, output, re.MULTILINE):
        file = match.group(2)
        if file in workset_files:
            translated = {
                "tool": tool,
                "file": file,
                "line": int(match.group(3)),
                "column": 0,  # Maven output carries no column
                "rule": tool,
                "message": match.group(4),
                "severity": match.group(1).lower(),  # Preserve the ERROR/WARNING distinction
                "category": "lint",
            }
            findings.append(translated)

    return findings
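# Illustrative sketch (path and message are hypothetical):
#
#   out = "[ERROR] src/main/java/App.java:42: Missing a Javadoc comment."
#   parse_maven_output("checkstyle", out, {"src/main/java/App.java"})
#   # -> [{"tool": "checkstyle", "file": "src/main/java/App.java", "line": 42,
#   #      "severity": "error", "category": "lint", ...}]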


def parse_bandit_output(output: str, workset_files: set[str]) -> list[dict[str, Any]]:
    """Parse bandit JSON output for Python security issues."""
    findings = []

    try:
        results = json.loads(output)
        # Bandit's JSON structure keeps findings under the "results" key
        for result in results.get("results", []):
            file = result.get("filename", "")
            # Normalize Windows backslashes to forward slashes for comparison
            normalized_file = file.replace("\\", "/")

            # Check if the absolute path from Bandit matches any relative workset path
            # Bandit returns absolute paths like C:/Users/.../file.py
            # Workset has relative paths like scrapers/file.py
            matched = False
            matched_file = normalized_file  # Default to the normalized absolute path

            for workset_file in workset_files:
                # Normalize workset file for comparison
                normalized_workset = workset_file.replace("\\", "/")
                # Check if the absolute path ends with the relative path
                if normalized_file.endswith(normalized_workset):
                    matched = True
                    matched_file = normalized_workset  # Use the workset's relative path
                    break
                # Also check with a leading slash
                elif normalized_file.endswith("/" + normalized_workset):
                    matched = True
                    matched_file = normalized_workset
                    break

            if matched:
                # Map bandit severity levels to standard strings
                severity_map = {
                    "HIGH": "error",
                    "MEDIUM": "warning",
                    "LOW": "warning",
                }

                # The "or 0" guards convert explicit nulls (not just missing keys) to 0
                translated = {
                    "tool": "bandit",
                    "file": matched_file,  # Use the matched relative path from workset
                    "line": int(result.get("line_number") or 0),
                    "column": int(result.get("col_offset") or 0),
                    "rule": result.get("test_id", ""),
                    "message": result.get("issue_text", ""),
                    "severity": severity_map.get(result.get("issue_severity", "MEDIUM"), "warning"),
                    "category": "security",
                }
                findings.append(translated)
    except json.JSONDecodeError:
        # No text fallback: undecodable output simply yields no findings
        pass

    return findings
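# Illustrative sketch (paths are hypothetical):
#
#   payload = json.dumps({"results": [{
#       "filename": "C:/Users/dev/repo/scrapers/fetch.py",
#       "line_number": 8, "col_offset": 0, "test_id": "B602",
#       "issue_text": "subprocess call with shell=True identified.",
#       "issue_severity": "HIGH",
#   }]})
#   parse_bandit_output(payload, {"scrapers/fetch.py"})
#   # -> [{"tool": "bandit", "file": "scrapers/fetch.py", "line": 8,
#   #      "rule": "B602", "severity": "error", "category": "security", ...}]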