mirror of
https://github.com/aljazceru/Auditor.git
synced 2025-12-18 11:54:18 +01:00
387 lines
18 KiB
Python
387 lines
18 KiB
Python
"""Linter runner module - executes linter subprocesses."""
|
|
|
|
import json
|
|
import os
|
|
import platform
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
# Import our custom temp manager to avoid WSL2/Windows issues
|
|
from theauditor.utils.temp_manager import TempManager
|
|
|
|
# Platform flag: True only when platform.system() reports "Windows".
# Used below to pick the right bundled Node.js executable name.
IS_WINDOWS: bool = platform.system() == "Windows"

# NOTE: every subprocess in this module is launched with shell=False, so
# arguments reach the OS untouched by any shell. Paths containing spaces
# therefore work as-is and must NOT be wrapped in quotes.
|
|
|
|
|
|
def _select_eslint_files(workset_files: set[str]) -> list[str]:
    """Choose the JavaScript/TypeScript files that ESLint should be given.

    Files under a ``src/`` directory are preferred, minus config, test,
    script, migration and build-output paths. When the workset contains no
    ``src/`` directory at all, a warning is printed and every JS/TS file
    outside ``node_modules``, ``dist``, ``build`` and ``.git`` is used.

    The result is sorted so repeated runs pass files in the same order.
    """
    js_extensions = ('.js', '.jsx', '.ts', '.tsx', '.mjs')
    source_excludes = (
        '/config/', '/scripts/', '/migrations/', '/seeders/',
        '.config.', '.test.', '.spec.',
        '/node_modules/', '/dist/', '/build/', '/.next/', '/.nuxt/',
    )
    fallback_excludes = ('/node_modules/', '/dist/', '/build/', '/.git/')

    def anchored(path: str) -> str:
        # Forward slashes plus a guaranteed leading '/', so that directory
        # markers such as '/src/' also match relative paths that *start*
        # with that directory (e.g. 'src/app.js', 'node_modules/x.js').
        normalized = path.replace('\\', '/')
        return normalized if normalized.startswith('/') else '/' + normalized

    candidates = [
        (f, anchored(f)) for f in sorted(workset_files) if f.endswith(js_extensions)
    ]

    in_src = [(f, path) for f, path in candidates if '/src/' in path]
    selected = [
        f for f, path in in_src
        if not any(marker in path for marker in source_excludes)
    ]
    if selected or in_src:
        # A src/ layout exists; trust the filtered result even if it is empty.
        return selected

    # Fallback for non-standard projects
    print("\n" + "="*60)
    print("WARNING: NON-STANDARD PROJECT STRUCTURE DETECTED")
    print("="*60)
    print("This project does not follow conventional src/ directory structure.")
    print("TheAuditor will attempt to lint ALL JavaScript files.")
    print("This is HIGH RISK and may produce incorrect results.")
    print("Consider restructuring your project to use:")
    print(" - frontend/src/ and backend/src/ (traditional)")
    print(" - packages/*/src/ or apps/*/src/ (monorepo)")
    print("="*60 + "\n")

    return [
        f for f, path in candidates
        if not any(marker in path.lower() for marker in fallback_excludes)
    ]


def run_linter(
    tool: str,
    command: list[str],
    root_path: str,
    workset_files: set[str],
    timeout: int,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """
    Run a linter and parse its output.

    For tools that accept explicit file arguments (eslint, prettier, ruff,
    mypy, black, bandit) the workset is filtered to the extensions the tool
    handles and appended to ``command``; lists longer than 50 files are run
    in several invocations and the results merged. Project-wide tools
    (golangci-lint, go-vet, tsc, spotbugs, checkstyle) run ``command``
    unchanged, and only when the workset contains a file of the matching
    language. Any other tool runs ``command`` unchanged.

    Args:
        tool: Linter name; selects file filtering and output parsing.
        command: Base command line as an argument list (never a shell string).
        root_path: Directory the linter is executed in.
        workset_files: Paths of the files under analysis.
        timeout: Per-invocation timeout in seconds.

    Returns:
        Tuple of (findings, ast_data) where ast_data maps file paths to AST objects.
        ``([], {})`` is returned when there is nothing to lint or when the
        run fails; failures are reported with a printed warning, not raised.
    """
    python_linters = ("ruff", "mypy", "black", "bandit")
    js_file_tools = ("eslint", "prettier")
    prettier_extensions = ('.js', '.jsx', '.ts', '.tsx', '.json', '.css', '.scss', '.html')
    # Tools that operate on the whole project (packages, tsconfig.json, Maven):
    # they take no file arguments, but are skipped when the workset holds no
    # file with one of these extensions.
    project_wide_tools = {
        "golangci-lint": ('.go',),
        "go-vet": ('.go',),
        "tsc": ('.ts', '.tsx'),
        "spotbugs": ('.java',),
        "checkstyle": ('.java',),
    }
    chunk_size = 50  # Safe for 8KB Windows limit

    try:
        if tool in python_linters or tool in js_file_tools:
            if tool in python_linters:
                file_args = sorted(f for f in workset_files if f.endswith('.py'))
            elif tool == "eslint":
                file_args = _select_eslint_files(workset_files)
            else:
                file_args = sorted(
                    f for f in workset_files if f.endswith(prettier_extensions)
                )

            if not file_args:
                # Silent skip - nothing in the workset for this tool
                return [], {}

            # JS tools need forward-slash paths; Python tools use native paths
            if tool in js_file_tools:
                file_args = [f.replace('\\', '/') for f in file_args]

            if len(file_args) <= chunk_size:
                return _execute_linter_command(
                    tool, command + file_args, root_path, workset_files, timeout
                )

            # Too many files for one command line: run in chunks and merge
            all_findings: list[dict[str, Any]] = []
            all_ast_data: dict[str, Any] = {}
            total_chunks = (len(file_args) + chunk_size - 1) // chunk_size
            print(f" Processing {len(file_args)} files in {total_chunks} chunks...")

            for chunk_num, start in enumerate(range(0, len(file_args), chunk_size), 1):
                chunk_files = file_args[start:start + chunk_size]
                print(f" Chunk {chunk_num}/{total_chunks}: {len(chunk_files)} files")

                chunk_findings, chunk_ast_data = _execute_linter_command(
                    tool, command + chunk_files, root_path, workset_files, timeout
                )
                all_findings.extend(chunk_findings)
                all_ast_data.update(chunk_ast_data)

            return all_findings, all_ast_data

        required_extensions = project_wide_tools.get(tool)
        if required_extensions is not None and not any(
            f.endswith(required_extensions) for f in workset_files
        ):
            return [], {}

        # For non-chunked tools, execute directly
        return _execute_linter_command(tool, command, root_path, workset_files, timeout)

    except subprocess.TimeoutExpired:
        print(f"Warning: {tool} timed out after {timeout}s")
    except FileNotFoundError:
        print(f"Warning: {tool} not found, skipping")
    except Exception as e:
        print(f"Warning: Error running {tool}: {e}")

    return [], {}
|
|
|
|
|
|
def _append_debug_log(debug_log_path: "Path | None", lines: list[str]) -> None:
    """Append *lines* to the debug log; a no-op when debug logging is off (path is None)."""
    if debug_log_path is None:
        return
    with open(debug_log_path, 'a', encoding='utf-8') as log:
        log.writelines(lines)


def _remove_temp_files(*paths: Any) -> None:
    """Best-effort removal of temp files; each path is attempted independently."""
    for path in paths:
        try:
            Path(path).unlink()
        except OSError:
            pass  # WSL2/Windows may hold locks


def _resolve_bundled_node_command(
    tool: str,
    command: list[str],
    root_path: str,
    debug_log_path: "Path | None",
) -> list[str]:
    """Build ``[node, script, *args]`` for a JS tool using the bundled Node.js runtime.

    The runtime and the tool's npm package are looked up under
    ``<root_path>/.auditor_venv/.theauditor_tools``. ``command[0]`` (the
    .cmd / shell wrapper) is replaced by the Node executable and the tool's
    JavaScript entry point; the remaining arguments are kept.

    Returns an empty list, after printing what is missing, when either the
    runtime or the entry point cannot be found.
    """
    sandbox_base = Path(root_path) / ".auditor_venv" / ".theauditor_tools"
    node_runtime = sandbox_base / "node-runtime"
    # Platform-specific Node.js executable
    if IS_WINDOWS:
        node_exe = node_runtime / "node.exe"
    else:
        node_exe = node_runtime / "bin" / "node"

    if not node_exe.exists():
        _append_debug_log(
            debug_log_path, [f"[{tool}] Bundled Node.js not found at: {node_exe}\n"]
        )
        print(f"WARNING: {tool} requires bundled Node.js runtime")
        print(f" Expected at: {node_exe}")
        print(" Run 'aud setup-claude --target .' to install")
        return []

    # Entry points in preference order; later entries are fallbacks for
    # other package versions/layouts.
    node_modules = sandbox_base / "node_modules"
    entry_points = {
        "eslint": [
            node_modules / "eslint" / "bin" / "eslint.js",
            node_modules / "eslint" / "lib" / "cli.js",
        ],
        "tsc": [
            node_modules / "typescript" / "lib" / "tsc.js",
        ],
        "prettier": [
            node_modules / "prettier" / "bin" / "prettier.cjs",
            node_modules / "prettier" / "bin" / "prettier.js",
            node_modules / "prettier" / "index.js",
        ],
    }[tool]

    js_script = next((path for path in entry_points if path.exists()), None)
    if js_script is not None:
        # Direct execution: node script.js [args...]
        command_to_run = [str(node_exe), str(js_script)] + command[1:]
        _append_debug_log(debug_log_path, [
            f"[{tool}] Using bundled Node.js runtime\n",
            f" Node: {node_exe}\n",
            f" Script: {js_script}\n",
            f" Command: {command_to_run}\n",
        ])
        return command_to_run

    # Nothing found - report the last location tried and help debugging
    missing_script = entry_points[-1]
    if debug_log_path is not None:
        lines = [f"[{tool}] Script not found: {missing_script}\n"]
        # List what actually exists to help debug
        tool_dir = node_modules / tool.replace("tsc", "typescript")
        if tool_dir.exists():
            lines.append(f"[{tool}] Directory exists: {tool_dir}\n")
            try:
                files = list(tool_dir.rglob("*.js"))[:5]
                lines.append(f"[{tool}] Found JS files: {files}\n")
            except OSError:
                pass  # Listing is purely diagnostic
        _append_debug_log(debug_log_path, lines)
    print(f"ERROR: JavaScript entry point not found: {missing_script}")
    print(f" Expected location: {missing_script}")
    print(" Run 'aud setup-claude --target .' to reinstall")
    return []


def _execute_linter_command(
    tool: str,
    command: list[str],
    root_path: str,
    workset_files: set[str],
    timeout: int,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """
    Execute a single linter command and parse its output.

    This is called once for non-chunked tools, multiple times for chunked tools.

    JavaScript tools (eslint, tsc, prettier) are run through the bundled
    Node.js runtime; all other tools run ``command`` as given. The process
    is always started with ``shell=False`` in ``root_path``; stdout/stderr
    are captured through temp files obtained from ``TempManager`` and those
    files are removed afterwards, whether or not the run succeeded.

    When the ``THEAUDITOR_DEBUG`` environment variable is set, diagnostics
    are appended to ``.pf/linter_debug.log`` (relative to the current
    working directory).

    Args:
        tool: Linter name; selects the runtime and the output parser.
        command: Full command line as an argument list.
        root_path: Working directory for the linter process.
        workset_files: Paths under analysis, passed on to the parser.
        timeout: Timeout in seconds for the linter process.

    Returns:
        Tuple of (findings, ast_data). ``ast_data`` is only populated for
        eslint. ``([], {})`` is returned on timeout, missing executable or
        any other error, each reported with a printed warning.
    """
    findings: list[dict[str, Any]] = []
    ast_data: dict[str, Any] = {}

    try:
        # Create debug log file when debug flag is set
        debug_log_path = None
        if os.environ.get("THEAUDITOR_DEBUG"):
            debug_log_path = Path(".pf") / "linter_debug.log"
            debug_log_path.parent.mkdir(exist_ok=True)

            # Log ground truth before execution
            debug_info = {
                "tool": tool,
                "command": command,
                "root_path": root_path,
                "cwd": os.getcwd(),
                "PATH": os.environ.get('PATH', ''),
                "NODE_PATH": os.environ.get('NODE_PATH', ''),
                "platform": platform.system(),
                "IS_WINDOWS": IS_WINDOWS,
                "workset_files_count": len(workset_files)
            }
            _append_debug_log(debug_log_path, [
                f"\n{'='*60}\n",
                f"[{tool}] Pre-execution debug at {os.path.basename(__file__)}:{_execute_linter_command.__name__}\n",
                json.dumps(debug_info, indent=2),
                "\n",
            ])

        # Resolve the command BEFORE creating temp files so that the
        # "runtime/script missing" early exit leaves nothing behind.
        if tool in ("eslint", "tsc", "prettier"):
            command_to_run = _resolve_bundled_node_command(
                tool, command, root_path, debug_log_path
            )
            if not command_to_run:
                return [], {}
        else:
            # Non-JS tools: always use list-based execution
            command_to_run = command

        # Log the actual command that will be executed
        _append_debug_log(debug_log_path, [
            f"[{tool}] Actual command to execute:\n",
            f" Type: {type(command_to_run)}\n",
            f" Value: {command_to_run}\n",
            " Shell: False\n",
        ])

        # Run the linter using our custom temp files to avoid buffer overflow and WSL2 issues
        stdout_path, stderr_path = TempManager.create_temp_files_for_subprocess(root_path, tool)
        try:
            with open(stdout_path, 'w', encoding='utf-8') as stdout_fp, \
                    open(stderr_path, 'w', encoding='utf-8') as stderr_fp:
                result = subprocess.run(
                    command_to_run,
                    cwd=root_path,
                    stdout=stdout_fp,
                    stderr=stderr_fp,
                    text=True,
                    encoding='utf-8',
                    errors='replace',  # Handle encoding errors gracefully
                    timeout=timeout,
                    shell=False,  # Never use a shell; arguments are passed as a list
                )

            with open(stdout_path, 'r', encoding='utf-8', errors='replace') as f:
                stdout_text = f.read()
            with open(stderr_path, 'r', encoding='utf-8', errors='replace') as f:
                stderr_text = f.read()
        finally:
            # Runs on success, timeout and any other failure alike
            _remove_temp_files(stdout_path, stderr_path)

        # Log the result after execution
        if debug_log_path is not None:
            lines = [
                f"[{tool}] Post-execution results:\n",
                f" Return code: {result.returncode}\n",
                f" Stdout length: {len(stdout_text)} bytes\n",
                f" Stderr length: {len(stderr_text)} bytes\n",
            ]
            if stdout_text:
                lines.append(f" Stdout first 500 chars: {stdout_text[:500]}\n")
            if stderr_text:
                lines.append(f" Stderr first 500 chars: {stderr_text[:500]}\n")
            lines.append(f"{'='*60}\n")
            _append_debug_log(debug_log_path, lines)

        # Import parsers dynamically to avoid circular imports
        from . import parsers

        # Parse output based on tool
        if tool == "eslint":
            findings, ast_data = parsers.parse_eslint_output(stdout_text, workset_files)
        elif tool == "ruff":
            findings = parsers.parse_ruff_output(stdout_text, workset_files)
        elif tool == "mypy":
            findings = parsers.parse_mypy_output(stdout_text, workset_files)
        elif tool == "tsc":
            findings = parsers.parse_tsc_output(stdout_text, workset_files)
        elif tool == "prettier":
            findings = parsers.parse_prettier_output(stdout_text, stderr_text, workset_files)
        elif tool == "black":
            findings = parsers.parse_black_output(stdout_text, stderr_text, workset_files)
        elif tool == "bandit":
            findings = parsers.parse_bandit_output(stdout_text, workset_files)
        elif tool == "golangci-lint":
            findings = parsers.parse_golangci_output(stdout_text, workset_files)
        elif tool == "go-vet":
            # go vet reports on stderr
            findings = parsers.parse_go_vet_output(stderr_text, workset_files)
        elif tool in ["spotbugs", "checkstyle"]:
            findings = parsers.parse_maven_output(tool, stdout_text, workset_files)

    except subprocess.TimeoutExpired:
        print(f"Warning: {tool} timed out after {timeout}s")
    except FileNotFoundError:
        print(f"Warning: {tool} not found, skipping")
    except Exception as e:
        print(f"Warning: Error running {tool}: {e}")

    return findings, ast_data