Critical Fix: Implement adaptive timeouts to prevent long-running analysis tools from being killed

- Replace hardcoded per-command timeouts (300s in the command tracks, 600s for final aggregation) that were killing taint-analyze and detect-patterns
  - Add COMMAND_TIMEOUTS dictionary with appropriate timeouts per command type:
    * taint-analyze, detect-patterns: 2 hours (can take 10-60 min on large codebases)
    * lint: 15 minutes (ESLint/ruff on monorepos)
    * fce: 30 minutes (correlation analysis)
    * graph operations: 10 minutes
    * Quick operations (deps, docs, workset): 5 minutes
  - Add get_command_timeout() function to determine timeout based on command name
  - Support environment variable overrides:
    * Global fallback: THEAUDITOR_TIMEOUT_SECONDS (default 30 min; used only for commands not listed in COMMAND_TIMEOUTS)
    * Per-command: THEAUDITOR_TIMEOUT_TAINT_ANALYZE_SECONDS, etc.
  - Update all 3 run_subprocess_with_interrupt calls to use adaptive timeouts
This commit is contained in:
TheAuditorTool
2025-09-09 17:22:12 +07:00
parent c7b1f54ab8
commit 2520ad8698

View File

@@ -25,6 +25,51 @@ except ImportError:
# Windows compatibility # Windows compatibility
IS_WINDOWS = platform.system() == "Windows" IS_WINDOWS = platform.system() == "Windows"
# Command-specific timeout configuration (in seconds)
# Based on empirical testing and user reports of 10-60 minute analysis times
COMMAND_TIMEOUTS = {
    "index": 600,               # 10 minutes - AST parsing can be slow on large codebases
    "detect-frameworks": 300,   # 5 minutes - Quick scan of config files
    "deps": 300,                # 5 minutes - Network I/O but usually fast
    "docs": 300,                # 5 minutes - Network I/O for fetching docs
    "workset": 300,             # 5 minutes - File system traversal
    "lint": 900,                # 15 minutes - ESLint/ruff on large codebases
    "detect-patterns": 7200,    # 2 hours - 100+ security patterns on all files
    "graph": 600,               # 10 minutes - Building dependency graphs
    "taint-analyze": 7200,      # 2 hours - Data flow analysis is expensive
    "taint": 7200,              # 2 hours - Alias for taint-analyze
    "fce": 1800,                # 30 minutes - Correlation analysis
    "report": 600,              # 10 minutes - Report generation
    "summary": 300,             # 5 minutes - Quick summary generation
}


def _timeout_from_env(env_key: str, fallback: int) -> int:
    """
    Read an integer timeout (seconds) from an environment variable.

    Args:
        env_key: Name of the environment variable to read
        fallback: Value to use when the variable is unset or not an integer

    Returns:
        The parsed override, or ``fallback`` if no usable override exists
    """
    raw_value = os.environ.get(env_key)
    if raw_value is None:
        return fallback
    try:
        return int(raw_value)
    except ValueError:
        # A malformed override (e.g. "" or "30m") must not crash the module
        # import or the pipeline; keep the configured timeout instead.
        return fallback


# Fallback timeout for commands not listed in COMMAND_TIMEOUTS.
# Overridable via THEAUDITOR_TIMEOUT_SECONDS; read once at import time.
DEFAULT_TIMEOUT = _timeout_from_env('THEAUDITOR_TIMEOUT_SECONDS', 1800)  # Default 30 minutes


def get_command_timeout(cmd: List[str]) -> int:
    """
    Determine appropriate timeout for a command based on its name.

    The command name is taken from the first argv token that exactly equals a
    key of COMMAND_TIMEOUTS. Matching whole tokens keeps flags and paths from
    hijacking the lookup: ``lint --workset`` resolves to "lint" (not
    "workset"), and ``taint-analyze --root /srv/docs/app`` resolves to
    "taint-analyze" (not "docs").

    If no token matches exactly, the previous behaviour is kept as a
    fallback: the first COMMAND_TIMEOUTS key (in dict order) that occurs
    anywhere in the space-joined command is used.

    A per-command override is read from
    ``THEAUDITOR_TIMEOUT_<NAME>_SECONDS`` (name upper-cased, "-" -> "_").
    A non-integer override is ignored in favour of the configured value.

    Args:
        cmd: Command array to execute

    Returns:
        Timeout in seconds
    """
    # Format: [python, -m, theauditor.cli, COMMAND_NAME, ...]
    # Prefer an exact token so arguments after the command name cannot win.
    cmd_name = next((token for token in cmd if token in COMMAND_TIMEOUTS), None)

    if cmd_name is None:
        # Legacy fallback: substring scan over the whole command line.
        cmd_str = " ".join(cmd)
        cmd_name = next(
            (known for known in COMMAND_TIMEOUTS if known in cmd_str),
            None,
        )

    if cmd_name is None:
        # Default timeout if command not recognized
        return DEFAULT_TIMEOUT

    # Check for environment variable override for specific command
    env_key = f'THEAUDITOR_TIMEOUT_{cmd_name.upper().replace("-", "_")}_SECONDS'
    return _timeout_from_env(env_key, COMMAND_TIMEOUTS[cmd_name])
# Global stop event for interrupt handling # Global stop event for interrupt handling
stop_event = threading.Event() stop_event = threading.Event()
@@ -176,13 +221,16 @@ def run_command_chain(commands: List[Tuple[str, List[str]]], root: str, chain_na
with open(stdout_file, 'w+', encoding='utf-8') as out_fp, \ with open(stdout_file, 'w+', encoding='utf-8') as out_fp, \
open(stderr_file, 'w+', encoding='utf-8') as err_fp: open(stderr_file, 'w+', encoding='utf-8') as err_fp:
# Determine appropriate timeout for this command
cmd_timeout = get_command_timeout(cmd)
result = run_subprocess_with_interrupt( result = run_subprocess_with_interrupt(
cmd, cmd,
stdout_fp=out_fp, stdout_fp=out_fp,
stderr_fp=err_fp, stderr_fp=err_fp,
cwd=root, cwd=root,
shell=IS_WINDOWS, # Windows compatibility fix shell=IS_WINDOWS, # Windows compatibility fix
timeout=300 # 5 minutes per command in parallel tracks timeout=cmd_timeout # Adaptive timeout based on command type
) )
# Read outputs # Read outputs
@@ -556,13 +604,16 @@ def run_full_pipeline(
with open(stdout_file, 'w+', encoding='utf-8') as out_fp, \ with open(stdout_file, 'w+', encoding='utf-8') as out_fp, \
open(stderr_file, 'w+', encoding='utf-8') as err_fp: open(stderr_file, 'w+', encoding='utf-8') as err_fp:
# Determine appropriate timeout for this command
cmd_timeout = get_command_timeout(cmd)
result = run_subprocess_with_interrupt( result = run_subprocess_with_interrupt(
cmd, cmd,
stdout_fp=out_fp, stdout_fp=out_fp,
stderr_fp=err_fp, stderr_fp=err_fp,
cwd=root, cwd=root,
shell=IS_WINDOWS, # Windows compatibility fix shell=IS_WINDOWS, # Windows compatibility fix
timeout=300 # 5 minutes per command in parallel tracks timeout=cmd_timeout # Adaptive timeout based on command type
) )
# Read outputs # Read outputs
@@ -802,13 +853,16 @@ def run_full_pipeline(
with open(stdout_file, 'w+', encoding='utf-8') as out_fp, \ with open(stdout_file, 'w+', encoding='utf-8') as out_fp, \
open(stderr_file, 'w+', encoding='utf-8') as err_fp: open(stderr_file, 'w+', encoding='utf-8') as err_fp:
# Determine appropriate timeout for this command
cmd_timeout = get_command_timeout(cmd)
result = run_subprocess_with_interrupt( result = run_subprocess_with_interrupt(
cmd, cmd,
stdout_fp=out_fp, stdout_fp=out_fp,
stderr_fp=err_fp, stderr_fp=err_fp,
cwd=root, cwd=root,
shell=IS_WINDOWS, # Windows compatibility fix shell=IS_WINDOWS, # Windows compatibility fix
timeout=600 # 10 minutes for final aggregation timeout=cmd_timeout # Adaptive timeout based on command type
) )
# Read outputs # Read outputs