# File: Auditor/theauditor/config_runtime.py
# Listing metadata from the original source viewer: 160 lines, 5.6 KiB, Python

"""Runtime configuration for TheAuditor - centralized configuration management."""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Any
# Built-in configuration values, grouped by section.  Every section is a flat
# mapping of setting name -> default; the type of each default is what
# overrides are validated/cast against by the loader in this module.
DEFAULTS = {
    # ------------------------------------------------------------------
    # Filesystem locations (all relative to the project root)
    # ------------------------------------------------------------------
    "paths": {
        # Primary index files
        "manifest": "./.pf/manifest.json",
        "db": "./.pf/repo_index.db",
        "workset": "./.pf/workset.json",

        # Working directories
        "pf_dir": "./.pf",
        "capsules_dir": "./.pf/capsules",
        "docs_dir": "./.pf/docs",
        "audit_dir": "./.pf/audit",
        "context_docs_dir": "./.pf/context/docs",
        "doc_capsules_dir": "./.pf/context/doc_capsules",
        "graphs_dir": "./.pf/graphs",
        "model_dir": "./.pf/ml",
        "claude_dir": "./.claude",

        # Generated artifacts
        "journal": "./.pf/journal.ndjson",
        "checkpoint": "./.pf/checkpoint.json",
        "run_report": "./.pf/run_report.json",
        "fce_json": "./.pf/raw/fce.json",
        "ast_proofs_json": "./.pf/ast_proofs.json",
        "ast_proofs_md": "./.pf/ast_proofs.md",
        "ml_suggestions": "./.pf/insights/ml_suggestions.json",
        "graphs_db": "./.pf/graphs.db",
        "graph_analysis": "./.pf/graph_analysis.json",
        "deps_json": "./.pf/deps.json",
        "findings_json": "./.pf/findings.json",
        "patterns_json": "./.pf/patterns.json",
        "xgraph_json": "./.pf/xgraph.json",
        "pattern_fce_json": "./.pf/pattern_fce.json",
        "fix_suggestions_json": "./.pf/fix_suggestions.json",
        "policy_yml": "./.pf/policy.yml",
    },
    # ------------------------------------------------------------------
    # Size, count and threshold limits
    # ------------------------------------------------------------------
    "limits": {
        # Largest file considered, in bytes (2 MiB)
        "max_file_size": 2 * 1024**2,

        # Extraction chunking
        "max_chunks_per_file": 3,       # chunks emitted per extracted file
        "max_chunk_size": 55 * 1024,    # bytes per chunk (55 KiB = 56320)

        # Batch sizes
        "default_batch_size": 200,
        "evidence_batch_size": 100,

        # ML / history windows
        "ml_window": 50,
        "git_churn_window_days": 30,

        # Graph analysis
        "max_graph_depth": 3,
        "high_risk_threshold": 0.5,
        "high_risk_limit": 10,
        "graph_limit_nodes": 500,
    },
    # ------------------------------------------------------------------
    # Timeouts (ordered from quickest to longest operation)
    # ------------------------------------------------------------------
    "timeouts": {
        # Quick tool-presence probes
        "tool_detection": 5,

        # Network access
        "url_fetch": 10,
        "venv_check": 30,

        # Build / test steps
        "test_run": 60,
        "venv_install": 120,

        # Analysis steps
        "lint_timeout": 300,
        "orchestrator_timeout": 300,

        # FCE and other long-running work
        "fce_timeout": 600,
    },
    # ------------------------------------------------------------------
    # Report rendering caps
    # ------------------------------------------------------------------
    "report": {
        "max_lint_rows": 50,
        "max_ast_rows": 50,
        "max_snippet_lines": 12,
        "max_snippet_chars": 800,
    },
}
def _coerce_user_value(value: Any, default: Any) -> Any:
    """Validate a config-file value against its default and return it.

    Raises:
        TypeError: if *value* is not of a kind compatible with *default*.
        OverflowError: if an integer is too large to store as a float setting.
    """
    # bool is a subclass of int, so a plain isinstance check would accept a
    # JSON `true` for an integer setting such as a timeout.  Require both
    # sides to agree on "is a bool".
    if isinstance(value, bool) != isinstance(default, bool):
        raise TypeError(
            f"expected {type(default).__name__}, got {type(value).__name__}"
        )
    # json.load yields int for a literal like `1`; that is a perfectly valid
    # value for a float setting.  Store it as float so the setting keeps a
    # stable type for the environment-variable casting that follows.
    if isinstance(default, float) and isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, type(default)):
        return value
    raise TypeError(
        f"expected {type(default).__name__}, got {type(value).__name__}"
    )


def _merge_user_config(cfg: dict[str, dict[str, Any]], user: Any, path: Path) -> None:
    """Overlay known, type-compatible settings from *user* onto *cfg* in place.

    Unknown sections and unknown keys are ignored without comment; a known key
    whose value has the wrong type is skipped with a warning.
    """
    if not isinstance(user, dict):
        return
    for section, settings in cfg.items():
        overrides = user.get(section)
        if not isinstance(overrides, dict):
            continue
        for key, value in overrides.items():
            if key not in settings:
                continue
            try:
                settings[key] = _coerce_user_value(value, settings[key])
            except (TypeError, OverflowError) as e:
                print(f"[WARNING] Ignoring '{section}.{key}' from {path}: {e}")


def _coerce_env_value(raw: str, default: Any) -> Any:
    """Cast an environment-variable string to the type of *default*.

    Raises:
        ValueError: if *raw* cannot be parsed as the required numeric type.
    """
    if isinstance(default, int):
        return int(raw)
    if isinstance(default, float):
        return float(raw)
    if isinstance(default, list):
        # Lists are supplied as comma-separated values.
        return [item.strip() for item in raw.split(",")]
    return raw


def _apply_env_overrides(cfg: dict[str, dict[str, Any]]) -> None:
    """Apply THEAUDITOR_<SECTION>_<KEY> environment overrides to *cfg* in place."""
    for section, settings in cfg.items():
        # Only existing keys are reassigned, so the dict size never changes
        # while it is being iterated.
        for key, current in settings.items():
            env_var = f"THEAUDITOR_{section.upper()}_{key.upper()}"
            raw = os.environ.get(env_var)
            if raw is None:
                continue
            try:
                settings[key] = _coerce_env_value(raw, current)
            except ValueError as e:
                # Env vars are optional overrides: keep the value already set.
                print(f"[WARNING] Invalid value for environment variable {env_var}: '{raw}' - {e}")
                print(f"[INFO] Using default value: {current}")


def load_runtime_config(root: str = ".") -> dict[str, Any]:
    """
    Load runtime configuration from .pf/config.json and environment variables.

    Config priority (highest to lowest):
        1. Environment variables (THEAUDITOR_* prefixed)
        2. .pf/config.json file
        3. Built-in defaults

    The config file is optional.  If it is missing it is skipped silently; if
    it cannot be read, decoded as UTF-8, or parsed as JSON, a warning is
    printed and the defaults are used.  Individual file values whose type does
    not match the corresponding default are skipped with a warning (integers
    are accepted for float settings; booleans are not accepted for numeric
    settings).

    Environment variables use a flattened namespace,
    THEAUDITOR_SECTION_KEY (e.g., THEAUDITOR_PATHS_MANIFEST), and are cast to
    the type of the value they override.  Unparseable values are reported and
    ignored.

    Args:
        root: Root directory to look for config file

    Returns:
        Configuration dictionary with merged values.  It is a fresh deep copy,
        so callers may mutate it without affecting DEFAULTS.
    """
    # Local import: only this function needs it.
    import copy

    cfg = copy.deepcopy(DEFAULTS)

    path = Path(root) / ".pf" / "config.json"
    try:
        if path.exists():
            with open(path, "r", encoding="utf-8") as f:
                user = json.load(f)
            _merge_user_config(cfg, user, path)
    except (ValueError, OSError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        # (a file that is not valid UTF-8); OSError covers unreadable files.
        print(f"[WARNING] Could not load config file from {path}: {e}")
        print("[INFO] Continuing with default configuration")

    _apply_env_overrides(cfg)
    return cfg