mirror of
https://github.com/aljazceru/Auditor.git
synced 2025-12-18 11:54:18 +01:00
Initial commit: TheAuditor v1.0.1 - AI-centric SAST and Code Intelligence Platform
This commit is contained in:
160
theauditor/config_runtime.py
Normal file
160
theauditor/config_runtime.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""Runtime configuration for TheAuditor - centralized configuration management."""
|
||||
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Default locations of the files and directories referenced by the tool.
# Every value is a relative path string.
_DEFAULT_PATHS = {
    # Core files
    "manifest": "./.pf/manifest.json",
    "db": "./.pf/repo_index.db",
    "workset": "./.pf/workset.json",

    # Directories
    "pf_dir": "./.pf",
    "capsules_dir": "./.pf/capsules",
    "docs_dir": "./.pf/docs",
    "audit_dir": "./.pf/audit",
    "context_docs_dir": "./.pf/context/docs",
    "doc_capsules_dir": "./.pf/context/doc_capsules",
    "graphs_dir": "./.pf/graphs",
    "model_dir": "./.pf/ml",
    "claude_dir": "./.claude",

    # Core artifacts
    "journal": "./.pf/journal.ndjson",
    "checkpoint": "./.pf/checkpoint.json",
    "run_report": "./.pf/run_report.json",
    "fce_json": "./.pf/raw/fce.json",
    "ast_proofs_json": "./.pf/ast_proofs.json",
    "ast_proofs_md": "./.pf/ast_proofs.md",
    "ml_suggestions": "./.pf/insights/ml_suggestions.json",
    "graphs_db": "./.pf/graphs.db",
    "graph_analysis": "./.pf/graph_analysis.json",
    "deps_json": "./.pf/deps.json",
    "findings_json": "./.pf/findings.json",
    "patterns_json": "./.pf/patterns.json",
    "xgraph_json": "./.pf/xgraph.json",
    "pattern_fce_json": "./.pf/pattern_fce.json",
    "fix_suggestions_json": "./.pf/fix_suggestions.json",
    "policy_yml": "./.pf/policy.yml",
}

# Default size, batch and analysis bounds.
_DEFAULT_LIMITS = {
    # File size limits
    "max_file_size": 2 * 1024**2,  # 2 MiB

    # Chunking limits for extraction
    "max_chunks_per_file": 3,  # Maximum number of chunks per extracted file
    "max_chunk_size": 55 * 1024,  # Maximum size per chunk in bytes (55KB)

    # Batch processing
    "default_batch_size": 200,
    "evidence_batch_size": 100,

    # ML and analysis windows
    "ml_window": 50,
    "git_churn_window_days": 30,

    # Graph analysis
    "max_graph_depth": 3,
    "high_risk_threshold": 0.5,
    "high_risk_limit": 10,
    "graph_limit_nodes": 500,
}

# Default timeouts, grouped by the kind of operation they bound.
# NOTE(review): units are presumably seconds - confirm against the callers.
_DEFAULT_TIMEOUTS = {
    # Tool detection (quick checks)
    "tool_detection": 5,

    # Network operations
    "url_fetch": 10,
    "venv_check": 30,

    # Build/test operations
    "test_run": 60,
    "venv_install": 120,

    # Analysis operations
    "lint_timeout": 300,
    "orchestrator_timeout": 300,

    # FCE and long operations
    "fce_timeout": 600,
}

# Default caps applied to report output.
_DEFAULT_REPORT = {
    "max_lint_rows": 50,
    "max_ast_rows": 50,
    "max_snippet_lines": 12,
    "max_snippet_chars": 800,
}

# Built-in configuration, keyed by section name and then by setting name.
# Section order is: paths, limits, timeouts, report.
DEFAULTS = {
    "paths": _DEFAULT_PATHS,
    "limits": _DEFAULT_LIMITS,
    "timeouts": _DEFAULT_TIMEOUTS,
    "report": _DEFAULT_REPORT,
}
|
||||
|
||||
|
||||
def _coerce_file_value(value: Any, default: Any) -> Any:
    """Return a config-file *value* coerced to the type of *default*.

    Raises:
        TypeError: If *value* is not compatible with the type of *default*.
        OverflowError: If an integer is too large to store as a float.
    """
    expected = type(default)
    if isinstance(value, bool) or isinstance(default, bool):
        # bool is a subclass of int, so a plain isinstance() check would let
        # JSON true/false through for integer settings. Require both or neither.
        if isinstance(value, bool) and isinstance(default, bool):
            return value
    elif isinstance(default, float) and isinstance(value, int):
        # JSON has a single number type: "1" is a valid way to write 1.0.
        return float(value)
    elif isinstance(value, expected):
        return value
    raise TypeError(f"expected {expected.__name__}, got {type(value).__name__}")


def _parse_env_value(raw: str, default: Any) -> Any:
    """Parse the environment string *raw* into the type of *default*.

    Raises:
        ValueError: If *raw* cannot be parsed as the type of *default*.
    """
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(default, bool):
        lowered = raw.strip().lower()
        if lowered in ("1", "true", "yes", "on"):
            return True
        if lowered in ("0", "false", "no", "off"):
            return False
        raise ValueError(f"expected a boolean, got {raw!r}")
    if isinstance(default, int):
        return int(raw)
    if isinstance(default, float):
        return float(raw)
    if isinstance(default, list):
        # Parse comma-separated values for lists
        return [item.strip() for item in raw.split(",")]
    return raw


def load_runtime_config(root: str = ".") -> dict[str, Any]:
    """
    Load runtime configuration from .pf/config.json and environment variables.

    Config priority (highest to lowest):
    1. Environment variables (THEAUDITOR_* prefixed)
    2. .pf/config.json file
    3. Built-in defaults

    Only keys that already exist in the built-in defaults can be overridden;
    unknown sections and keys in the config file are ignored. A config-file
    value whose type does not match the default is skipped with a warning.
    Integers are accepted for float settings.

    The config file is optional: if it is missing, unreadable, not valid
    UTF-8 or not valid JSON, a warning is printed and loading continues.

    Environment variables use the flattened form THEAUDITOR_SECTION_KEY
    (e.g., THEAUDITOR_PATHS_MANIFEST) and are cast to the type of the value
    they override. Values that cannot be cast are reported and skipped.

    Args:
        root: Root directory to look for config file

    Returns:
        Configuration dictionary with merged values
    """
    # Local import: copy is only needed here.
    import copy

    # Work on a deep copy so the shared DEFAULTS are never mutated.
    cfg = copy.deepcopy(DEFAULTS)

    # Try to load user config from .pf/config.json
    path = Path(root) / ".pf" / "config.json"
    user: Any = None
    try:
        if path.exists():
            with open(path, "r", encoding="utf-8") as f:
                user = json.load(f)
    except (ValueError, OSError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"[WARNING] Could not load config file from {path}: {e}")
        print("[INFO] Continuing with default configuration")
        # Continue with defaults - config file is optional

    # Merge each known section from the file, if present.
    if isinstance(user, dict):
        for section, settings in cfg.items():
            overrides = user.get(section)
            if not isinstance(overrides, dict):
                continue
            for key, value in overrides.items():
                if key not in settings:
                    continue
                try:
                    settings[key] = _coerce_file_value(value, settings[key])
                except (TypeError, OverflowError) as e:
                    print(f"[WARNING] Ignoring {section}.{key} in {path}: {e}")

    # Environment variable overrides (flattened namespace)
    # Format: THEAUDITOR_SECTION_KEY (e.g., THEAUDITOR_PATHS_MANIFEST)
    for section, settings in cfg.items():
        # Only existing keys are reassigned, so the dict size never changes
        # while it is being iterated.
        for key, current in settings.items():
            env_var = f"THEAUDITOR_{section.upper()}_{key.upper()}"
            raw = os.environ.get(env_var)
            if raw is None:
                continue
            try:
                settings[key] = _parse_env_value(raw, current)
            except ValueError as e:
                print(f"[WARNING] Invalid value for environment variable {env_var}: '{raw}' - {e}")
                print(f"[INFO] Using default value: {current}")
                # Continue with the current value - env vars are optional overrides

    return cfg
|
||||
Reference in New Issue
Block a user