# Auditor/theauditor/config_runtime.py

"""Runtime configuration for TheAuditor - centralized configuration management."""

from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Any


# Built-in fallback values for every runtime setting, grouped by section.
# load_runtime_config() copies this table and layers .pf/config.json and
# THEAUDITOR_* environment variables on top of it.
DEFAULTS: dict[str, dict[str, Any]] = {}

# Section "paths": relative locations of the files and directories in use.
DEFAULTS["paths"] = {
    # Primary index files
    "manifest": "./.pf/manifest.json",
    "db": "./.pf/repo_index.db",
    "workset": "./.pf/workset.json",

    # Working directories
    "pf_dir": "./.pf",
    "capsules_dir": "./.pf/capsules",
    "docs_dir": "./.pf/docs",
    "audit_dir": "./.pf/audit",
    "context_docs_dir": "./.pf/context/docs",
    "doc_capsules_dir": "./.pf/context/doc_capsules",
    "graphs_dir": "./.pf/graphs",
    "model_dir": "./.pf/ml",
    "claude_dir": "./.claude",

    # Generated artifacts
    "journal": "./.pf/journal.ndjson",
    "checkpoint": "./.pf/checkpoint.json",
    "run_report": "./.pf/run_report.json",
    "fce_json": "./.pf/raw/fce.json",
    "ast_proofs_json": "./.pf/ast_proofs.json",
    "ast_proofs_md": "./.pf/ast_proofs.md",
    "ml_suggestions": "./.pf/insights/ml_suggestions.json",
    "graphs_db": "./.pf/graphs.db",
    "graph_analysis": "./.pf/graph_analysis.json",
    "deps_json": "./.pf/deps.json",
    "findings_json": "./.pf/findings.json",
    "patterns_json": "./.pf/patterns.json",
    "xgraph_json": "./.pf/xgraph.json",
    "pattern_fce_json": "./.pf/pattern_fce.json",
    "fix_suggestions_json": "./.pf/fix_suggestions.json",
    "policy_yml": "./.pf/policy.yml",
}

# Section "limits": size caps, batch sizes and analysis thresholds.
DEFAULTS["limits"] = {
    # Largest file size accepted
    "max_file_size": 2 * 1024**2,  # 2 MiB, in bytes

    # Chunking of extracted files
    "max_chunks_per_file": 3,  # upper bound on chunks per extracted file
    "max_chunk_size": 55 * 1024,  # upper bound per chunk: 56320 bytes (55 KiB)

    # Batch sizes
    "default_batch_size": 200,
    "evidence_batch_size": 100,

    # Windows for ML and git-churn analysis
    "ml_window": 50,
    "git_churn_window_days": 30,

    # Graph analysis bounds
    "max_graph_depth": 3,
    "high_risk_threshold": 0.5,  # the only float-typed setting in this table
    "high_risk_limit": 10,
    "graph_limit_nodes": 500,
}

# Section "timeouts": per-operation time limits (unit is not stated in this
# module; presumably seconds -- confirm against the call sites).
DEFAULTS["timeouts"] = {
    # Quick tool-detection probes
    "tool_detection": 5,

    # Network-facing operations
    "url_fetch": 10,
    "venv_check": 30,

    # Build and test steps
    "test_run": 60,
    "venv_install": 120,

    # Analysis steps
    "lint_timeout": 300,
    "orchestrator_timeout": 300,

    # FCE and other long-running work
    "fce_timeout": 600,
}

# Section "report": row and snippet caps applied to generated reports.
DEFAULTS["report"] = {
    "max_lint_rows": 50,
    "max_ast_rows": 50,
    "max_snippet_lines": 12,
    "max_snippet_chars": 800,
}


def _read_user_config(path: Path) -> Any:
    """Return the parsed JSON content of *path*, or None when it is unavailable.

    A missing file is not an error because the config file is optional. A file
    that cannot be read or parsed is reported on stdout and treated as absent.
    """
    try:
        if not path.exists():
            return None
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (ValueError, OSError) as e:
        # ValueError covers json.JSONDecodeError *and* the UnicodeDecodeError
        # raised while reading a file that is not valid UTF-8; OSError covers
        # permission problems, a directory in place of the file, etc.
        print(f"[WARNING] Could not load config file from {path}: {e}")
        print("[INFO] Continuing with default configuration")
        return None


def _coerce_file_value(value: Any, default: Any) -> tuple[bool, Any]:
    """Validate a config-file value against the type of its default.

    Returns ``(accepted, value)``; when accepted, the returned value is the
    one to store (an integer given for a float setting is converted to float).
    """
    if isinstance(default, bool):
        return isinstance(value, bool), value
    if isinstance(value, bool):
        # bool is a subclass of int, so a plain isinstance() check would let
        # JSON true/false through as a number.
        return False, value
    if isinstance(default, float) and isinstance(value, int):
        # JSON commonly writes whole numbers without a decimal point
        # (1 instead of 1.0); accept them for float settings.
        try:
            return True, float(value)
        except OverflowError:
            # JSON integers are unbounded; floats are not.
            return False, value
    return isinstance(value, type(default)), value


def _parse_env_value(raw: str, current: Any) -> Any:
    """Convert an environment-variable string to the type of *current*.

    Raises ValueError when *raw* cannot be parsed as the required number type.
    """
    if isinstance(current, int):
        return int(raw)
    if isinstance(current, float):
        return float(raw)
    if isinstance(current, list):
        # Lists are given as comma-separated values.
        return [item.strip() for item in raw.split(",")]
    return raw


def load_runtime_config(root: str = ".") -> dict[str, Any]:
    """
    Load runtime configuration from .pf/config.json and environment variables.

    Config priority (highest to lowest):
    1. Environment variables (THEAUDITOR_* prefixed)
    2. .pf/config.json file
    3. Built-in defaults

    Only keys that already exist in the built-in defaults can be overridden;
    unknown sections and keys in the config file are ignored. A config-file
    value whose type does not match the default is skipped with a warning,
    except that a whole number is accepted (and converted) for a float
    setting. Problems with the config file or with an environment variable
    never raise: a warning is printed and the previous value is kept.

    Args:
        root: Root directory to look for config file

    Returns:
        Configuration dictionary with merged values. It is a fresh deep copy,
        so callers may mutate it without affecting the defaults.
    """
    import copy

    cfg = copy.deepcopy(DEFAULTS)

    # Layer 2: optional user config file.
    config_path = Path(root) / ".pf" / "config.json"
    user = _read_user_config(config_path)
    if isinstance(user, dict):
        for section, settings in cfg.items():
            overrides = user.get(section)
            if not isinstance(overrides, dict):
                continue
            for key, value in overrides.items():
                if key not in settings:
                    continue  # unknown keys are ignored
                accepted, coerced = _coerce_file_value(value, settings[key])
                if accepted:
                    settings[key] = coerced
                else:
                    expected = type(settings[key]).__name__
                    print(
                        f"[WARNING] Ignoring config value {section}.{key}={value!r} "
                        f"from {config_path}: expected {expected}"
                    )

    # Layer 1: environment variable overrides (flattened namespace).
    # Format: THEAUDITOR_SECTION_KEY (e.g., THEAUDITOR_PATHS_MANIFEST)
    for section, settings in cfg.items():
        # Only values of existing keys are replaced, so mutating the dict
        # while iterating over it is safe.
        for key, current in settings.items():
            env_var = f"THEAUDITOR_{section.upper()}_{key.upper()}"
            value = os.environ.get(env_var)
            if value is None:
                continue
            try:
                settings[key] = _parse_env_value(value, current)
            except ValueError as e:
                print(f"[WARNING] Invalid value for environment variable {env_var}: '{value}' - {e}")
                print(f"[INFO] Using default value: {settings[key]}")
                # Keep the current value - env vars are optional overrides

    return cfg