# Auditor/theauditor/init.py

"""Initialization module for TheAuditor - handles project setup and initialization."""

from pathlib import Path
from typing import Dict, Any
from theauditor.security import sanitize_config_path, SecurityError


def initialize_project(
    offline: bool = False,
    skip_docs: bool = False,
    skip_deps: bool = False,
    progress_callback: Any = None
) -> Dict[str, Any]:
    """
    Initialize TheAuditor for first-time use by running all setup steps.

    This function handles the sequence of operations:
    1. Index repository
    2. Create workset
    3. Check dependencies (unless skipped/offline)
    4. Fetch documentation (unless skipped/offline)

    A failure in one step is recorded in ``stats`` and does not abort the
    remaining steps (the workset step is the exception: it is marked failed
    without running when indexing failed).

    Args:
        offline: Skip network operations (deps check, docs fetch)
        skip_docs: Skip documentation fetching
        skip_deps: Skip dependency checking
        progress_callback: Optional callable invoked with a single status
            string for each progress/failure message. Ignored when falsy.

    Returns:
        Dict containing:
            - stats: Statistics for each step, keyed by "index", "workset",
              "deps" and "docs". Each entry carries either "success"
              (plus step-specific counters, or "error" on failure) or
              "skipped": True.
            - success: Overall success status
            - has_failures: Whether any steps failed
            - next_steps: List of recommended next commands
    """
    from theauditor.indexer import build_index
    from theauditor.workset import compute_workset
    from theauditor.deps import parse_dependencies, check_latest_versions
    from theauditor.docs_fetch import fetch_docs
    from theauditor.docs_summarize import summarize_docs
    from theauditor.config_runtime import load_runtime_config

    # Cap on packages documented during init, to avoid excessive runtime.
    max_doc_packages = 250

    # Load configuration
    config = load_runtime_config(".")
    stats: Dict[str, Any] = {}

    def notify(message: str) -> None:
        """Forward a progress message to the caller, if a callback was given."""
        if progress_callback:
            progress_callback(message)

    def record_failure(step: str, error: str) -> None:
        """Store a failed result for ``step`` and report it (truncated) to the caller."""
        stats[step] = {"success": False, "error": error}
        notify(f"  ✗ Failed: {error[:60]}")

    # 1. Index
    notify("[1/4] Indexing repository...")
    try:
        # Sanitize paths from config before use
        manifest_path = str(sanitize_config_path(config["paths"]["manifest"], "paths", "manifest", "."))
        db_path = str(sanitize_config_path(config["paths"]["db"], "paths", "db", "."))

        result = build_index(
            root_path=".",
            manifest_path=manifest_path,
            db_path=db_path,
            print_stats=False,
            dry_run=False,
            follow_symlinks=False
        )
        if result.get("error"):
            raise RuntimeError(result["error"])
        # Extract stats from nested structure
        index_stats = result.get("stats", {})
        stats["index"] = {
            "files": index_stats.get("total_files", 0),
            "text_files": index_stats.get("text_files", 0),
            "success": True
        }
        notify(f"  ✓ Indexed {stats['index']['text_files']} text files")
    except SecurityError as e:
        # Report security violations to the caller like any other failure,
        # instead of recording them silently.
        record_failure("index", f"Security violation: {e}")
    except Exception as e:
        record_failure("index", str(e))

    # 2. Workset
    notify("\n[2/4] Creating workset...")
    index_result = stats.get("index", {})
    if not index_result.get("success"):
        # Nothing to build a workset from when indexing failed.
        record_failure("workset", "Skipping - indexing failed")
    else:
        try:
            if index_result.get("text_files", 0) == 0:
                stats["workset"] = {"success": False, "files": 0}
                notify("  ⚠ No files found")
            else:
                # db_path and manifest_path were already sanitized in step 1
                # (this branch is only reached when that step succeeded).
                output_path = str(sanitize_config_path(config["paths"]["workset"], "paths", "workset", "."))

                result = compute_workset(
                    all_files=True,
                    root_path=".",
                    db_path=db_path,
                    manifest_path=manifest_path,
                    output_path=output_path,
                    max_depth=2,
                    print_stats=False
                )
                stats["workset"] = {
                    "files": result.get("expanded_count", 0),
                    "coverage": result.get("coverage", 0),
                    "success": True
                }
                notify(f"  ✓ Created workset with {stats['workset']['files']} files")
        except SecurityError as e:
            record_failure("workset", f"Security violation: {e}")
        except Exception as e:
            record_failure("workset", str(e))

    # 3. Dependencies
    if skip_deps or offline:
        stats["deps"] = {"skipped": True}
    else:
        notify("\n[3/4] Checking dependencies...")
        try:
            deps_list = parse_dependencies(root_path=".")

            if deps_list:
                latest_info = check_latest_versions(deps_list, allow_net=True, offline=False)
                # Entries without an "is_outdated" key are counted as up to date
                # rather than failing the whole step with a KeyError.
                outdated = sum(1 for info in latest_info.values() if info.get("is_outdated"))
                stats["deps"] = {
                    "total": len(deps_list),
                    "outdated": outdated,
                    "success": True
                }
                notify(f"  ✓ Found {len(deps_list)} dependencies ({outdated} outdated)")
            else:
                stats["deps"] = {"total": 0, "success": True}
                notify("  ✓ No dependency files found")
        except Exception as e:
            record_failure("deps", str(e))

    # 4. Documentation
    if skip_docs or offline:
        stats["docs"] = {"skipped": True}
    else:
        notify("\n[4/4] Fetching documentation...")
        try:
            deps_list = parse_dependencies(root_path=".")

            if deps_list:
                # Limit the number of deps for init command to avoid excessive runtime
                if len(deps_list) > max_doc_packages:
                    deps_list = deps_list[:max_doc_packages]
                    notify(f"  ℹ Limiting to first {max_doc_packages} packages for speed...")

                fetch_result = fetch_docs(deps_list)
                fetched = fetch_result.get('fetched', 0)
                cached = fetch_result.get('cached', 0)
                errors = fetch_result.get('errors', [])

                # Summarize
                summarize_result = summarize_docs()
                stats["docs"] = {
                    "fetched": fetched,
                    "cached": cached,
                    "capsules": summarize_result.get('capsules_created', 0),
                    "success": True,
                    "errors": errors
                }
                notify(f"  ✓ Fetched {fetched} docs, created {stats['docs']['capsules']} capsules")
            else:
                stats["docs"] = {"success": True, "fetched": 0, "capsules": 0}
                notify("  ✓ No dependencies to document")
        except KeyboardInterrupt:
            # Ctrl+C during the docs step is recorded as a failed step
            # instead of propagating out of initialization.
            stats["docs"] = {"success": False, "error": "Interrupted by user"}
            notify("\n  ⚠ Interrupted by user (Ctrl+C)")
        except Exception as e:
            record_failure("docs", str(e))

    # Code capsules feature has been removed - the command was deleted
    # Doc capsules (for dependency documentation) are handled by 'aud docs summarize'

    # A step counts as failed when it neither succeeded nor was skipped.
    has_failures = any(
        not (stats.get(step, {}).get("success", False) or stats.get(step, {}).get("skipped", False))
        for step in ("index", "workset", "deps", "docs")
    )

    # Only recommend follow-up commands when there is a non-empty workset.
    next_steps = []
    if stats.get("workset", {}).get("files", 0) > 0:
        next_steps = [
            "aud lint --workset",
            "aud ast-verify --workset",
            "aud report"
        ]

    return {
        "stats": stats,
        "success": not has_failures,
        "has_failures": has_failures,
        "next_steps": next_steps
    }