"""Graph builder module - constructs dependency and call graphs."""

import os
import platform
import re
import subprocess
import tempfile
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any

import click

from theauditor.ast_parser import ASTParser
from theauditor.indexer.config import SKIP_DIRS
from theauditor.module_resolver import ModuleResolver

# Windows compatibility: subprocess calls below pass shell=True on Windows.
IS_WINDOWS = platform.system() == "Windows"


@dataclass
class GraphNode:
    """Represents a node in the dependency or call graph."""

    id: str
    file: str
    lang: str | None = None
    loc: int = 0
    churn: int | None = None  # Git commit count, if available
    type: str = "module"  # module, function, class


@dataclass
class GraphEdge:
    """Represents an edge in the graph."""

    source: str
    target: str
    type: str = "import"  # import, call, extends, implements
    file: str | None = None
    line: int | None = None


@dataclass
class Cycle:
    """Represents a cycle in the dependency graph."""

    nodes: list[str]
    size: int = 0

    def __post_init__(self):
        # Derive size from the node list so Cycle(nodes) stays a one-argument call.
        self.size = len(self.nodes)


@dataclass
class Hotspot:
    """Represents a hotspot node with high connectivity."""

    id: str
    in_degree: int
    out_degree: int
    centrality: float
    score: float  # Computed from the configured weights


@dataclass
class ImpactAnalysis:
    """Results of change impact analysis."""

    targets: list[str]
    upstream: list[str]  # What depends on the targets
    downstream: list[str]  # What the targets depend on
    total_impacted: int


class XGraphBuilder:
    """Build cross-project dependency and call graphs."""

    # Import regex patterns for different languages
    IMPORT_PATTERNS = {
        "python": [
            r"^import\s+(\S+)",
            r"^from\s+(\S+)\s+import",
        ],
        "javascript": [
            # Standard ES6 imports with 'from'
            r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
            # Side-effect imports (no 'from')
            r"import\s+['\"]([^'\"]+)['\"]",
            # CommonJS require
            r"require\(['\"]([^'\"]+)['\"]\)",
            # Dynamic imports
            r"import\(['\"]([^'\"]+)['\"]\)",
            # Re-exports
            r"export\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
        ],
        "typescript": [
            # Standard ES6 imports with 'from'
            r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
            # Side-effect imports (no 'from')
            r"import\s+['\"]([^'\"]+)['\"]",
            # Type-only imports
            r"import\s+type\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
            # CommonJS require
            r"require\(['\"]([^'\"]+)['\"]\)",
            # Dynamic imports
            r"import\(['\"]([^'\"]+)['\"]\)",
            # Re-exports
            r"export\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
        ],
        "java": [
            r"^import\s+(\S+);",
            r"^import\s+static\s+(\S+);",
        ],
        "go": [
            r'^import\s+"([^"]+)"',
            r'^import\s+\(\s*"([^"]+)"',
        ],
        "c#": [
            r"^using\s+(\S+);",
            r"^using\s+static\s+(\S+);",
        ],
        "php": [
            r"^use\s+(\S+);",
            r"require_once\s*\(['\"]([^'\"]+)['\"]\)",
            r"include_once\s*\(['\"]([^'\"]+)['\"]\)",
        ],
        "ruby": [
            r"^require\s+['\"]([^'\"]+)['\"]",
            r"^require_relative\s+['\"]([^'\"]+)['\"]",
        ],
    }

    # Export patterns for different languages
    EXPORT_PATTERNS = {
        "python": [
            r"^def\s+(\w+)\s*\(",
            r"^class\s+(\w+)",
            r"^(\w+)\s*=",  # Module-level variables
        ],
        "javascript": [
            r"export\s+(?:default\s+)?(?:function|class|const|let|var)\s+(\w+)",
            r"exports\.(\w+)\s*=",
            r"module\.exports\.(\w+)\s*=",
        ],
        "typescript": [
            r"export\s+(?:default\s+)?(?:function|class|const|let|var|interface|type)\s+(\w+)",
            r"exports\.(\w+)\s*=",
        ],
        "java": [
            r"public\s+(?:static\s+)?(?:class|interface|enum)\s+(\w+)",
            r"public\s+(?:static\s+)?(?:\w+\s+)?(\w+)\s*\(",  # Public methods
        ],
        "go": [
            r"^func\s+(\w+)\s*\(",  # Exported only if capitalized
            r"^type\s+(\w+)\s+",
            r"^var\s+(\w+)\s+",
        ],
    }

    # Call patterns for different languages
    CALL_PATTERNS = {
        "python": [
            r"(\w+)\s*\(",  # Function calls
            r"(\w+)\.(\w+)\s*\(",  # Method calls
        ],
        "javascript": [
            r"(\w+)\s*\(",
            r"(\w+)\.(\w+)\s*\(",
            r"new\s+(\w+)\s*\(",
        ],
        "typescript": [
            r"(\w+)\s*\(",
            r"(\w+)\.(\w+)\s*\(",
            r"new\s+(\w+)\s*\(",
        ],
        "java": [
            r"(\w+)\s*\(",
            r"(\w+)\.(\w+)\s*\(",
            r"new\s+(\w+)\s*\(",
        ],
        "go": [
            r"(\w+)\s*\(",
            r"(\w+)\.(\w+)\s*\(",
        ],
    }
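
    # Illustrative notes on the fallback regexes above (assumption: compiled as in
    # the _extract_*_regex helpers at the end of this class):
    #   - IMPORT_PATTERNS["python"][1] captures "os.path" from "from os.path import join".
    #   - CALL_PATTERNS regexes are deliberately loose; r"(\w+)\s*\(" also matches
    #     definitions such as "func ParseFile(" in Go, which is why AST extraction
    #     is preferred and regex is only a fallback.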

    def __init__(self, batch_size: int = 200, exclude_patterns: list[str] | None = None, project_root: str = "."):
        """Initialize the builder with its configuration."""
        self.batch_size = batch_size
        self.exclude_patterns = exclude_patterns or []
        self.checkpoint_file = Path(".pf/xgraph_checkpoint.json")
        self.project_root = Path(project_root).resolve()
        self.module_resolver = ModuleResolver()  # No project_root needed; it reads the database
        self.ast_parser = ASTParser()  # AST parser for structural analysis

    def detect_language(self, file_path: Path) -> str | None:
        """Detect the language from the file extension."""
        ext_map = {
            ".py": "python",
            ".js": "javascript",
            ".jsx": "javascript",
            ".ts": "typescript",
            ".tsx": "typescript",
            ".java": "java",
            ".go": "go",
            ".cs": "c#",
            ".php": "php",
            ".rb": "ruby",
            ".c": "c",
            ".cpp": "c++",
            ".h": "c",
            ".hpp": "c++",
            ".rs": "rust",
            ".swift": "swift",
            ".kt": "kotlin",
            ".scala": "scala",
            ".r": "r",  # suffix is lowercased below, so ".R" also maps here
            ".m": "objective-c",
            ".mm": "objective-c++",
        }
        return ext_map.get(file_path.suffix.lower())
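
    # Example (illustrative):
    #     XGraphBuilder().detect_language(Path("src/App.tsx"))  # -> "typescript"
    #     XGraphBuilder().detect_language(Path("Cargo.toml"))   # -> None (unmapped)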

    def should_skip(self, file_path: Path) -> bool:
        """Check whether a file should be skipped, by SKIP_DIRS or exclude patterns."""
        # First, skip if any component of the path is in SKIP_DIRS.
        for part in file_path.parts:
            if part in SKIP_DIRS:
                return True

        # Second, check against the configured exclude_patterns (substring match).
        path_str = str(file_path)
        for pattern in self.exclude_patterns:
            if pattern in path_str:
                return True
        return False
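
    # Example (illustrative; assumes "node_modules" is in SKIP_DIRS):
    #     builder.should_skip(Path("web/node_modules/react/index.js"))  # -> True
    #     builder.should_skip(Path("web/src/index.ts"))                 # -> False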

    def extract_imports_from_db(self, rel_path: str) -> list[str]:
        """Extract import statements from the database, where the indexer already stored them.

        Args:
            rel_path: Relative path as stored in the database (e.g., "backend/src/app.ts")

        Returns:
            List of import targets.
        """
        import sqlite3

        # Query the refs table for imports.
        db_file = self.project_root / ".pf" / "repo_index.db"
        if not db_file.exists():
            print(f"Warning: Database not found at {db_file}")
            return []

        try:
            conn = sqlite3.connect(db_file)
            cursor = conn.cursor()

            # The indexer stores imports in refs with kinds like 'import', 'require', etc.
            cursor.execute(
                "SELECT value FROM refs WHERE src = ? AND kind IN ('import', 'require', 'from', 'import_type', 'export')",
                (rel_path,),
            )

            imports = [row[0] for row in cursor.fetchall()]
            conn.close()

            return imports

        except sqlite3.Error as e:
            print(f"Warning: Failed to read imports from database: {e}")
            return []
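
    # Example (illustrative; assumes the indexer has populated .pf/repo_index.db,
    # and the returned targets are hypothetical):
    #     builder = XGraphBuilder(project_root=".")
    #     builder.extract_imports_from_db("backend/src/app.ts")
    #     # -> ["express", "./routes/users", "@config/env"]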

    def extract_imports(self, file_path: Path, lang: str) -> list[str]:
        """Extract import statements from the database, where the indexer already stored them.

        The indexer has already extracted all imports into the refs table, so this
        normalizes the path and delegates to extract_imports_from_db instead of
        re-parsing the file. The `lang` argument is kept for interface compatibility
        but is not needed for the lookup.
        """
        # Get the relative path used as the database key.
        try:
            rel_path = file_path.relative_to(self.project_root)
        except ValueError:
            # file_path is already relative, or comes from a different root.
            rel_path = file_path

        # Normalize path separators for the database lookup.
        db_path = str(rel_path).replace("\\", "/")
        return self.extract_imports_from_db(db_path)

    def extract_exports_from_db(self, rel_path: str) -> list[str]:
        """Extract exported symbols from the database, where the indexer already stored them.

        Args:
            rel_path: Relative path as stored in the database.

        Returns:
            List of exported symbol names.
        """
        import sqlite3

        db_file = self.project_root / ".pf" / "repo_index.db"
        if not db_file.exists():
            return []

        try:
            conn = sqlite3.connect(db_file)
            cursor = conn.cursor()

            # The indexer stores exported functions/classes in the symbols table
            # with types 'function' and 'class'.
            cursor.execute(
                "SELECT name FROM symbols WHERE path = ? AND type IN ('function', 'class')",
                (rel_path,),
            )

            exports = [row[0] for row in cursor.fetchall()]
            conn.close()

            return exports

        except sqlite3.Error:
            return []
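
    # Example (illustrative; symbol names are hypothetical):
    #     builder.extract_exports_from_db("theauditor/graph/builder.py")
    #     # -> ["XGraphBuilder", "GraphNode", ...]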

    def _load_cached_ast(self, file_path: Path) -> Any | None:
        """Load a cached AST for a file from .pf/ast_cache, keyed by content hash.

        Returns the parsed tree, or None on any cache miss or read failure so the
        caller can parse fresh.
        """
        import hashlib
        import json

        try:
            with open(file_path, "rb") as f:
                file_hash = hashlib.sha256(f.read()).hexdigest()
        except OSError:
            return None

        cache_file = self.project_root / ".pf" / "ast_cache" / f"{file_hash}.json"
        if not cache_file.exists():
            return None
        try:
            with open(cache_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except (json.JSONDecodeError, OSError):
            return None  # Cache read failed; parse fresh

    def extract_exports(self, file_path: Path, lang: str) -> list[str]:
        """Extract exported symbols from a file using the AST parser, with a regex fallback."""
        # Try the AST parser first for supported languages.
        if self.ast_parser.supports_language(lang):
            try:
                # Check the persistent cache first for JS/TS files.
                tree = None
                if lang in ["javascript", "typescript"]:
                    tree = self._load_cached_ast(file_path)

                # Parse the file if it was not in the cache.
                # Note: cache writes are intentionally omitted; only indexer.py writes the cache.
                if not tree:
                    tree = self.ast_parser.parse_file(file_path, lang)

                if tree and tree.get("type") != "regex_fallback":
                    # Extract export names from the AST result.
                    export_dicts = self.ast_parser.extract_exports(tree, lang)
                    exports = []
                    for exp in export_dicts:
                        name = exp.get("name")
                        if name and name != "unknown":
                            exports.append(name)
                    if exports:  # Only trust a non-empty AST result
                        return exports
            except Exception:
                pass  # Fall through to the regex fallback

        # Fall back to regex-based extraction.
        return self._extract_exports_regex(file_path, lang)

    def extract_calls_from_db(self, rel_path: str) -> list[tuple[str, str | None]]:
        """Extract function calls from the database, where the indexer already stored them.

        Args:
            rel_path: Relative path as stored in the database.

        Returns:
            List of (function_name, None) tuples for calls.
        """
        import sqlite3

        db_file = self.project_root / ".pf" / "repo_index.db"
        if not db_file.exists():
            return []

        try:
            conn = sqlite3.connect(db_file)
            cursor = conn.cursor()

            # The indexer stores function calls in the symbols table with type 'call'.
            cursor.execute(
                "SELECT name FROM symbols WHERE path = ? AND type = 'call'",
                (rel_path,),
            )

            # Return tuples with None as the second element (no parent info in the DB).
            calls = [(row[0], None) for row in cursor.fetchall()]
            conn.close()

            return calls

        except sqlite3.Error:
            return []
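
    # Example (illustrative; names are hypothetical):
    #     builder.extract_calls_from_db("backend/src/app.ts")
    #     # -> [("express", None), ("app.listen", None)]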

    def extract_calls(self, file_path: Path, lang: str) -> list[tuple[str, str | None]]:
        """Extract function/method calls from a file using the AST parser, with a regex fallback."""
        # Try the AST parser first for supported languages.
        if self.ast_parser.supports_language(lang):
            try:
                # Check the persistent cache first for JS/TS files.
                tree = None
                if lang in ["javascript", "typescript"]:
                    tree = self._load_cached_ast(file_path)

                # Parse the file if it was not in the cache.
                # Note: cache writes are intentionally omitted; only indexer.py writes the cache.
                if not tree:
                    tree = self.ast_parser.parse_file(file_path, lang)

                if tree and tree.get("type") != "regex_fallback":
                    # Convert AST call records to (object_or_function, method) tuples.
                    call_dicts = self.ast_parser.extract_calls(tree, lang)
                    calls = []
                    for call in call_dicts:
                        name = call.get("name", "")
                        if "." in name:
                            # Method call: split "obj.method" into ("obj", "method").
                            obj, method = name.rsplit(".", 1)
                            calls.append((obj, method))
                        else:
                            # Plain function call.
                            calls.append((name, None))
                    if calls:  # Only trust a non-empty AST result
                        return calls
            except Exception:
                pass  # Fall through to the regex fallback

        # Fall back to regex-based extraction.
        return self._extract_calls_regex(file_path, lang)

    def _first_indexed_path(self, candidates: list[str]) -> str | None:
        """Return the first candidate path that exists in the files table, else None."""
        import sqlite3

        db_file = self.project_root / ".pf" / "repo_index.db"
        if not db_file.exists():
            return None
        try:
            conn = sqlite3.connect(db_file)
            cursor = conn.cursor()
            for candidate in candidates:
                cursor.execute("SELECT 1 FROM files WHERE path = ? LIMIT 1", (candidate,))
                if cursor.fetchone():
                    conn.close()
                    return candidate
            conn.close()
        except sqlite3.Error:
            pass
        return None

    def resolve_import_path(self, import_str: str, source_file: Path, lang: str) -> str:
        """Resolve an import string to a normalized module path that matches files in the graph."""
        # Clean up the import string (quotes, backticks, semicolons, whitespace).
        import_str = import_str.strip().strip('"\'`;')

        # Language-specific resolution.
        if lang == "python":
            # Convert a dotted Python module path to a file-style path.
            return "/".join(import_str.split("."))

        elif lang in ["javascript", "typescript"]:
            # 1. TypeScript path aliases, resolved via ModuleResolver (database-driven).
            if import_str.startswith("@"):
                # Determine the tsconfig context from the source file location.
                try:
                    source_rel = str(source_file.relative_to(self.project_root)).replace("\\", "/")
                except ValueError:
                    # source_file is already relative, or comes from a different root.
                    source_rel = str(source_file).replace("\\", "/")

                if "backend/" in source_rel:
                    context = "backend"
                elif "frontend/" in source_rel:
                    context = "frontend"
                else:
                    context = "root"

                # Use ModuleResolver's context-aware resolution.
                resolved = self.module_resolver.resolve_with_context(import_str, str(source_file), context)

                if resolved != import_str:
                    # Resolution worked; verify the file exists in the index, trying
                    # common extensions and index files when no extension is given.
                    candidates = [resolved]
                    if not Path(resolved).suffix:
                        candidates += [resolved + ext for ext in [".ts", ".tsx", ".js", ".jsx"]]
                        candidates += [resolved + "/index.ts", resolved + "/index.js"]
                    found = self._first_indexed_path(candidates)
                    if found:
                        return found

                # Return the resolved path even if the file check failed.
                return resolved

            # 2. Relative imports (./foo, ../bar/baz), resolved against the source file.
            elif import_str.startswith("."):
                try:
                    # Strip leading './' segments.
                    rel_import = import_str.lstrip("./")

                    # Walk up one directory per '../'.
                    up_count = import_str.count("../")
                    current_dir = source_file.parent
                    for _ in range(up_count):
                        current_dir = current_dir.parent
                    if up_count > 0:
                        rel_import = import_str.replace("../", "")

                    # Build the target path relative to the project root.
                    target_path = current_dir / rel_import
                    rel_target = str(target_path.relative_to(self.project_root)).replace("\\", "/")

                    # Check whether the file exists in the index, trying common extensions.
                    candidates = [
                        rel_target + ext
                        for ext in ["", ".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"]
                    ]
                    found = self._first_indexed_path(candidates)
                    if found:
                        return found

                    return rel_target

                except (ValueError, OSError):
                    pass

            # 3. Bare specifiers are npm packages; return them as-is (external).
            else:
                return import_str

            # If nothing worked, return the original string.
            return import_str

        else:
            # Default: return as-is.
            return import_str
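
    # Example (illustrative; paths are hypothetical and assume an indexed project):
    #     builder.resolve_import_path("./routes/users", Path("backend/src/app.ts"), "typescript")
    #     # -> "backend/src/routes/users.ts" if that file is in the index
    #     builder.resolve_import_path("express", Path("backend/src/app.ts"), "typescript")
    #     # -> "express" (bare specifier, treated as an external npm package)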

    def get_file_metrics(self, file_path: Path) -> dict[str, Any]:
        """Get basic metrics (lines of code, git churn) for a file."""
        metrics = {"loc": 0, "churn": None}

        # When working with manifest data the file may not exist on disk; the
        # manifest already carries loc, so return defaults and let the caller
        # use the manifest instead.
        if not file_path.exists():
            return metrics

        # Count lines of code.
        try:
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                metrics["loc"] = sum(1 for _ in f)
        except (IOError, UnicodeDecodeError, OSError) as e:
            print(f"Warning: Failed to read {file_path} for metrics: {e}")
            # Still return default metrics, but log the failure.

        # Get git churn (commit count).
        try:
            # Route stdout/stderr through temp files to avoid pipe-buffer overflow.
            with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix="_stdout.txt", encoding="utf-8") as stdout_fp, \
                 tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix="_stderr.txt", encoding="utf-8") as stderr_fp:

                stdout_path = stdout_fp.name
                stderr_path = stderr_fp.name

                result = subprocess.run(
                    ["git", "log", "--oneline", str(file_path)],
                    stdout=stdout_fp,
                    stderr=stderr_fp,
                    text=True,
                    timeout=5,
                    cwd=Path.cwd(),
                    shell=IS_WINDOWS,  # Windows compatibility fix
                )

            with open(stdout_path, "r", encoding="utf-8") as f:
                result.stdout = f.read()
            with open(stderr_path, "r", encoding="utf-8") as f:
                result.stderr = f.read()

            os.unlink(stdout_path)
            os.unlink(stderr_path)

            if result.returncode == 0:
                # splitlines() avoids counting an empty stdout as one commit.
                commits = result.stdout.strip().splitlines()
                metrics["churn"] = len(commits) if commits else 0
        except (subprocess.TimeoutExpired, OSError, IOError) as e:
            print(f"Warning: Failed to get git churn for {file_path}: {e}")
            # Still return default metrics, but log the failure.

        return metrics
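
    # Example (illustrative; numbers are hypothetical):
    #     builder.get_file_metrics(Path("backend/src/app.ts"))
    #     # -> {"loc": 240, "churn": 17}   (churn stays None when git is unavailable)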

    def _collect_source_files(
        self,
        root_path: Path,
        langs: list[str] | None,
        file_list: list[dict[str, Any]] | None,
    ) -> tuple[list[tuple[Path, str]], dict[str, dict[str, Any]]]:
        """Collect (path, language) pairs, preferring the manifest over walking the tree.

        Returns the file list plus a lookup from absolute path string to the
        manifest item, so callers can reuse manifest metrics.
        """
        files: list[tuple[Path, str]] = []
        manifest_lookup: dict[str, dict[str, Any]] = {}

        if file_list is not None:
            # Use the provided manifest; it already has everything we need
            # (path, ext, loc, ...), so the files need not exist on disk.
            for item in file_list:
                manifest_path = Path(item["path"])
                file = root_path / manifest_path  # Only for consistent path handling
                manifest_lookup[str(file)] = item
                lang = self.detect_language(manifest_path)
                if lang and (not langs or lang in langs):
                    files.append((file, lang))
            return files, manifest_lookup

        # Fall back to os.walk for backward compatibility.
        for dirpath, dirnames, filenames in os.walk(root_path):
            # CRITICAL: prune excluded directories before os.walk descends into
            # them; this prevents traversal into .venv and other SKIP_DIRS.
            dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]

            # Also prune based on exclude_patterns.
            if self.exclude_patterns:
                dirnames[:] = [
                    d for d in dirnames
                    if not any(pattern in d for pattern in self.exclude_patterns)
                ]

            for filename in filenames:
                file = Path(dirpath) / filename
                if not self.should_skip(file):
                    lang = self.detect_language(file)
                    if lang and (not langs or lang in langs):
                        files.append((file, lang))
        return files, manifest_lookup

    def build_import_graph(
        self,
        root: str = ".",
        langs: list[str] | None = None,
        file_filter: str | None = None,
        file_list: list[dict[str, Any]] | None = None,
    ) -> dict[str, Any]:
        """Build the import/dependency graph for the project."""
        root_path = Path(root).resolve()
        nodes = {}
        edges = []

        # Collect all source files (manifest-driven when a file_list is given).
        files, manifest_lookup = self._collect_source_files(root_path, langs, file_list)

        # Process files with a progress bar.
        with click.progressbar(
            files,
            label="Building import graph",
            show_pos=True,
            show_percent=True,
            show_eta=True,
            item_show_func=lambda x: str(x[0].name) if x else None,
        ) as bar:
            for file_path, lang in bar:
                # Create a node for this file (path separators normalized).
                rel_path = str(file_path.relative_to(root_path)).replace("\\", "/")
                node_id = rel_path

                # Prefer metrics from the manifest; otherwise read the file.
                if str(file_path) in manifest_lookup:
                    manifest_item = manifest_lookup[str(file_path)]
                    loc = manifest_item.get("loc", 0)
                    churn = None  # The manifest has no churn data
                else:
                    metrics = self.get_file_metrics(file_path)
                    loc = metrics["loc"]
                    churn = metrics["churn"]

                node = GraphNode(
                    id=node_id,
                    file=rel_path,
                    lang=lang,
                    loc=loc,
                    churn=churn,
                    type="module",
                )
                nodes[node_id] = asdict(node)

                # Create edges from the imports the indexer already stored;
                # pass the relative path that matches the database key.
                imports = self.extract_imports_from_db(rel_path)
                for imp in imports:
                    target = self.resolve_import_path(imp, file_path, lang)
                    edge = GraphEdge(
                        source=node_id,
                        target=target,
                        type="import",
                        file=rel_path,
                    )
                    edges.append(asdict(edge))

        return {
            "nodes": list(nodes.values()),
            "edges": edges,
            "metadata": {
                "root": str(root_path),
                "languages": list(set(n["lang"] for n in nodes.values())),
                "total_files": len(nodes),
                "total_imports": len(edges),
            },
        }
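
    # Example (illustrative; assumes an indexed project):
    #     graph = XGraphBuilder(project_root=".").build_import_graph(root=".", langs=["python"])
    #     print(graph["metadata"]["total_files"], graph["metadata"]["total_imports"])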

    def build_call_graph(
        self,
        root: str = ".",
        langs: list[str] | None = None,
        file_filter: str | None = None,
        file_list: list[dict[str, Any]] | None = None,
    ) -> dict[str, Any]:
        """Build the call graph for the project."""
        root_path = Path(root).resolve()
        nodes = {}
        edges = []

        # Collect all source files (manifest-driven when a file_list is given).
        files, _ = self._collect_source_files(root_path, langs, file_list)

        # Process files with a progress bar, extracting functions and calls.
        with click.progressbar(
            files,
            label="Building call graph",
            show_pos=True,
            show_percent=True,
            show_eta=True,
            item_show_func=lambda x: str(x[0].name) if x else None,
        ) as bar:
            for file_path, lang in bar:
                rel_path = str(file_path.relative_to(root_path)).replace("\\", "/")
                module_id = rel_path

                # Create a node for each exported function/class from the database.
                exports = self.extract_exports_from_db(rel_path)
                for export in exports:
                    func_id = f"{module_id}::{export}"
                    node = GraphNode(
                        id=func_id,
                        file=rel_path,
                        lang=lang,
                        type="function",
                    )
                    nodes[func_id] = asdict(node)

                # Create an edge from the module to each called function.
                calls = self.extract_calls_from_db(rel_path)
                for call, method in calls:
                    # "obj.method" for method calls, bare name for function calls.
                    target_id = f"{call}.{method}" if method else call

                    edge = GraphEdge(
                        source=module_id,
                        target=target_id,
                        type="call",
                        file=rel_path,
                    )
                    edges.append(asdict(edge))

        return {
            "nodes": list(nodes.values()),
            "edges": edges,
            "metadata": {
                "root": str(root_path),
                "languages": langs or [],
                "total_functions": len(nodes),
                "total_calls": len(edges),
            },
        }
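
    # Example (illustrative; node ids follow the "<module>::<symbol>" convention above):
    #     calls = builder.build_call_graph(root=".", langs=["typescript"])
    #     # A node id might look like "backend/src/app.ts::createServer".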

    def merge_graphs(self, import_graph: dict, call_graph: dict) -> dict[str, Any]:
        """Merge the import and call graphs into a unified graph."""
        # Combine nodes, deduplicating by id (call-graph nodes win on collision).
        nodes = {}
        for node in import_graph["nodes"]:
            nodes[node["id"]] = node
        for node in call_graph["nodes"]:
            nodes[node["id"]] = node

        # Combine edges.
        edges = import_graph["edges"] + call_graph["edges"]

        return {
            "nodes": list(nodes.values()),
            "edges": edges,
            "metadata": {
                "root": import_graph["metadata"]["root"],
                "languages": list(
                    set(
                        import_graph["metadata"]["languages"]
                        + call_graph["metadata"].get("languages", [])
                    )
                ),
                "total_nodes": len(nodes),
                "total_edges": len(edges),
            },
        }
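
    # Example (illustrative end-to-end sketch):
    #     builder = XGraphBuilder(project_root=".")
    #     merged = builder.merge_graphs(
    #         builder.build_import_graph(root="."),
    #         builder.build_call_graph(root="."),
    #     )
    #     print(merged["metadata"]["total_nodes"], merged["metadata"]["total_edges"])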

    def _extract_imports_regex(self, file_path: Path, lang: str) -> list[str]:
        """Regex-based fallback for extracting imports.

        Used when AST parsing fails or is unavailable.
        """
        if lang not in self.IMPORT_PATTERNS:
            return []

        imports = []
        patterns = [re.compile(p, re.MULTILINE) for p in self.IMPORT_PATTERNS[lang]]

        try:
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()

            for pattern in patterns:
                imports.extend(pattern.findall(content))

        except (IOError, UnicodeDecodeError, OSError) as e:
            print(f"Warning: Failed to extract imports from {file_path}: {e}")
            # Return an empty list, but log the failure.

        return imports

    def _extract_exports_regex(self, file_path: Path, lang: str) -> list[str]:
        """Regex-based fallback for extracting exports.

        Used when AST parsing fails or is unavailable.
        """
        if lang not in self.EXPORT_PATTERNS:
            return []

        exports = []
        patterns = [re.compile(p, re.MULTILINE) for p in self.EXPORT_PATTERNS[lang]]

        try:
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()

            for pattern in patterns:
                matches = pattern.findall(content)
                # Flatten tuples when a regex has multiple groups.
                for match in matches:
                    if isinstance(match, tuple):
                        exports.extend([m for m in match if m])
                    else:
                        exports.append(match)

        except (IOError, UnicodeDecodeError, OSError) as e:
            print(f"Warning: Failed to extract exports from {file_path}: {e}")
            # Return an empty list, but log the failure.

        # In Go, only capitalized identifiers are exported.
        if lang == "go":
            exports = [e for e in exports if e and e[0].isupper()]

        return exports

    def _extract_calls_regex(self, file_path: Path, lang: str) -> list[tuple[str, str | None]]:
        """Regex-based fallback for extracting function calls.

        Used when AST parsing fails or is unavailable.
        """
        if lang not in self.CALL_PATTERNS:
            return []

        calls = []
        patterns = [re.compile(p) for p in self.CALL_PATTERNS[lang]]

        try:
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()

            for pattern in patterns:
                for match in pattern.findall(content):
                    if isinstance(match, tuple):
                        # Method call: (object, method).
                        calls.append(match)
                    else:
                        # Function call.
                        calls.append((match, None))

        except (IOError, UnicodeDecodeError, OSError) as e:
            print(f"Warning: Failed to extract calls from {file_path}: {e}")
            # Return an empty list, but log the failure.

        return calls
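

# Minimal smoke-test sketch (assumption: run from a project root whose indexer
# output already exists at .pf/repo_index.db; otherwise the graphs come back
# with empty edge data and a warning is printed).
if __name__ == "__main__":
    builder = XGraphBuilder(project_root=".")
    merged = builder.merge_graphs(
        builder.build_import_graph(root="."),
        builder.build_call_graph(root="."),
    )
    print(
        f"nodes={merged['metadata']['total_nodes']} "
        f"edges={merged['metadata']['total_edges']}"
    )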