# Auditor/theauditor/graph/builder.py
"""Graph builder module - constructs dependency and call graphs."""
import os
import platform
import re
import sqlite3
import subprocess
import tempfile
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any

import click

from theauditor.indexer.config import SKIP_DIRS
from theauditor.module_resolver import ModuleResolver
from theauditor.ast_parser import ASTParser

# Windows compatibility
IS_WINDOWS = platform.system() == "Windows"
@dataclass
class GraphNode:
"""Represents a node in the dependency or call graph."""
id: str
file: str
lang: str | None = None
loc: int = 0
churn: int | None = None # Git commit count if available
type: str = "module" # module, function, class
@dataclass
class GraphEdge:
"""Represents an edge in the graph."""
source: str
target: str
type: str = "import" # import, call, extends, implements
file: str | None = None
line: int | None = None
@dataclass
class Cycle:
    """Represents a cycle in the dependency graph."""
    nodes: list[str]
    size: int = 0

    def __post_init__(self):
        # Derive size from the node list; a hand-written __init__ would
        # bypass the dataclass-generated one.
        self.size = len(self.nodes)
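# Illustrative check of the derived field (a sketch; node ids are made up):
#   cycle = Cycle(nodes=["a.py", "b.py", "a.py"])
#   assert cycle.size == 3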
@dataclass
class Hotspot:
"""Represents a hotspot node with high connectivity."""
id: str
in_degree: int
out_degree: int
centrality: float
score: float # Computed based on weights
@dataclass
class ImpactAnalysis:
"""Results of change impact analysis."""
targets: list[str]
upstream: list[str] # What depends on targets
downstream: list[str] # What targets depend on
total_impacted: int
class XGraphBuilder:
"""Build cross-project dependency and call graphs."""
# Import regex patterns for different languages
IMPORT_PATTERNS = {
"python": [
r"^import\s+(\S+)",
r"^from\s+(\S+)\s+import",
],
"javascript": [
# Standard ES6 imports with 'from'
r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
# Side-effect imports (no 'from')
r"import\s+['\"]([^'\"]+)['\"]",
# CommonJS require
r"require\(['\"]([^'\"]+)['\"]\)",
# Dynamic imports
r"import\(['\"]([^'\"]+)['\"]\)",
# Re-exports
r"export\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
],
"typescript": [
# Standard ES6 imports with 'from'
r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
# Side-effect imports (no 'from')
r"import\s+['\"]([^'\"]+)['\"]",
# Type-only imports
r"import\s+type\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
# CommonJS require
r"require\(['\"]([^'\"]+)['\"]\)",
# Dynamic imports
r"import\(['\"]([^'\"]+)['\"]\)",
# Re-exports
r"export\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
],
"java": [
r"^import\s+(\S+);",
r"^import\s+static\s+(\S+);",
],
"go": [
r'^import\s+"([^"]+)"',
r'^import\s+\(\s*"([^"]+)"',
],
"c#": [
r"^using\s+(\S+);",
r"^using\s+static\s+(\S+);",
],
"php": [
r"^use\s+(\S+);",
r"require_once\s*\(['\"]([^'\"]+)['\"]\)",
r"include_once\s*\(['\"]([^'\"]+)['\"]\)",
],
"ruby": [
r"^require\s+['\"]([^'\"]+)['\"]",
r"^require_relative\s+['\"]([^'\"]+)['\"]",
],
}
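    # What these patterns capture, shown on a one-line sample (a sketch):
    #   import re
    #   pat = re.compile(r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]")
    #   pat.findall('import { api } from "./client";')  # -> ["./client"]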
# Export patterns for different languages
EXPORT_PATTERNS = {
"python": [
r"^def\s+(\w+)\s*\(",
r"^class\s+(\w+)",
r"^(\w+)\s*=", # Module-level variables
],
"javascript": [
r"export\s+(?:default\s+)?(?:function|class|const|let|var)\s+(\w+)",
r"exports\.(\w+)\s*=",
r"module\.exports\.(\w+)\s*=",
],
"typescript": [
r"export\s+(?:default\s+)?(?:function|class|const|let|var|interface|type)\s+(\w+)",
r"exports\.(\w+)\s*=",
],
"java": [
r"public\s+(?:static\s+)?(?:class|interface|enum)\s+(\w+)",
r"public\s+(?:static\s+)?(?:\w+\s+)?(\w+)\s*\(", # Public methods
],
"go": [
r"^func\s+(\w+)\s*\(", # Exported if capitalized
r"^type\s+(\w+)\s+",
r"^var\s+(\w+)\s+",
],
}
# Call patterns for different languages
CALL_PATTERNS = {
"python": [
r"(\w+)\s*\(", # Function calls
r"(\w+)\.(\w+)\s*\(", # Method calls
],
"javascript": [
r"(\w+)\s*\(",
r"(\w+)\.(\w+)\s*\(",
r"new\s+(\w+)\s*\(",
],
"typescript": [
r"(\w+)\s*\(",
r"(\w+)\.(\w+)\s*\(",
r"new\s+(\w+)\s*\(",
],
"java": [
r"(\w+)\s*\(",
r"(\w+)\.(\w+)\s*\(",
r"new\s+(\w+)\s*\(",
],
"go": [
r"(\w+)\s*\(",
r"(\w+)\.(\w+)\s*\(",
],
}
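    # Note: the call patterns are deliberately loose; (\w+)\s*\( also matches
    # keywords like `if (` in C-like languages. The AST path tried first in
    # extract_calls avoids this; a quick demonstration of the trade-off:
    #   import re
    #   re.findall(r"(\w+)\s*\(", "if (ready) init();")  # -> ["if", "init"]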
    def __init__(self, batch_size: int = 200, exclude_patterns: list[str] | None = None, project_root: str = "."):
        """Initialize builder with configuration."""
        self.batch_size = batch_size
        self.exclude_patterns = exclude_patterns or []
        self.checkpoint_file = Path(".pf/xgraph_checkpoint.json")
        self.project_root = Path(project_root).resolve()
        self.module_resolver = ModuleResolver()  # No project_root needed - resolves via the database
        self.ast_parser = ASTParser()  # AST parser for structural analysis
def detect_language(self, file_path: Path) -> str | None:
"""Detect language from file extension."""
ext_map = {
".py": "python",
".js": "javascript",
".jsx": "javascript",
".ts": "typescript",
".tsx": "typescript",
".java": "java",
".go": "go",
".cs": "c#",
".php": "php",
".rb": "ruby",
".c": "c",
".cpp": "c++",
".h": "c",
".hpp": "c++",
".rs": "rust",
".swift": "swift",
".kt": "kotlin",
".scala": "scala",
".r": "r",
".R": "r",
".m": "objective-c",
".mm": "objective-c++",
}
return ext_map.get(file_path.suffix.lower())
def should_skip(self, file_path: Path) -> bool:
"""Check if file should be skipped based on exclude patterns."""
# First, check if any component of the path is in SKIP_DIRS
for part in file_path.parts:
if part in SKIP_DIRS:
return True
# Second, check against exclude_patterns
path_str = str(file_path)
for pattern in self.exclude_patterns:
if pattern in path_str:
return True
return False
def extract_imports_from_db(self, rel_path: str) -> list[str]:
"""Extract import statements from the database where indexer already stored them.
Args:
rel_path: Relative path as stored in the database (e.g., "backend/src/app.ts")
Returns:
List of import targets
"""
import sqlite3
# Query the refs table for imports
db_file = self.project_root / ".pf" / "repo_index.db"
if not db_file.exists():
print(f"Warning: Database not found at {db_file}")
return []
try:
conn = sqlite3.connect(db_file)
cursor = conn.cursor()
# Get all imports for this file from refs table
# The indexer stores imports with kind like 'import', 'require', etc.
cursor.execute(
"SELECT value FROM refs WHERE src = ? AND kind IN ('import', 'require', 'from', 'import_type', 'export')",
(rel_path,)
)
imports = [row[0] for row in cursor.fetchall()]
conn.close()
return imports
except sqlite3.Error as e:
print(f"Warning: Failed to read imports from database: {e}")
return []
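    # Hypothetical lookup, assuming the indexer has populated .pf/repo_index.db
    # (the returned values depend entirely on what was indexed):
    #   builder = XGraphBuilder(project_root=".")
    #   builder.extract_imports_from_db("backend/src/app.ts")
    #   # -> e.g. ["express", "./routes", "../config"]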
    def extract_imports(self, file_path: Path, lang: str) -> list[str]:
        """Extract import statements from the database where the indexer already stored them.

        The indexer has already extracted all imports into the refs table,
        so we read from there instead of re-parsing files. The lang argument
        is kept for API compatibility with the regex fallback path.
        """
        # Get relative path for database lookup
        try:
            rel_path = file_path.relative_to(self.project_root)
        except ValueError:
            # file_path is already relative or from a different root
            rel_path = file_path
        # Normalize path separators to match database storage
        db_path = str(rel_path).replace("\\", "/")
        return self.extract_imports_from_db(db_path)
def extract_exports_from_db(self, rel_path: str) -> list[str]:
"""Extract exported symbols from the database where indexer already stored them.
Args:
rel_path: Relative path as stored in the database
Returns:
List of exported symbol names
"""
import sqlite3
db_file = self.project_root / ".pf" / "repo_index.db"
if not db_file.exists():
return []
try:
conn = sqlite3.connect(db_file)
cursor = conn.cursor()
# Get exported functions/classes from symbols table
# The indexer stores these as 'function' and 'class' types
cursor.execute(
"SELECT name FROM symbols WHERE path = ? AND type IN ('function', 'class')",
(rel_path,)
)
exports = [row[0] for row in cursor.fetchall()]
conn.close()
return exports
except sqlite3.Error:
return []
def extract_exports(self, file_path: Path, lang: str) -> list[str]:
"""Extract exported symbols from a file using AST parser with regex fallback."""
# Try AST parser first for supported languages
if self.ast_parser.supports_language(lang):
try:
# Check persistent cache first for JS/TS files
tree = None
if lang in ["javascript", "typescript"]:
# Compute file hash for cache lookup
import hashlib
with open(file_path, 'rb') as f:
file_hash = hashlib.sha256(f.read()).hexdigest()
# Check cache
cache_dir = self.project_root / ".pf" / "ast_cache"
cache_file = cache_dir / f"{file_hash}.json"
if cache_file.exists():
try:
import json
with open(cache_file, 'r', encoding='utf-8') as f:
tree = json.load(f)
except (json.JSONDecodeError, OSError):
pass # Cache read failed, parse fresh
# Parse file if not in cache
if not tree:
tree = self.ast_parser.parse_file(file_path, lang)
# REMOVED: Cache write logic - only indexer.py should write to cache
if tree and tree.get("type") != "regex_fallback":
# Extract exports using AST
export_dicts = self.ast_parser.extract_exports(tree, lang)
# Convert to list of export names
exports = []
for exp in export_dicts:
name = exp.get('name')
if name and name != 'unknown':
exports.append(name)
if exports: # If we got results, return them
return exports
            except Exception:
                # Any AST failure falls through to the regex fallback
                pass
# Fallback to regex-based extraction
return self._extract_exports_regex(file_path, lang)
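    # The on-disk AST cache is keyed by the SHA-256 of the file bytes,
    # mirroring the lookup above (a sketch; the file path is illustrative):
    #   import hashlib
    #   digest = hashlib.sha256(Path("src/app.ts").read_bytes()).hexdigest()
    #   cache_file = Path(".pf/ast_cache") / f"{digest}.json"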
def extract_calls_from_db(self, rel_path: str) -> list[tuple[str, str | None]]:
"""Extract function calls from the database where indexer already stored them.
Args:
rel_path: Relative path as stored in the database
Returns:
List of (function_name, None) tuples for calls
"""
import sqlite3
db_file = self.project_root / ".pf" / "repo_index.db"
if not db_file.exists():
return []
try:
conn = sqlite3.connect(db_file)
cursor = conn.cursor()
# Get function calls from symbols table
# The indexer stores these as 'call' type
cursor.execute(
"SELECT name FROM symbols WHERE path = ? AND type = 'call'",
(rel_path,)
)
# Return as tuples with None for second element (no parent info)
calls = [(row[0], None) for row in cursor.fetchall()]
conn.close()
return calls
except sqlite3.Error:
return []
def extract_calls(self, file_path: Path, lang: str) -> list[tuple[str, str | None]]:
"""Extract function/method calls from a file using AST parser with regex fallback."""
# Try AST parser first for supported languages
if self.ast_parser.supports_language(lang):
try:
# Check persistent cache first for JS/TS files
tree = None
if lang in ["javascript", "typescript"]:
# Compute file hash for cache lookup
import hashlib
with open(file_path, 'rb') as f:
file_hash = hashlib.sha256(f.read()).hexdigest()
# Check cache
cache_dir = self.project_root / ".pf" / "ast_cache"
cache_file = cache_dir / f"{file_hash}.json"
if cache_file.exists():
try:
import json
with open(cache_file, 'r', encoding='utf-8') as f:
tree = json.load(f)
except (json.JSONDecodeError, OSError):
pass # Cache read failed, parse fresh
# Parse file if not in cache
if not tree:
tree = self.ast_parser.parse_file(file_path, lang)
# REMOVED: Cache write logic - only indexer.py should write to cache
if tree and tree.get("type") != "regex_fallback":
# Extract calls using AST
call_dicts = self.ast_parser.extract_calls(tree, lang)
# Convert to list of (function, method) tuples
calls = []
for call in call_dicts:
name = call.get('name', '')
# Check if it's a method call (contains dot)
if '.' in name:
parts = name.rsplit('.', 1)
if len(parts) == 2:
calls.append((parts[0], parts[1]))
else:
calls.append((name, None))
else:
calls.append((name, None))
if calls: # If we got results, return them
return calls
            except Exception:
                # Any AST failure falls through to the regex fallback
                pass
# Fallback to regex-based extraction
return self._extract_calls_regex(file_path, lang)
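    # How dotted call names are split into (object, method) tuples above
    # (a sketch; the names are illustrative):
    #   "res.status.json".rsplit(".", 1)  # -> ["res.status", "json"]
    #   # recorded as ("res.status", "json"); a bare "fetch" becomes ("fetch", None)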
def resolve_import_path(self, import_str: str, source_file: Path, lang: str) -> str:
"""Resolve import string to a normalized module path that matches actual files in the graph."""
# Clean up the import string (remove quotes, semicolons, etc.)
import_str = import_str.strip().strip('"\'`;')
# Language-specific resolution
if lang == "python":
# Convert Python module path to file path
parts = import_str.split(".")
return "/".join(parts)
elif lang in ["javascript", "typescript"]:
# Get source file directory for relative imports
source_dir = source_file.parent
# Handle case where source_file might already be relative or might be from manifest
try:
source_rel = str(source_file.relative_to(self.project_root)).replace("\\", "/")
except ValueError:
# If source_file is already relative or from a different root, use it as is
source_rel = str(source_file).replace("\\", "/")
            # 1. Handle TypeScript path aliases using ModuleResolver (database-driven)
            if import_str.startswith("@"):
                # Determine which tsconfig context applies from the (already
                # normalized) source location
                if "backend/" in source_rel:
                    context = "backend"
                elif "frontend/" in source_rel:
                    context = "frontend"
                else:
                    context = "root"
# Use ModuleResolver's context-aware resolution
resolved = self.module_resolver.resolve_with_context(import_str, str(source_file), context)
# Check if resolution succeeded
if resolved != import_str:
# Resolution worked, now verify file exists in database
db_file = self.project_root / ".pf" / "repo_index.db"
if db_file.exists():
try:
conn = sqlite3.connect(db_file)
cursor = conn.cursor()
# Try with common extensions if no extension
test_paths = [resolved]
if not Path(resolved).suffix:
for ext in [".ts", ".tsx", ".js", ".jsx"]:
test_paths.append(resolved + ext)
test_paths.append(resolved + "/index.ts")
test_paths.append(resolved + "/index.js")
for test_path in test_paths:
cursor.execute("SELECT 1 FROM files WHERE path = ? LIMIT 1", (test_path,))
if cursor.fetchone():
conn.close()
return test_path
conn.close()
except sqlite3.Error:
pass
# Return resolved even if file check failed
return resolved
            # 2. Handle relative imports (./foo, ../bar/baz)
            elif import_str.startswith("."):
                # Resolve relative to the source file's directory
                try:
                    # Peel leading "./" and "../" segments to get the tail path.
                    # (lstrip("./") strips a character set and would also eat
                    # leading dots in names like ".env", so peel prefixes instead.)
                    rel_import = import_str
                    up_count = 0
                    while rel_import.startswith(("./", "../")):
                        if rel_import.startswith("../"):
                            rel_import = rel_import[3:]
                            up_count += 1
                        else:
                            rel_import = rel_import[2:]
                    # Walk up one directory per "../" segment
                    current_dir = source_dir
                    for _ in range(up_count):
                        current_dir = current_dir.parent
                    # Build the target path
                    target_path = current_dir / rel_import
                    rel_target = str(target_path.relative_to(self.project_root)).replace("\\", "/")
# Check if this file exists (try with extensions)
db_file = self.project_root / ".pf" / "repo_index.db"
if db_file.exists():
try:
conn = sqlite3.connect(db_file)
cursor = conn.cursor()
# Try with common extensions
for ext in ["", ".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"]:
test_path = rel_target + ext
cursor.execute("SELECT 1 FROM files WHERE path = ? LIMIT 1", (test_path,))
if cursor.fetchone():
conn.close()
return test_path
conn.close()
except sqlite3.Error:
pass
return rel_target
except (ValueError, OSError):
pass
# 3. Handle node_modules imports (just return as-is, they're external)
else:
# For npm packages, just return the package name
return import_str
# If nothing worked, return original
return import_str
else:
# Default: return as-is
return import_str
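    # Resolution behavior at a glance (a sketch; assumes the source file sits
    # under project_root, and whether a relative import gains an extension
    # depends on what .pf/repo_index.db contains):
    #   resolve_import_path("os.path", Path("a.py"), "python")            # -> "os/path"
    #   resolve_import_path("./utils", Path("src/app.ts"), "typescript")  # -> "src/utils.ts" or "src/utils"
    #   resolve_import_path("express", Path("src/app.ts"), "typescript")  # -> "express" (external package)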
def get_file_metrics(self, file_path: Path) -> dict[str, Any]:
"""Get basic metrics for a file."""
metrics = {"loc": 0, "churn": None}
# When working with manifest data, skip file reading
# The manifest already has loc and other metrics
if not file_path.exists():
# File doesn't exist, we're working with manifest data
# Return default metrics - the caller should use manifest data instead
return metrics
# Count lines of code
try:
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
metrics["loc"] = sum(1 for _ in f)
except (IOError, UnicodeDecodeError, OSError) as e:
print(f"Warning: Failed to read {file_path} for metrics: {e}")
# Still return default metrics but LOG the failure
# Get git churn (commit count)
try:
# Use temp files to avoid buffer overflow
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stdout.txt', encoding='utf-8') as stdout_fp, \
tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stderr.txt', encoding='utf-8') as stderr_fp:
stdout_path = stdout_fp.name
stderr_path = stderr_fp.name
result = subprocess.run(
["git", "log", "--oneline", str(file_path)],
stdout=stdout_fp,
stderr=stderr_fp,
text=True,
timeout=5,
cwd=Path.cwd(),
shell=IS_WINDOWS # Windows compatibility fix
)
with open(stdout_path, 'r', encoding='utf-8') as f:
result.stdout = f.read()
with open(stderr_path, 'r', encoding='utf-8') as f:
result.stderr = f.read()
os.unlink(stdout_path)
os.unlink(stderr_path)
            if result.returncode == 0:
                output = result.stdout.strip()
                # An untracked file produces empty output; don't count it as one commit
                metrics["churn"] = len(output.splitlines()) if output else 0
except (subprocess.TimeoutExpired, OSError, IOError) as e:
print(f"Warning: Failed to get git churn for {file_path}: {e}")
# Still return default metrics but LOG the failure
return metrics
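    # Illustrative result (a sketch; numbers depend on the file and repo):
    #   builder.get_file_metrics(Path("theauditor/graph/builder.py"))
    #   # -> {"loc": 1017, "churn": 42}; churn stays None outside a git checkout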
def build_import_graph(
self,
root: str = ".",
langs: list[str] | None = None,
file_filter: str | None = None,
file_list: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
"""Build import/dependency graph for the project."""
root_path = Path(root).resolve()
nodes = {}
edges = []
# Collect all source files
files = []
manifest_lookup = {} # Map file paths to manifest items for metrics
if file_list is not None:
# Use provided file list from manifest
# The manifest already contains all the file info we need
for item in file_list:
manifest_path = Path(item['path'])
# Use the path from manifest directly - we don't need actual files
# The manifest has all the data (path, ext, content, etc.)
file = root_path / manifest_path # Just for consistent path handling
# Store manifest item for later metric lookup
manifest_lookup[str(file)] = item
# Detect language from extension in manifest
lang = self.detect_language(manifest_path) # Use manifest path
if lang and (not langs or lang in langs):
files.append((file, lang))
else:
# Fall back to original os.walk logic for backward compatibility
for dirpath, dirnames, filenames in os.walk(root_path):
# CRITICAL: Prune excluded directories before os.walk descends into them
# This prevents traversal into .venv and other SKIP_DIRS
dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
# Also prune based on exclude_patterns
if self.exclude_patterns:
dirnames[:] = [d for d in dirnames
if not any(pattern in d for pattern in self.exclude_patterns)]
# Process files in this directory
for filename in filenames:
file = Path(dirpath) / filename
if not self.should_skip(file):
lang = self.detect_language(file)
if lang and (not langs or lang in langs):
files.append((file, lang))
# Process files with progress bar
with click.progressbar(
files,
label="Building import graph",
show_pos=True,
show_percent=True,
show_eta=True,
item_show_func=lambda x: str(x[0].name) if x else None,
) as bar:
for file_path, lang in bar:
# Create node for this file
rel_path = str(file_path.relative_to(root_path)).replace("\\", "/") # Normalize separators
node_id = rel_path # Already normalized
# Get metrics from manifest if available, otherwise from file
if str(file_path) in manifest_lookup:
# Use manifest data which already has metrics
manifest_item = manifest_lookup[str(file_path)]
loc = manifest_item.get('loc', 0)
churn = None # Manifest doesn't have churn data
else:
# Fall back to reading file metrics
metrics = self.get_file_metrics(file_path)
loc = metrics["loc"]
churn = metrics["churn"]
node = GraphNode(
id=node_id,
file=rel_path, # Already normalized
lang=lang,
loc=loc,
churn=churn,
type="module",
)
nodes[node_id] = asdict(node)
# Extract imports and create edges
# Pass the relative path that matches what's in the database
imports = self.extract_imports_from_db(rel_path)
for imp in imports:
target = self.resolve_import_path(imp, file_path, lang)
edge = GraphEdge(
source=node_id,
target=target,
type="import",
file=rel_path, # Already normalized
)
edges.append(asdict(edge))
return {
"nodes": list(nodes.values()),
"edges": edges,
"metadata": {
"root": str(root_path),
"languages": list(set(n["lang"] for n in nodes.values())),
"total_files": len(nodes),
"total_imports": len(edges),
},
}
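    # Minimal end-to-end sketch (assumes the indexer has already populated
    # .pf/repo_index.db under the project root):
    #   builder = XGraphBuilder(project_root=".")
    #   graph = builder.build_import_graph(root=".", langs=["python", "typescript"])
    #   print(graph["metadata"]["total_files"], graph["metadata"]["total_imports"])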
def build_call_graph(
self,
root: str = ".",
langs: list[str] | None = None,
file_filter: str | None = None,
file_list: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
"""Build call graph for the project."""
root_path = Path(root).resolve()
nodes = {}
edges = []
# Collect all source files
files = []
if file_list is not None:
# Use provided file list from manifest
# The manifest already contains all the file info we need
for item in file_list:
manifest_path = Path(item['path'])
# Use the path from manifest directly - we don't need actual files
# The manifest has all the data (path, ext, content, etc.)
file = root_path / manifest_path # Just for consistent path handling
# Detect language from extension in manifest
lang = self.detect_language(manifest_path) # Use manifest path
if lang and (not langs or lang in langs):
files.append((file, lang))
else:
# Fall back to original os.walk logic for backward compatibility
for dirpath, dirnames, filenames in os.walk(root_path):
# CRITICAL: Prune excluded directories before os.walk descends into them
# This prevents traversal into .venv and other SKIP_DIRS
dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
# Also prune based on exclude_patterns
if self.exclude_patterns:
dirnames[:] = [d for d in dirnames
if not any(pattern in d for pattern in self.exclude_patterns)]
# Process files in this directory
for filename in filenames:
file = Path(dirpath) / filename
if not self.should_skip(file):
lang = self.detect_language(file)
if lang and (not langs or lang in langs):
files.append((file, lang))
# Process files with progress bar to extract functions and calls
with click.progressbar(
files,
label="Building call graph",
show_pos=True,
show_percent=True,
show_eta=True,
item_show_func=lambda x: str(x[0].name) if x else None,
) as bar:
for file_path, lang in bar:
rel_path = str(file_path.relative_to(root_path)).replace("\\", "/") # Normalize separators
module_id = rel_path # Already normalized
# Extract exported functions/classes from database
exports = self.extract_exports_from_db(rel_path)
for export in exports:
func_id = f"{module_id}::{export}"
node = GraphNode(
id=func_id,
file=rel_path, # Already normalized
lang=lang,
type="function",
)
nodes[func_id] = asdict(node)
# Extract calls from database
calls = self.extract_calls_from_db(rel_path)
for call, method in calls:
# Try to resolve the call target
if method:
# Method call
target_id = f"{call}.{method}"
else:
# Function call
target_id = call
# Create edge from module to called function
edge = GraphEdge(
source=module_id,
target=target_id,
type="call",
file=rel_path, # Already normalized
)
edges.append(asdict(edge))
return {
"nodes": list(nodes.values()),
"edges": edges,
"metadata": {
"root": str(root_path),
"languages": langs or [],
"total_functions": len(nodes),
"total_calls": len(edges),
},
}
def merge_graphs(self, import_graph: dict, call_graph: dict) -> dict[str, Any]:
"""Merge import and call graphs into a unified graph."""
# Combine nodes (dedup by id)
nodes = {}
for node in import_graph["nodes"]:
nodes[node["id"]] = node
for node in call_graph["nodes"]:
nodes[node["id"]] = node
# Combine edges
edges = import_graph["edges"] + call_graph["edges"]
return {
"nodes": list(nodes.values()),
"edges": edges,
"metadata": {
"root": import_graph["metadata"]["root"],
"languages": list(
set(
import_graph["metadata"]["languages"]
+ call_graph["metadata"].get("languages", [])
)
),
"total_nodes": len(nodes),
"total_edges": len(edges),
},
}
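    # Combining both views (a sketch; continues the example after build_import_graph):
    #   unified = builder.merge_graphs(
    #       builder.build_import_graph(root="."),
    #       builder.build_call_graph(root="."),
    #   )
    #   # nodes are deduplicated by id; edges are simply concatenated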
def _extract_imports_regex(self, file_path: Path, lang: str) -> list[str]:
"""Regex-based fallback for extracting imports.
This method is used when AST parsing fails or is unavailable.
"""
if lang not in self.IMPORT_PATTERNS:
return []
imports = []
patterns = [re.compile(p, re.MULTILINE) for p in self.IMPORT_PATTERNS[lang]]
try:
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
for pattern in patterns:
matches = pattern.findall(content)
imports.extend(matches)
except (IOError, UnicodeDecodeError, OSError) as e:
print(f"Warning: Failed to extract imports from {file_path}: {e}")
# Return empty list but LOG the failure
return imports
def _extract_exports_regex(self, file_path: Path, lang: str) -> list[str]:
"""Regex-based fallback for extracting exports.
This method is used when AST parsing fails or is unavailable.
"""
if lang not in self.EXPORT_PATTERNS:
return []
exports = []
patterns = [re.compile(p, re.MULTILINE) for p in self.EXPORT_PATTERNS[lang]]
try:
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
for pattern in patterns:
matches = pattern.findall(content)
# Flatten tuples if regex has groups
for match in matches:
if isinstance(match, tuple):
exports.extend([m for m in match if m])
else:
exports.append(match)
except (IOError, UnicodeDecodeError, OSError) as e:
print(f"Warning: Failed to extract exports from {file_path}: {e}")
# Return empty list but LOG the failure
# Filter exports for Go (only capitalized are public)
if lang == "go":
exports = [e for e in exports if e and e[0].isupper()]
return exports
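    # Go visibility rule applied above (a sketch):
    #   ["ParseFile", "helper", "Node"] -> ["ParseFile", "Node"]
    #   # only identifiers starting with an uppercase letter are exported in Go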
def _extract_calls_regex(self, file_path: Path, lang: str) -> list[tuple[str, str | None]]:
"""Regex-based fallback for extracting function calls.
This method is used when AST parsing fails or is unavailable.
"""
if lang not in self.CALL_PATTERNS:
return []
calls = []
patterns = [re.compile(p) for p in self.CALL_PATTERNS[lang]]
try:
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
for pattern in patterns:
matches = pattern.findall(content)
for match in matches:
if isinstance(match, tuple):
# Method call: (object, method)
calls.append(match)
else:
# Function call
calls.append((match, None))
except (IOError, UnicodeDecodeError, OSError) as e:
print(f"Warning: Failed to extract calls from {file_path}: {e}")
# Return empty list but LOG the failure
return calls