Initial commit: TheAuditor v1.0.1 - AI-centric SAST and Code Intelligence Platform

2025-12-17 11:24:19 +01:00 · 2025-09-07 20:39:47 +07:00
commit ba5c287b02
215 changed files with 50911 additions and 0 deletions
--- a/theauditor/commands/rules.py
+++ b/theauditor/commands/rules.py
@@ -0,0 +1,226 @@
+"""Rules command - inspect and summarize detection capabilities."""
+
+import os
+import yaml
+import importlib
+import inspect
+from pathlib import Path
+from typing import Dict, List, Any
+
+import click
+
+from theauditor.utils import handle_exceptions
+from theauditor.utils.exit_codes import ExitCodes
+
+
+@click.command(name="rules")
+@click.option(
+    "--summary",
+    is_flag=True,
+    default=False,
+    help="Generate a summary of all detection capabilities",
+)
+@handle_exceptions
+def rules_command(summary: bool) -> None:
+    """Inspect and summarize TheAuditor's detection rules and patterns.
+    
+    Args:
+        summary: If True, generate a comprehensive capability report
+    """
+    if not summary:
+        click.echo(click.style("[ERROR] Please specify --summary to generate a capability report", fg="red"), err=True)
+        raise SystemExit(ExitCodes.TASK_INCOMPLETE)
+    
+    # Get the base path for patterns and rules
+    base_path = Path(__file__).parent.parent
+    patterns_path = base_path / "patterns"
+    rules_path = base_path / "rules"
+    
+    # Create output directory
+    output_dir = Path(".pf")
+    output_dir.mkdir(parents=True, exist_ok=True)
+    output_file = output_dir / "auditor_capabilities.md"
+    
+    # Collect output in a list
+    output_lines = []
+    output_lines.append("# TheAuditor Detection Capabilities\n")
+    
+    # Also print to console
+    print("# TheAuditor Detection Capabilities\n")
+    
+    # Scan YAML patterns
+    print("## YAML Patterns\n")
+    output_lines.append("## YAML Patterns\n")
+    yaml_patterns = scan_yaml_patterns(patterns_path)
+    total_patterns = 0
+    
+    for category, files in yaml_patterns.items():
+        if files:
+            category_display = "patterns/" if category == "." else f"patterns/{category}/"
+            print(f"### {category_display}\n")
+            output_lines.append(f"### {category_display}\n")
+            for file_name, patterns in files.items():
+                if patterns:
+                    print(f"**{file_name}** ({len(patterns)} patterns)")
+                    output_lines.append(f"**{file_name}** ({len(patterns)} patterns)")
+                    for pattern in patterns:
+                        print(f"- `{pattern}`")
+                        output_lines.append(f"- `{pattern}`")
+                    print()
+                    output_lines.append("")
+                    total_patterns += len(patterns)
+    
+    # Scan Python rules
+    print("## Python AST Rules\n")
+    output_lines.append("## Python AST Rules\n")
+    python_rules = scan_python_rules(rules_path)
+    total_rules = 0
+    
+    for module_path, functions in python_rules.items():
+        if functions:
+            # Make path relative to rules/ for readability
+            display_path = module_path.replace(str(rules_path) + os.sep, "")
+            print(f"### {display_path}")
+            output_lines.append(f"### {display_path}")
+            for func in functions:
+                print(f"- `{func}()`")
+                output_lines.append(f"- `{func}()`")
+            print()
+            output_lines.append("")
+            total_rules += len(functions)
+    
+    # Print summary statistics
+    print("## Summary Statistics\n")
+    output_lines.append("## Summary Statistics\n")
+    print(f"- **Total YAML Patterns**: {total_patterns}")
+    output_lines.append(f"- **Total YAML Patterns**: {total_patterns}")
+    print(f"- **Total Python Rules**: {total_rules}")
+    output_lines.append(f"- **Total Python Rules**: {total_rules}")
+    print(f"- **Combined Detection Capabilities**: {total_patterns + total_rules}")
+    output_lines.append(f"- **Combined Detection Capabilities**: {total_patterns + total_rules}")
+    
+    # Write to file
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write('\n'.join(output_lines))
+    
+    click.echo(click.style(f"\n[SUCCESS] Capability report generated successfully", fg="green"))
+    click.echo(f"[INFO] Report saved to: {output_file}")
+    raise SystemExit(ExitCodes.SUCCESS)
+
+
+def scan_yaml_patterns(patterns_path: Path) -> Dict[str, Dict[str, List[str]]]:
+    """Scan YAML pattern files and extract pattern names.
+    
+    Args:
+        patterns_path: Path to the patterns directory
+        
+    Returns:
+        Dictionary mapping category -> file -> list of pattern names
+    """
+    results = {}
+    
+    if not patterns_path.exists():
+        return results
+    
+    # Walk through all subdirectories
+    for root, dirs, files in os.walk(patterns_path):
+        # Skip __pycache__ directories
+        dirs[:] = [d for d in dirs if d != "__pycache__"]
+        
+        for file in files:
+            if file.endswith(".yml") or file.endswith(".yaml"):
+                file_path = Path(root) / file
+                
+                # Determine category from directory structure
+                rel_path = file_path.relative_to(patterns_path)
+                # If file is in root of patterns/, use "." as category
+                # If in subdirectory like frameworks/, use that as category
+                if rel_path.parent == Path("."):
+                    category = "."
+                else:
+                    category = str(rel_path.parent)
+                
+                if category not in results:
+                    results[category] = {}
+                
+                # Parse YAML and extract pattern names
+                try:
+                    with open(file_path, 'r', encoding='utf-8') as f:
+                        data = yaml.safe_load(f)
+                        
+                    if data and isinstance(data, list):
+                        pattern_names = []
+                        for pattern in data:
+                            if isinstance(pattern, dict) and 'name' in pattern:
+                                pattern_names.append(pattern['name'])
+                        
+                        if pattern_names:
+                            results[category][file] = pattern_names
+                            
+                except (yaml.YAMLError, OSError) as e:
+                    # Skip files that can't be parsed
+                    continue
+    
+    return results
+
+
+def scan_python_rules(rules_path: Path) -> Dict[str, List[str]]:
+    """Scan Python rule files and find all find_* functions.
+    
+    Args:
+        rules_path: Path to the rules directory
+        
+    Returns:
+        Dictionary mapping module path -> list of find_* function names
+    """
+    results = {}
+    
+    if not rules_path.exists():
+        return results
+    
+    # First, check what's exposed in the main __init__.py
+    init_file = rules_path / "__init__.py"
+    if init_file.exists():
+        try:
+            module = importlib.import_module("theauditor.rules")
+            exposed_functions = []
+            for name, obj in inspect.getmembers(module, inspect.isfunction):
+                if name.startswith("find_"):
+                    exposed_functions.append(name)
+            if exposed_functions:
+                results["rules/__init__.py (exposed)"] = exposed_functions
+        except ImportError:
+            pass
+    
+    # Walk through all Python files
+    for root, dirs, files in os.walk(rules_path):
+        # Skip __pycache__ directories
+        dirs[:] = [d for d in dirs if d != "__pycache__"]
+        
+        for file in files:
+            if file.endswith(".py"):
+                file_path = Path(root) / file
+                
+                # Skip __init__.py files for now (we handle them separately)
+                if file == "__init__.py":
+                    continue
+                
+                # Try basic text scanning (more reliable than import)
+                try:
+                    with open(file_path, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                    
+                    # Simple regex to find function definitions
+                    import re
+                    pattern = r'^def\s+(find_\w+)\s*\('
+                    matches = re.findall(pattern, content, re.MULTILINE)
+                    
+                    if matches:
+                        # Make path relative for display
+                        display_path = str(file_path.relative_to(rules_path.parent))
+                        results[display_path] = matches
+                        
+                except OSError:
+                    continue
+    
+    return results