From ba5c287b0268d84ca36cea08a3e0879e98d8634e Mon Sep 17 00:00:00 2001 From: TheAuditorTool Date: Sun, 7 Sep 2025 20:39:47 +0700 Subject: [PATCH] Initial commit: TheAuditor v1.0.1 - AI-centric SAST and Code Intelligence Platform --- .gitignore | 126 ++ ARCHITECTURE.md | 606 ++++++++ CLAUDE.md | 454 ++++++ CONTRIBUTING.md | 429 ++++++ HOWTOUSE.md | 1132 +++++++++++++++ LICENSE | 687 +++++++++ README.md | 313 ++++ ROADMAP.md | 71 + agent_templates/generic-template.md | 30 + agent_templates/sopmanager.md | 47 + package-template.json | 15 + package.json | 15 + pyproject.toml | 113 ++ theauditor/.gitattributes | 2 + theauditor/__init__.py | 3 + theauditor/agent_template_validator.py | 347 +++++ theauditor/ast_extractors/__init__.py | 348 +++++ theauditor/ast_extractors/base.py | 173 +++ theauditor/ast_extractors/python_impl.py | 327 +++++ theauditor/ast_extractors/treesitter_impl.py | 711 +++++++++ theauditor/ast_extractors/typescript_impl.py | 674 +++++++++ theauditor/ast_parser.py | 323 +++++ theauditor/ast_patterns.py | 401 ++++++ theauditor/claude_setup.py | 273 ++++ theauditor/cli.py | 239 ++++ theauditor/commands/__init__.py | 1 + theauditor/commands/_archive.py | 107 ++ theauditor/commands/deps.py | 191 +++ theauditor/commands/detect_frameworks.py | 46 + theauditor/commands/detect_patterns.py | 81 ++ theauditor/commands/docker_analyze.py | 94 ++ theauditor/commands/docs.py | 201 +++ theauditor/commands/fce.py | 43 + theauditor/commands/full.py | 90 ++ theauditor/commands/graph.py | 639 +++++++++ theauditor/commands/impact.py | 118 ++ theauditor/commands/index.py | 50 + theauditor/commands/init.py | 143 ++ theauditor/commands/init_config.py | 21 + theauditor/commands/init_js.py | 41 + theauditor/commands/insights.py | 443 ++++++ theauditor/commands/lint.py | 267 ++++ theauditor/commands/ml.py | 165 +++ theauditor/commands/refactor.py | 600 ++++++++ theauditor/commands/report.py | 66 + theauditor/commands/rules.py | 226 +++ theauditor/commands/setup.py | 63 + theauditor/commands/structure.py | 96 ++ theauditor/commands/summary.py | 236 +++ theauditor/commands/taint.py | 272 ++++ theauditor/commands/tool_versions.py | 25 + theauditor/commands/validate_templates.py | 30 + theauditor/commands/workset.py | 55 + theauditor/config.py | 40 + theauditor/config_runtime.py | 160 +++ theauditor/correlations/__init__.py | 5 + theauditor/correlations/loader.py | 237 +++ .../rules/angular_sanitization_cluster.yml | 10 + .../rules/api_key_exposure_cluster.yml | 10 + .../rules/command_injection_cluster.yml | 10 + .../rules/container_escape_cluster.yml | 10 + .../rules/cors_misconfiguration_cluster.yml | 10 + .../correlations/rules/deadlock_cluster.yml | 10 + .../rules/debug_enabled_cluster.yml | 10 + .../rules/express_bodyparser_cluster.yml | 10 + .../rules/infinite_loop_cluster.yml | 10 + .../correlations/rules/jwt_issues_cluster.yml | 10 + .../rules/ldap_injection_cluster.yml | 10 + .../rules/memory_leak_cluster.yml | 10 + .../rules/missing_auth_cluster.yml | 10 + .../rules/nosql_injection_cluster.yml | 10 + .../rules/path_traversal_cluster.yml | 10 + .../correlations/rules/pii_leak_cluster.yml | 10 + .../rules/race_condition_cluster.yml | 10 + .../rules/rate_limit_missing_cluster.yml | 10 + .../rules/react_dangerous_html_cluster.yml | 10 + .../correlations/rules/refactoring.yaml | 277 ++++ .../rules/sensitive_logs_cluster.yml | 10 + .../rules/session_fixation_cluster.yml | 10 + .../rules/source_map_exposure_cluster.yml | 10 + .../correlations/rules/ssrf_cluster.yml | 10 + 
.../rules/template_injection_cluster.yml | 10 + .../correlations/rules/test_sql_injection.yml | 10 + .../correlations/rules/vue_v_html_cluster.yml | 10 + .../correlations/rules/weak_auth_cluster.yml | 10 + theauditor/correlations/rules/xss_cluster.yml | 10 + theauditor/correlations/rules/xxe_cluster.yml | 10 + theauditor/deps.py | 1109 ++++++++++++++ theauditor/docgen.py | 565 ++++++++ theauditor/docker_analyzer.py | 310 ++++ theauditor/docs_fetch.py | 793 ++++++++++ theauditor/docs_summarize.py | 408 ++++++ theauditor/extraction.py | 493 +++++++ theauditor/fce.py | 784 ++++++++++ theauditor/framework_detector.py | 608 ++++++++ theauditor/framework_registry.py | 549 +++++++ theauditor/graph/__init__.py | 45 + theauditor/graph/analyzer.py | 421 ++++++ theauditor/graph/builder.py | 1017 +++++++++++++ theauditor/graph/insights.py | 17 + theauditor/graph/store.py | 444 ++++++ theauditor/graph/visualizer.py | 937 ++++++++++++ theauditor/impact_analyzer.py | 683 +++++++++ theauditor/indexer/__init__.py | 393 +++++ theauditor/indexer/config.py | 165 +++ theauditor/indexer/core.py | 409 ++++++ theauditor/indexer/database.py | 607 ++++++++ theauditor/indexer/extractors/__init__.py | 287 ++++ theauditor/indexer/extractors/docker.py | 279 ++++ theauditor/indexer/extractors/generic.py | 121 ++ theauditor/indexer/extractors/javascript.py | 345 +++++ theauditor/indexer/extractors/python.py | 189 +++ theauditor/indexer/extractors/sql.py | 44 + theauditor/indexer_compat.py | 321 +++++ theauditor/init.py | 182 +++ theauditor/insights/__init__.py | 86 ++ theauditor/insights/graph.py | 470 ++++++ theauditor/insights/ml.py | 1241 ++++++++++++++++ theauditor/insights/taint.py | 446 ++++++ theauditor/journal.py | 446 ++++++ theauditor/js_init.py | 154 ++ theauditor/js_semantic_parser.py | 1270 +++++++++++++++++ theauditor/linters/__init__.py | 36 + theauditor/linters/detector.py | 275 ++++ theauditor/linters/eslint.config.cjs | 119 ++ theauditor/linters/package.json | 17 + theauditor/linters/parsers.py | 504 +++++++ theauditor/linters/runner.py | 387 +++++ theauditor/manifest_parser.py | 183 +++ theauditor/ml.py | 17 + theauditor/module_resolver.py | 352 +++++ theauditor/parsers/__init__.py | 8 + theauditor/parsers/compose_parser.py | 238 +++ theauditor/parsers/dockerfile_parser.py | 156 ++ theauditor/parsers/nginx_parser.py | 304 ++++ theauditor/parsers/prisma_schema_parser.py | 316 ++++ theauditor/parsers/webpack_config_parser.py | 213 +++ theauditor/pattern_loader.py | 201 +++ theauditor/patterns/business_logic.yml | 31 + theauditor/patterns/db_issues.yml | 49 + theauditor/patterns/docker.yml | 19 + theauditor/patterns/flow_sensitive.yml | 116 ++ theauditor/patterns/frameworks/angular.yml | 55 + theauditor/patterns/frameworks/django.yml | 67 + theauditor/patterns/frameworks/express.yml | 46 + theauditor/patterns/frameworks/fastapi.yml | 94 ++ theauditor/patterns/frameworks/flask.yml | 73 + theauditor/patterns/frameworks/nextjs.yml | 91 ++ theauditor/patterns/frameworks/react.yml | 49 + theauditor/patterns/frameworks/svelte.yml | 85 ++ theauditor/patterns/frameworks/vue.yml | 55 + theauditor/patterns/multi_tenant.yml | 88 ++ theauditor/patterns/nginx.yml | 19 + theauditor/patterns/postgres_rls.yml | 13 + theauditor/patterns/runtime_issues.yml | 62 + theauditor/patterns/security.yml | 191 +++ theauditor/patterns/security_compliance.yml | 122 ++ theauditor/pipelines.py | 1080 ++++++++++++++ theauditor/project_summary.py | 421 ++++++ theauditor/rules/__init__.py | 29 + theauditor/rules/auth/__init__.py | 5 + 
theauditor/rules/auth/jwt_detector.py | 812 +++++++++++ theauditor/rules/common/utils.py | 169 +++ theauditor/rules/deployment/__init__.py | 5 + .../rules/deployment/compose_analyzer.py | 279 ++++ theauditor/rules/deployment/nginx_analyzer.py | 329 +++++ theauditor/rules/node/__init__.py | 5 + .../rules/node/runtime_issue_detector.py | 603 ++++++++ theauditor/rules/orchestrator.py | 668 +++++++++ theauditor/rules/orm/__init__.py | 6 + theauditor/rules/orm/prisma_detector.py | 325 +++++ theauditor/rules/orm/sequelize_detector.py | 206 +++ theauditor/rules/orm/typeorm_detector.py | 384 +++++ theauditor/rules/performance/__init__.py | 13 + theauditor/rules/performance/performance.py | 779 ++++++++++ theauditor/rules/react/__init__.py | 9 + theauditor/rules/react/hooks_analyzer.py | 398 ++++++ theauditor/rules/secrets/__init__.py | 5 + .../secrets/hardcoded_secret_analyzer.py | 662 +++++++++ theauditor/rules/security/__init__.py | 6 + .../rules/security/api_auth_detector.py | 151 ++ theauditor/rules/security/cors_analyzer.py | 485 +++++++ .../rules/security/rate_limit_analyzer.py | 553 +++++++ .../rules/security/sourcemap_detector.py | 209 +++ theauditor/rules/sql/__init__.py | 5 + .../rules/sql/sql_injection_analyzer.py | 74 + theauditor/rules/typescript/__init__.py | 5 + .../rules/typescript/type_safety_analyzer.py | 145 ++ theauditor/rules/vue/__init__.py | 9 + theauditor/rules/vue/reactivity_analyzer.py | 295 ++++ theauditor/rules/xss/__init__.py | 5 + theauditor/rules/xss/xssdetection.py | 640 +++++++++ theauditor/security.py | 150 ++ theauditor/taint/__init__.py | 99 ++ theauditor/taint/core.py | 479 +++++++ theauditor/taint/database.py | 301 ++++ theauditor/taint/insights.py | 17 + theauditor/taint/interprocedural.py | 239 ++++ theauditor/taint/javascript.py | 375 +++++ theauditor/taint/propagation.py | 633 ++++++++ theauditor/taint/registry.py | 225 +++ theauditor/taint/sources.py | 343 +++++ theauditor/taint_analyzer.py | 17 + theauditor/test_frameworks.py | 236 +++ theauditor/tools.py | 152 ++ theauditor/universal_detector.py | 1093 ++++++++++++++ theauditor/utils/__init__.py | 21 + theauditor/utils/error_handler.py | 66 + theauditor/utils/exit_codes.py | 65 + theauditor/utils/finding_priority.py | 178 +++ theauditor/utils/helpers.py | 156 ++ theauditor/utils/temp_manager.py | 150 ++ theauditor/venv_install.py | 779 ++++++++++ theauditor/vulnerability_scanner.py | 420 ++++++ theauditor/workset.py | 376 +++++ 215 files changed, 50911 insertions(+) create mode 100644 .gitignore create mode 100644 ARCHITECTURE.md create mode 100644 CLAUDE.md create mode 100644 CONTRIBUTING.md create mode 100644 HOWTOUSE.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 ROADMAP.md create mode 100644 agent_templates/generic-template.md create mode 100644 agent_templates/sopmanager.md create mode 100644 package-template.json create mode 100644 package.json create mode 100644 pyproject.toml create mode 100644 theauditor/.gitattributes create mode 100644 theauditor/__init__.py create mode 100644 theauditor/agent_template_validator.py create mode 100644 theauditor/ast_extractors/__init__.py create mode 100644 theauditor/ast_extractors/base.py create mode 100644 theauditor/ast_extractors/python_impl.py create mode 100644 theauditor/ast_extractors/treesitter_impl.py create mode 100644 theauditor/ast_extractors/typescript_impl.py create mode 100644 theauditor/ast_parser.py create mode 100644 theauditor/ast_patterns.py create mode 100644 theauditor/claude_setup.py create mode 100644 
theauditor/cli.py create mode 100644 theauditor/commands/__init__.py create mode 100644 theauditor/commands/_archive.py create mode 100644 theauditor/commands/deps.py create mode 100644 theauditor/commands/detect_frameworks.py create mode 100644 theauditor/commands/detect_patterns.py create mode 100644 theauditor/commands/docker_analyze.py create mode 100644 theauditor/commands/docs.py create mode 100644 theauditor/commands/fce.py create mode 100644 theauditor/commands/full.py create mode 100644 theauditor/commands/graph.py create mode 100644 theauditor/commands/impact.py create mode 100644 theauditor/commands/index.py create mode 100644 theauditor/commands/init.py create mode 100644 theauditor/commands/init_config.py create mode 100644 theauditor/commands/init_js.py create mode 100644 theauditor/commands/insights.py create mode 100644 theauditor/commands/lint.py create mode 100644 theauditor/commands/ml.py create mode 100644 theauditor/commands/refactor.py create mode 100644 theauditor/commands/report.py create mode 100644 theauditor/commands/rules.py create mode 100644 theauditor/commands/setup.py create mode 100644 theauditor/commands/structure.py create mode 100644 theauditor/commands/summary.py create mode 100644 theauditor/commands/taint.py create mode 100644 theauditor/commands/tool_versions.py create mode 100644 theauditor/commands/validate_templates.py create mode 100644 theauditor/commands/workset.py create mode 100644 theauditor/config.py create mode 100644 theauditor/config_runtime.py create mode 100644 theauditor/correlations/__init__.py create mode 100644 theauditor/correlations/loader.py create mode 100644 theauditor/correlations/rules/angular_sanitization_cluster.yml create mode 100644 theauditor/correlations/rules/api_key_exposure_cluster.yml create mode 100644 theauditor/correlations/rules/command_injection_cluster.yml create mode 100644 theauditor/correlations/rules/container_escape_cluster.yml create mode 100644 theauditor/correlations/rules/cors_misconfiguration_cluster.yml create mode 100644 theauditor/correlations/rules/deadlock_cluster.yml create mode 100644 theauditor/correlations/rules/debug_enabled_cluster.yml create mode 100644 theauditor/correlations/rules/express_bodyparser_cluster.yml create mode 100644 theauditor/correlations/rules/infinite_loop_cluster.yml create mode 100644 theauditor/correlations/rules/jwt_issues_cluster.yml create mode 100644 theauditor/correlations/rules/ldap_injection_cluster.yml create mode 100644 theauditor/correlations/rules/memory_leak_cluster.yml create mode 100644 theauditor/correlations/rules/missing_auth_cluster.yml create mode 100644 theauditor/correlations/rules/nosql_injection_cluster.yml create mode 100644 theauditor/correlations/rules/path_traversal_cluster.yml create mode 100644 theauditor/correlations/rules/pii_leak_cluster.yml create mode 100644 theauditor/correlations/rules/race_condition_cluster.yml create mode 100644 theauditor/correlations/rules/rate_limit_missing_cluster.yml create mode 100644 theauditor/correlations/rules/react_dangerous_html_cluster.yml create mode 100644 theauditor/correlations/rules/refactoring.yaml create mode 100644 theauditor/correlations/rules/sensitive_logs_cluster.yml create mode 100644 theauditor/correlations/rules/session_fixation_cluster.yml create mode 100644 theauditor/correlations/rules/source_map_exposure_cluster.yml create mode 100644 theauditor/correlations/rules/ssrf_cluster.yml create mode 100644 theauditor/correlations/rules/template_injection_cluster.yml create mode 100644 
theauditor/correlations/rules/test_sql_injection.yml create mode 100644 theauditor/correlations/rules/vue_v_html_cluster.yml create mode 100644 theauditor/correlations/rules/weak_auth_cluster.yml create mode 100644 theauditor/correlations/rules/xss_cluster.yml create mode 100644 theauditor/correlations/rules/xxe_cluster.yml create mode 100644 theauditor/deps.py create mode 100644 theauditor/docgen.py create mode 100644 theauditor/docker_analyzer.py create mode 100644 theauditor/docs_fetch.py create mode 100644 theauditor/docs_summarize.py create mode 100644 theauditor/extraction.py create mode 100644 theauditor/fce.py create mode 100644 theauditor/framework_detector.py create mode 100644 theauditor/framework_registry.py create mode 100644 theauditor/graph/__init__.py create mode 100644 theauditor/graph/analyzer.py create mode 100644 theauditor/graph/builder.py create mode 100644 theauditor/graph/insights.py create mode 100644 theauditor/graph/store.py create mode 100644 theauditor/graph/visualizer.py create mode 100644 theauditor/impact_analyzer.py create mode 100644 theauditor/indexer/__init__.py create mode 100644 theauditor/indexer/config.py create mode 100644 theauditor/indexer/core.py create mode 100644 theauditor/indexer/database.py create mode 100644 theauditor/indexer/extractors/__init__.py create mode 100644 theauditor/indexer/extractors/docker.py create mode 100644 theauditor/indexer/extractors/generic.py create mode 100644 theauditor/indexer/extractors/javascript.py create mode 100644 theauditor/indexer/extractors/python.py create mode 100644 theauditor/indexer/extractors/sql.py create mode 100644 theauditor/indexer_compat.py create mode 100644 theauditor/init.py create mode 100644 theauditor/insights/__init__.py create mode 100644 theauditor/insights/graph.py create mode 100644 theauditor/insights/ml.py create mode 100644 theauditor/insights/taint.py create mode 100644 theauditor/journal.py create mode 100644 theauditor/js_init.py create mode 100644 theauditor/js_semantic_parser.py create mode 100644 theauditor/linters/__init__.py create mode 100644 theauditor/linters/detector.py create mode 100644 theauditor/linters/eslint.config.cjs create mode 100644 theauditor/linters/package.json create mode 100644 theauditor/linters/parsers.py create mode 100644 theauditor/linters/runner.py create mode 100644 theauditor/manifest_parser.py create mode 100644 theauditor/ml.py create mode 100644 theauditor/module_resolver.py create mode 100644 theauditor/parsers/__init__.py create mode 100644 theauditor/parsers/compose_parser.py create mode 100644 theauditor/parsers/dockerfile_parser.py create mode 100644 theauditor/parsers/nginx_parser.py create mode 100644 theauditor/parsers/prisma_schema_parser.py create mode 100644 theauditor/parsers/webpack_config_parser.py create mode 100644 theauditor/pattern_loader.py create mode 100644 theauditor/patterns/business_logic.yml create mode 100644 theauditor/patterns/db_issues.yml create mode 100644 theauditor/patterns/docker.yml create mode 100644 theauditor/patterns/flow_sensitive.yml create mode 100644 theauditor/patterns/frameworks/angular.yml create mode 100644 theauditor/patterns/frameworks/django.yml create mode 100644 theauditor/patterns/frameworks/express.yml create mode 100644 theauditor/patterns/frameworks/fastapi.yml create mode 100644 theauditor/patterns/frameworks/flask.yml create mode 100644 theauditor/patterns/frameworks/nextjs.yml create mode 100644 theauditor/patterns/frameworks/react.yml create mode 100644 
theauditor/patterns/frameworks/svelte.yml create mode 100644 theauditor/patterns/frameworks/vue.yml create mode 100644 theauditor/patterns/multi_tenant.yml create mode 100644 theauditor/patterns/nginx.yml create mode 100644 theauditor/patterns/postgres_rls.yml create mode 100644 theauditor/patterns/runtime_issues.yml create mode 100644 theauditor/patterns/security.yml create mode 100644 theauditor/patterns/security_compliance.yml create mode 100644 theauditor/pipelines.py create mode 100644 theauditor/project_summary.py create mode 100644 theauditor/rules/__init__.py create mode 100644 theauditor/rules/auth/__init__.py create mode 100644 theauditor/rules/auth/jwt_detector.py create mode 100644 theauditor/rules/common/utils.py create mode 100644 theauditor/rules/deployment/__init__.py create mode 100644 theauditor/rules/deployment/compose_analyzer.py create mode 100644 theauditor/rules/deployment/nginx_analyzer.py create mode 100644 theauditor/rules/node/__init__.py create mode 100644 theauditor/rules/node/runtime_issue_detector.py create mode 100644 theauditor/rules/orchestrator.py create mode 100644 theauditor/rules/orm/__init__.py create mode 100644 theauditor/rules/orm/prisma_detector.py create mode 100644 theauditor/rules/orm/sequelize_detector.py create mode 100644 theauditor/rules/orm/typeorm_detector.py create mode 100644 theauditor/rules/performance/__init__.py create mode 100644 theauditor/rules/performance/performance.py create mode 100644 theauditor/rules/react/__init__.py create mode 100644 theauditor/rules/react/hooks_analyzer.py create mode 100644 theauditor/rules/secrets/__init__.py create mode 100644 theauditor/rules/secrets/hardcoded_secret_analyzer.py create mode 100644 theauditor/rules/security/__init__.py create mode 100644 theauditor/rules/security/api_auth_detector.py create mode 100644 theauditor/rules/security/cors_analyzer.py create mode 100644 theauditor/rules/security/rate_limit_analyzer.py create mode 100644 theauditor/rules/security/sourcemap_detector.py create mode 100644 theauditor/rules/sql/__init__.py create mode 100644 theauditor/rules/sql/sql_injection_analyzer.py create mode 100644 theauditor/rules/typescript/__init__.py create mode 100644 theauditor/rules/typescript/type_safety_analyzer.py create mode 100644 theauditor/rules/vue/__init__.py create mode 100644 theauditor/rules/vue/reactivity_analyzer.py create mode 100644 theauditor/rules/xss/__init__.py create mode 100644 theauditor/rules/xss/xssdetection.py create mode 100644 theauditor/security.py create mode 100644 theauditor/taint/__init__.py create mode 100644 theauditor/taint/core.py create mode 100644 theauditor/taint/database.py create mode 100644 theauditor/taint/insights.py create mode 100644 theauditor/taint/interprocedural.py create mode 100644 theauditor/taint/javascript.py create mode 100644 theauditor/taint/propagation.py create mode 100644 theauditor/taint/registry.py create mode 100644 theauditor/taint/sources.py create mode 100644 theauditor/taint_analyzer.py create mode 100644 theauditor/test_frameworks.py create mode 100644 theauditor/tools.py create mode 100644 theauditor/universal_detector.py create mode 100644 theauditor/utils/__init__.py create mode 100644 theauditor/utils/error_handler.py create mode 100644 theauditor/utils/exit_codes.py create mode 100644 theauditor/utils/finding_priority.py create mode 100644 theauditor/utils/helpers.py create mode 100644 theauditor/utils/temp_manager.py create mode 100644 theauditor/venv_install.py create mode 100644 
theauditor/vulnerability_scanner.py create mode 100644 theauditor/workset.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f41bcb2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,126 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Virtual environments +.env +.venv +.auditor_venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Project specific +.pf/ +.claude/ +audit/ +manifest.json +repo_index.db +*.db +*.db-journal +/test_scaffold/ +/tmp/ + +# Test and temporary files +test_output/ +temp/ +*.tmp +*.bak +*.log + +# Local configuration +.env.local +.env.*.local +config.local.json + +# Journal and runtime files +*.ndjson +.pf/journal.ndjson +.pf/bus/ +.pf/workset.json +.pf/capsules/ +.pf/context/ + +# ML models (if any) +*.pkl +*.joblib +*.h5 +*.model + +# Documentation build +docs/_build/ +docs/.doctrees/ + +# macOS +.DS_Store +.AppleDouble +.LSOverride + +# Windows +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db +*.stackdump +[Dd]esktop.ini + +# Linux +.directory +.Trash-* \ No newline at end of file diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..2e4b055 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,606 @@ +# TheAuditor Architecture + +This document provides a comprehensive technical overview of TheAuditor's architecture, design patterns, and implementation details. + +## System Overview + +TheAuditor is an offline-first, AI-centric SAST (Static Application Security Testing) and code intelligence platform. It orchestrates industry-standard tools to provide ground truth about code quality and security, producing AI-consumable reports optimized for LLM context windows. + +### Core Design Principles + +1. **Offline-First Operation** - All analysis runs without network access, ensuring data privacy and reproducible results +2. **Dual-Mode Architecture** - Courier Mode preserves raw external tool outputs; Expert Mode applies security expertise objectively +3. **AI-Centric Workflow** - Produces chunks optimized for LLM context windows (65KB by default) +4. **Sandboxed Execution** - Isolated analysis environment prevents cross-contamination +5. 
**No Fix Generation** - Reports findings without prescribing solutions + +## Truth Courier vs Insights: Separation of Concerns + +TheAuditor maintains a strict architectural separation between **factual observation** and **optional interpretation**: + +### Truth Courier Modules (Core) +These modules are the foundation - they gather and report verifiable facts without judgment: + +- **Indexer**: Reports "Function X exists at line Y with Z parameters" +- **Taint Analyzer**: Reports "Data flows from pattern A to pattern B through path C" +- **Impact Analyzer**: Reports "Changing function X affects Y files through Z call chains" +- **Graph Analyzer**: Reports "Module A imports B, B imports C, C imports A (cycle detected)" +- **Pattern Detector**: Reports "Line X matches pattern Y from rule Z" +- **Linters**: Reports "Tool ESLint flagged line X with rule Y" + +These modules form the immutable ground truth. They report **what exists**, not what it means. + +### Insights Modules (Optional Interpretation Layer) +These are **optional packages** that consume Truth Courier data to add scoring and classification. All insights modules have been consolidated into a single package for better organization: + +``` +theauditor/insights/ +├── __init__.py # Package exports +├── ml.py # Machine learning predictions (requires pip install -e ".[ml]") +├── graph.py # Graph health scoring and recommendations +└── taint.py # Vulnerability severity classification +``` + +- **insights/taint.py**: Adds "This flow is XSS with HIGH severity" +- **insights/graph.py**: Adds "Health score: 70/100, Grade: B" +- **insights/ml.py** (requires `pip install -e ".[ml]"`): Adds "80% probability of bugs based on historical patterns" + +**Important**: Insights modules are: +- Not installed by default (ML requires explicit opt-in) +- Completely decoupled from core analysis +- Still based on technical patterns, not business logic interpretation +- Designed for teams that want actionable scores alongside raw facts +- All consolidated in `/insights` package for consistency + +### The FCE: Factual Correlation Engine +The FCE correlates facts from multiple tools without interpreting them: +- Reports: "Tool A and Tool B both flagged line 100" +- Reports: "Pattern X and Pattern Y co-occur in file Z" +- Never says: "This is bad" or "Fix this way" + +## Core Components + +### Indexer Package (`theauditor/indexer/`) +The indexer has been refactored from a monolithic 2000+ line file into a modular package structure: + +``` +theauditor/indexer/ +├── __init__.py # Package initialization and backward compatibility +├── config.py # Constants, patterns, and configuration +├── database.py # DatabaseManager class for all DB operations +├── core.py # FileWalker and ASTCache classes +├── orchestrator.py # IndexOrchestrator - main coordination logic +└── extractors/ + ├── __init__.py # BaseExtractor abstract class and registry + ├── python.py # Python-specific extraction logic + ├── javascript.py # JavaScript/TypeScript extraction + ├── docker.py # Docker/docker-compose extraction + ├── sql.py # SQL extraction + └── nginx.py # Nginx configuration extraction +``` + +Key features: +- **Dynamic extractor registry** for automatic language detection +- **Batched database operations** (200 records per batch by default) +- **AST caching** for performance optimization +- **Monorepo detection** and intelligent path filtering +- **Parallel JavaScript processing** when semantic parser available + +### Pipeline System (`theauditor/pipelines.py`) +Orchestrates 
**14-phase** analysis pipeline in **parallel stages**: + +**Stage 1 - Foundation (Sequential):** +1. Repository indexing - Build manifest and symbol database +2. Framework detection - Identify technologies in use + +**Stage 2 - Concurrent Analysis (3 Parallel Tracks):** +- **Track A (Network I/O):** + - Dependency checking + - Documentation fetching + - Documentation summarization +- **Track B (Code Analysis):** + - Workset creation + - Linting + - Pattern detection +- **Track C (Graph Build):** + - Graph building + +**Stage 3 - Final Aggregation (Sequential):** +- Graph analysis +- Taint analysis +- Factual correlation engine +- Report generation + +### Pattern Detection Engine +- 100+ YAML-defined security patterns in `theauditor/patterns/` +- AST-based matching for Python and JavaScript +- Supports semantic analysis via TypeScript compiler + +### Factual Correlation Engine (FCE) (`theauditor/fce.py`) +- **29 advanced correlation rules** in `theauditor/correlations/rules/` +- Detects complex vulnerability patterns across multiple tools +- Categories: Authentication, Injection, Data Exposure, Infrastructure, Code Quality, Framework-Specific + +### Taint Analysis Package (`theauditor/taint_analyzer.py`) +A comprehensive taint analysis module that tracks data flow from sources to sinks: + +- Tracks data flow from user inputs to dangerous outputs +- Detects SQL injection, XSS, command injection vulnerabilities +- Database-aware analysis using `repo_index.db` +- Supports both assignment-based and direct-use patterns +- Merges findings from multiple detection methods + +**Note**: The optional severity scoring for taint analysis is provided by `theauditor/insights/taint.py` (Insights module) + +### Graph Analysis (`theauditor/graph/`) +- **builder.py**: Constructs dependency graph from codebase +- **analyzer.py**: Detects cycles, measures complexity, identifies hotspots +- Uses NetworkX for graph algorithms + +**Note**: The optional health scoring and recommendations are provided by `theauditor/insights/graph.py` (Insights module) + +### Framework Detection (`theauditor/framework_detector.py`) +- Auto-detects Django, Flask, React, Vue, Angular, etc. +- Applies framework-specific rules +- Influences pattern selection and analysis behavior + +### Configuration Parsers (`theauditor/parsers/`) +Specialized parsers for configuration file analysis: +- **webpack_config_parser.py**: Webpack configuration analysis +- **compose_parser.py**: Docker Compose file parsing +- **nginx_parser.py**: Nginx configuration parsing +- **dockerfile_parser.py**: Dockerfile security analysis +- **prisma_schema_parser.py**: Prisma ORM schema parsing + +These parsers are used by extractors during indexing to extract security-relevant configuration data. 
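To make the parser-to-extractor hand-off concrete, here is a minimal sketch of a Dockerfile parser following the `parse_file` convention described later in this document. The class name and the returned keys are illustrative, not a copy of the shipped `dockerfile_parser.py`:

```python
from pathlib import Path
from typing import Any


class DockerfileParser:
    """Illustrative parser: collects security-relevant facts from a Dockerfile."""

    def parse_file(self, file_path: Path) -> dict[str, Any]:
        base_images: list[str] = []
        env_vars: list[dict[str, Any]] = []
        runs_as_root = True  # assume worst case until a USER directive says otherwise

        for line_no, raw in enumerate(file_path.read_text().splitlines(), start=1):
            line = raw.strip()
            if line.upper().startswith("FROM "):
                base_images.append(line.split(maxsplit=1)[1])
            elif line.upper().startswith("ENV "):
                env_vars.append({"line": line_no, "value": line[4:].strip()})
            elif line.upper().startswith("USER ") and "root" not in line.lower():
                runs_as_root = False

        # Facts only -- no severity, no advice. Interpretation belongs to
        # downstream rules or the optional Insights layer.
        return {
            "base_images": base_images,
            "env_vars": env_vars,
            "runs_as_root": runs_as_root,
        }
```

During indexing, the Docker extractor can persist a dict like this into tables such as `docker_images`, where database-aware rules query it later; the parser itself never decides whether a fact is a problem.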
+ +### Refactoring Detection (`theauditor/commands/refactor.py`) +Detects incomplete refactorings and cross-stack inconsistencies: +- Analyzes database migrations to detect schema changes +- Uses impact analysis to trace affected files +- Applies correlation rules from `/correlations/rules/refactoring.yaml` +- Detects API contract mismatches, field migrations, foreign key changes +- Supports auto-detection from migration files or specific change analysis + +## System Architecture Diagrams + +### High-Level Data Flow + +```mermaid +graph TB + subgraph "Input Layer" + CLI[CLI Commands] + Files[Project Files] + end + + subgraph "Core Pipeline" + Index[Indexer] + Framework[Framework Detector] + Deps[Dependency Checker] + Patterns[Pattern Detection] + Taint[Taint Analysis] + Graph[Graph Builder] + FCE[Factual Correlation Engine] + end + + subgraph "Storage" + DB[(SQLite DB)] + Raw[Raw Output] + Chunks[65KB Chunks] + end + + CLI --> Index + Files --> Index + Index --> DB + Index --> Framework + Framework --> Deps + + Deps --> Patterns + Patterns --> Graph + Graph --> Taint + Taint --> FCE + + FCE --> Raw + Raw --> Chunks +``` + +### Parallel Pipeline Execution + +```mermaid +graph LR + subgraph "Stage 1 - Sequential" + S1[Index] --> S2[Framework Detection] + end + + subgraph "Stage 2 - Parallel" + direction TB + subgraph "Track A - Network I/O" + A1[Deps Check] + A2[Doc Fetch] + A3[Doc Summary] + A1 --> A2 --> A3 + end + + subgraph "Track B - Code Analysis" + B1[Workset] + B2[Linting] + B3[Patterns] + B1 --> B2 --> B3 + end + + subgraph "Track C - Graph" + C1[Graph Build] + end + end + + subgraph "Stage 3 - Sequential" + E1[Graph Analysis] --> E2[Taint] --> E3[FCE] --> E4[Report] + end + + S2 --> A1 + S2 --> B1 + S2 --> C1 + + A3 --> E1 + B3 --> E1 + C1 --> E1 +``` + +### Data Chunking System + +The extraction system (`theauditor/extraction.py`) implements pure courier model chunking: + +```mermaid +graph TD + subgraph "Analysis Results" + P[Patterns.json] + T[Taint.json
Multiple lists merged] + L[Lint.json] + F[FCE.json] + end + + subgraph "Extraction Process" + E[Extraction Engine<br/>Budget: 1.5MB] + M[Merge Logic<br/>For taint_paths + rule_findings] + C1[Chunk 1<br/>0-65KB] + C2[Chunk 2<br/>65-130KB] + C3[Chunk 3<br/>130-195KB] + TR[Truncation
Flag] + end + + subgraph "Output" + R1[patterns_chunk01.json] + R2[patterns_chunk02.json] + R3[patterns_chunk03.json] + end + + P --> E + T --> M --> E + L --> E + F --> E + + E --> C1 --> R1 + E --> C2 --> R2 + E --> C3 --> R3 + E -.->|If >195KB| TR + TR -.-> R3 +``` + +Key features: +- **Budget system**: 1.5MB total budget for all chunks +- **Smart merging**: Taint analysis merges multiple finding lists (taint_paths, rule_findings, infrastructure) +- **Preservation**: All findings preserved, no filtering or sampling +- **Chunking**: Only chunks files >65KB, copies smaller files as-is + +### Dual Environment Architecture + +```mermaid +graph TB + subgraph "Development Environment" + V1[.venv/] + PY[Python 3.11+] + AU[TheAuditor Code] + V1 --> PY --> AU + end + + subgraph "Sandboxed Analysis Environment" + V2[.auditor_venv/.theauditor_tools/] + NODE[Bundled Node.js v20.11.1] + TS[TypeScript Compiler] + ES[ESLint] + PR[Prettier] + NM[node_modules/] + V2 --> NODE + NODE --> TS + NODE --> ES + NODE --> PR + NODE --> NM + end + + AU -->|Analyzes using| V2 + AU -.->|Never uses| V1 +``` + +TheAuditor maintains strict separation between: +1. **Primary Environment** (`.venv/`): TheAuditor's Python code and dependencies +2. **Sandboxed Environment** (`.auditor_venv/.theauditor_tools/`): Isolated JS/TS analysis tools + +This ensures reproducibility and prevents TheAuditor from analyzing its own analysis tools. + +## Database Schema + +```mermaid +erDiagram + files ||--o{ symbols : contains + files ||--o{ refs : contains + files ||--o{ api_endpoints : contains + files ||--o{ sql_queries : contains + files ||--o{ docker_images : contains + + files { + string path PK + string language + int size + string hash + json metadata + } + + symbols { + string path FK + string name + string type + int line + json metadata + } + + refs { + string src FK + string value + string kind + int line + } + + api_endpoints { + string file FK + string method + string path + int line + } + + sql_queries { + string file_path FK + string command + string query + int line_number + } + + docker_images { + string file_path FK + string base_image + json env_vars + json build_args + } +``` + +## Command Flow Sequence + +```mermaid +sequenceDiagram + participant User + participant CLI + participant Pipeline + participant Analyzers + participant Database + participant Output + + User->>CLI: aud full + CLI->>Pipeline: Execute pipeline + Pipeline->>Database: Initialize schema + + Pipeline->>Analyzers: Index files + Analyzers->>Database: Store file metadata + + par Parallel Execution + Pipeline->>Analyzers: Dependency check + and + Pipeline->>Analyzers: Pattern detection + and + Pipeline->>Analyzers: Graph building + end + + Pipeline->>Analyzers: Taint analysis + Analyzers->>Database: Query symbols & refs + + Pipeline->>Analyzers: FCE correlation + Analyzers->>Output: Generate reports + + Pipeline->>Output: Create chunks + Output->>User: .pf/readthis/ +``` + +## Output Structure + +All results are organized in the `.pf/` directory: + +``` +.pf/ +├── raw/ # Immutable tool outputs (ground truth) +│ ├── eslint.json +│ ├── ruff.json +│ └── ... 
+├── readthis/ # AI-optimized chunks (<65KB each, max 3 chunks per file) +│ ├── manifest.md # Repository overview +│ ├── patterns_*.md # Security findings +│ ├── taint_*.md # Data-flow issues +│ └── tickets_*.md # Actionable tasks +├── repo_index.db # SQLite database of code symbols +├── pipeline.log # Execution trace +└── findings.json # Consolidated results +``` + +### Key Output Files + +- **manifest.md**: Complete file inventory with SHA-256 hashes +- **patterns_*.md**: Chunked security findings from 100+ detection rules +- **tickets_*.md**: Prioritized, actionable issues with evidence +- **repo_index.db**: Queryable database of all code symbols and relationships + +## Operating Modes + +TheAuditor operates in two distinct modes: + +### Courier Mode (External Tools) +- Preserves exact outputs from ESLint, Ruff, MyPy, etc. +- No interpretation or filtering +- Complete audit trail from source to finding + +### Expert Mode (Internal Engines) +- **Taint Analysis**: Tracks untrusted data through the application +- **Pattern Detection**: YAML-based rules with AST matching +- **Graph Analysis**: Architectural insights and dependency tracking +- **Secret Detection**: Identifies hardcoded credentials and API keys + +## CLI Entry Points + +- **Main CLI**: `theauditor/cli.py` - Central command router +- **Command modules**: `theauditor/commands/` - One module per command +- **Utilities**: `theauditor/utils/` - Shared functionality +- **Configuration**: `theauditor/config_runtime.py` - Runtime configuration + +Each command module follows a standardized structure with: +- `@click.command()` decorator +- `@handle_exceptions` decorator for error handling +- Consistent logging and output formatting + +## Performance Optimizations + +- **Batched database operations**: 200 records per batch (configurable) +- **Parallel rule execution**: ThreadPoolExecutor with 4 workers +- **AST caching**: Persistent cache for parsed AST trees +- **Incremental analysis**: Workset-based analysis for changed files only +- **Lazy loading**: Patterns and rules loaded on-demand +- **Memory-efficient chunking**: Stream large files instead of loading entirely + +## Configuration System + +TheAuditor supports runtime configuration via multiple sources (priority order): + +1. **Environment variables** (`THEAUDITOR_*` prefix) +2. **`.pf/config.json`** file (project-specific) +3. 
**Built-in defaults** in `config_runtime.py` + +Example configuration: +```bash +export THEAUDITOR_LIMITS_MAX_CHUNKS_PER_FILE=5 # Default: 3 +export THEAUDITOR_LIMITS_MAX_CHUNK_SIZE=100000 # Default: 65000 +export THEAUDITOR_LIMITS_MAX_FILE_SIZE=5242880 # Default: 2097152 +export THEAUDITOR_TIMEOUTS_LINT_TIMEOUT=600 # Default: 300 +``` + +## Advanced Features + +### Database-Aware Rules +Specialized analyzers query `repo_index.db` to detect: +- ORM anti-patterns (N+1 queries, missing transactions) +- Docker security misconfigurations +- Nginx configuration issues +- Multi-file correlation patterns + +### Holistic Analysis +Project-level analyzers that operate across the entire codebase: +- **Bundle Analyzer**: Correlates package.json, lock files, and imports +- **Source Map Detector**: Scans build directories for exposed maps +- **Framework Detectors**: Identify technology stack automatically + +### Incremental Analysis +Workset-based analysis for efficient processing: +- Git diff integration for changed file detection +- Dependency tracking for impact analysis +- Cached results for unchanged files + +## Contributing to TheAuditor + +### Adding Language Support + +TheAuditor's modular architecture makes it straightforward to add new language support: + +#### 1. Create an Extractor +Create a new extractor in `theauditor/indexer/extractors/{language}.py`: + +```python +from . import BaseExtractor + +class {Language}Extractor(BaseExtractor): + def supported_extensions(self) -> List[str]: + return ['.ext', '.ext2'] + + def extract(self, file_info, content, tree=None): + # Extract symbols, imports, routes, etc. + return { + 'imports': [], + 'routes': [], + 'symbols': [], + # ... other extracted data + } +``` + +The extractor will be automatically registered via the `BaseExtractor` inheritance. + +#### 2. Create Configuration Parser (Optional) +For configuration files, create a parser in `theauditor/parsers/{language}_parser.py`: + +```python +class {Language}Parser: + def parse_file(self, file_path: Path) -> Dict[str, Any]: + # Parse configuration file + return parsed_data +``` + +#### 3. Add Security Patterns +Create YAML patterns in `theauditor/patterns/{language}.yml`: + +```yaml +- name: hardcoded-secret-{language} + pattern: 'api_key\s*=\s*["\'][^"\']+["\']' + severity: critical + category: security + languages: ["{language}"] + description: "Hardcoded API key in {Language} code" +``` + +#### 4. Add Framework Detection +Update `theauditor/framework_detector.py` to detect {Language} frameworks. + +### Adding New Analyzers + +#### Database-Aware Rules +Create analyzers that query `repo_index.db` in `theauditor/rules/{category}/`: + +```python +def find_{issue}_patterns(db_path: str) -> List[Dict[str, Any]]: + conn = sqlite3.connect(db_path) + # Query and analyze + return findings +``` + +#### AST-Based Rules +For semantic analysis, create rules in `theauditor/rules/{framework}/`: + +```python +def find_{framework}_issues(tree, file_path) -> List[Dict[str, Any]]: + # Traverse AST and detect issues + return findings +``` + +#### Pattern-Based Rules +Add YAML patterns to `theauditor/patterns/` for regex-based detection. + +### Architecture Guidelines + +1. **Maintain Truth Courier vs Insights separation** - Core modules report facts, insights add interpretation +2. **Use the extractor registry** - Inherit from `BaseExtractor` for automatic registration +3. **Follow existing patterns** - Look at `python.py` or `javascript.py` extractors as examples +4. 
**Write comprehensive tests** - Test extractors, parsers, and patterns +5. **Document your additions** - Update this file and CONTRIBUTING.md + +For detailed contribution guidelines, see [CONTRIBUTING.md](CONTRIBUTING.md). \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..225b7ba --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,454 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Quick Reference Commands + +```bash +# Development Setup +python -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate +pip install -e ".[all]" +aud setup-claude --target . # MANDATORY for JS/TS analysis + +# Testing +pytest -v # Run all tests +pytest tests/test_file.py # Run specific test file +pytest -k "test_name" # Run specific test by name +pytest --cov=theauditor # With coverage + +# Code Quality +ruff check theauditor tests --fix # Lint and auto-fix +ruff format theauditor tests # Format code +black theauditor tests # Alternative formatter +mypy theauditor --strict # Type checking + +# Running TheAuditor +aud init # Initialize project +aud full # Complete analysis (14 phases) +aud full --offline # Skip network operations (deps, docs) +aud index --exclude-self # When analyzing TheAuditor itself + +# Individual Analysis Commands +aud index # Build code index database +aud detect-patterns # Run security pattern detection +aud taint-analyze # Perform taint flow analysis +aud graph build # Build dependency graph +aud graph analyze # Analyze graph structure +aud fce # Run Factual Correlation Engine +aud report # Generate final report +aud workset # Create working set of critical files +aud impact # Analyze impact of changing a file + +# Utility Commands +aud setup-claude # Setup sandboxed JS/TS tools (MANDATORY) +aud js-semantic # Parse JS/TS file semantically +aud structure # Display project structure +aud insights # Generate ML insights (requires [ml] extras) +aud refactor # Perform refactoring operations +``` + +## Project Overview + +TheAuditor is an offline-first, AI-centric SAST (Static Application Security Testing) and code intelligence platform written in Python. It performs comprehensive security auditing and code analysis for Python and JavaScript/TypeScript projects, producing AI-consumable reports optimized for LLM context windows. + +## Core Philosophy: Truth Courier, Not Mind Reader + +**CRITICAL UNDERSTANDING**: TheAuditor does NOT try to understand business logic or make AI "smarter." It solves the real problem: **AI loses context and makes inconsistent changes across large codebases.** + +### The Development Loop +1. **Human tells AI**: "Add JWT auth with CSRF protection" +2. **AI writes code**: Probably has issues due to context limits (hardcoded secrets, missing middleware, etc.) +3. **Human runs**: `aud full` +4. **TheAuditor reports**: All inconsistencies and security holes as FACTS +5. **AI reads report**: Now sees the COMPLETE picture across all files +6. **AI fixes issues**: With full visibility of what's broken +7. **Repeat until clean** + +TheAuditor is about **consistency checking**, not semantic understanding. It finds where code doesn't match itself, not whether it matches business requirements. + +## Critical Setup Requirements + +### For JavaScript/TypeScript Analysis +TheAuditor requires a sandboxed environment for JS/TS tools. This is NOT optional: + +```bash +# MANDATORY: Set up sandboxed tools +aud setup-claude --target . 
+``` + +This creates `.auditor_venv/.theauditor_tools/` with isolated TypeScript compiler and ESLint. Without this, TypeScript semantic analysis will fail. + +## Key Architectural Decisions + +### Modular Package Structure +The codebase follows a modular design where large modules are refactored into packages. Example: the indexer was refactored from a 2000+ line monolithic file into: +``` +theauditor/indexer/ +├── __init__.py # Backward compatibility shim +├── config.py # Constants and patterns +├── database.py # DatabaseManager class +├── core.py # FileWalker, ASTCache +├── orchestrator.py # Main coordination +└── extractors/ # Language-specific logic +``` + +When refactoring, always: +1. Create a package with the same name as the original module +2. Provide a backward compatibility shim in `__init__.py` +3. Separate concerns into focused modules +4. Use dynamic registries for extensibility + +### Database Contract Preservation +The `repo_index.db` schema is consumed by many downstream modules (taint_analyzer, graph builder, etc.). When modifying indexer or database operations: +- NEVER change table schemas without migration +- Preserve exact column names and types +- Maintain the same data format in JSON columns +- Test downstream consumers after changes + +## Architecture Overview + +### Truth Courier vs Insights: Separation of Concerns + +TheAuditor maintains strict separation between **factual observation** and **optional interpretation**: + +#### Truth Courier Modules (Core - Always Active) +Report verifiable facts without judgment: +- **Indexer**: "Function X exists at line Y" +- **Taint Analyzer**: "Data flows from req.body to res.send" (NOT "XSS vulnerability") +- **Impact Analyzer**: "Changing X affects 47 files through dependency chains" +- **Pattern Detector**: "Line X matches pattern Y" +- **Graph Analyzer**: "Cycle detected: A→B→C→A" + +#### Insights Modules (Optional - Not Installed by Default) +Add scoring and classification on top of facts: +- **taint/insights.py**: Adds "This is HIGH severity XSS" +- **graph/insights.py**: Adds "Health score: 70/100" +- **ml.py**: Requires `pip install -e ".[ml]"` - adds predictions + +#### Correlation Rules (Project-Specific Pattern Detection) +- Located in `theauditor/correlations/rules/` +- Detect when multiple facts indicate inconsistency +- Example: "Backend moved field to ProductVariant but frontend still uses Product.price" +- NOT business logic understanding, just pattern matching YOUR refactorings + +### Dual-Environment Design +TheAuditor maintains strict separation between: +1. **Primary Environment** (`.venv/`): TheAuditor's Python code and dependencies +2. **Sandboxed Environment** (`.auditor_venv/.theauditor_tools/`): Isolated JS/TS analysis tools + +### Core Components + +#### Indexer Package (`theauditor/indexer/`) +The indexer has been refactored from a monolithic 2000+ line file into a modular package: +- **config.py**: Constants, patterns, and configuration (SKIP_DIRS, language maps, etc.) +- **database.py**: DatabaseManager class handling all database operations +- **core.py**: FileWalker (with monorepo detection) and ASTCache classes +- **orchestrator.py**: IndexOrchestrator coordinating the indexing process +- **extractors/**: Language-specific extractors (Python, JavaScript, Docker, SQL, nginx) + +The package uses a dynamic extractor registry for automatic language detection and processing. 
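A compressed sketch of what that registry pattern looks like is shown below; the decorator name matches the `register_extractor` convention used later in this file, while the class-attribute form of `supported_extensions` and the toy extraction logic are illustrative rather than the shipped implementation:

```python
_REGISTRY: dict[str, type] = {}


def register_extractor(cls: type) -> type:
    """Class decorator: map every extension an extractor claims to its class."""
    for ext in cls.supported_extensions:
        _REGISTRY[ext] = cls
    return cls


@register_extractor
class PythonExtractor:
    supported_extensions = [".py"]

    def extract(self, file_info: dict, content: str, tree=None) -> dict:
        # Facts only: symbol names and line numbers, no interpretation.
        symbols = [
            {"name": line.split()[1].split("(")[0], "type": "function", "line": i}
            for i, line in enumerate(content.splitlines(), start=1)
            if line.lstrip().startswith("def ") and "(" in line
        ]
        return {"symbols": symbols, "imports": [], "routes": []}


def extractor_for(path: str):
    """Automatic language detection: dispatch purely on file extension."""
    for ext, cls in _REGISTRY.items():
        if path.endswith(ext):
            return cls()
    return None
```

Indexing then reduces to walking files, asking the registry for an extractor, and batching the returned dicts into `repo_index.db`.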
+ +#### Pipeline System (`theauditor/pipelines.py`) +- Orchestrates **14-phase** analysis pipeline in **parallel stages**: + - **Stage 1**: Foundation (index with batched DB operations, framework detection) + - **Stage 2**: 3 concurrent tracks (Network I/O, Code Analysis, Graph Build) + - **Stage 3**: Final aggregation (graph analysis, taint, FCE, report) +- Handles error recovery and logging +- **Performance optimizations**: + - Batched database inserts (200 records per batch) in indexer + - Parallel rule execution with ThreadPoolExecutor (4 workers) + - Parallel holistic analysis (bundle + sourcemap detection) + +#### Pattern Detection Engine +- 100+ YAML-defined security patterns in `theauditor/patterns/` +- AST-based matching for Python and JavaScript +- Supports semantic analysis via TypeScript compiler + +#### Factual Correlation Engine (FCE) (`theauditor/fce.py`) +- **29 advanced correlation rules** in `theauditor/correlations/rules/` +- Detects complex vulnerability patterns across multiple tools +- Categories: Authentication, Injection, Data Exposure, Infrastructure, Code Quality, Framework-Specific + +#### Taint Analysis Package (`theauditor/taint_analyzer/`) +Previously a monolithic 1822-line file, now refactored into a modular package: +- **core.py**: TaintAnalyzer main class +- **sources.py**: Source pattern definitions (user inputs) +- **sinks.py**: Sink pattern definitions (dangerous outputs) +- **patterns.py**: Pattern matching logic +- **flow.py**: Data flow tracking algorithms +- **insights.py**: Optional severity scoring (Insights module) + +Features: +- Tracks data flow from sources to sinks +- Detects SQL injection, XSS, command injection +- Database-aware analysis using `repo_index.db` +- Supports both assignment-based and direct-use taint flows +- Merges findings from multiple detection methods (taint_paths, rule_findings, infrastructure) + +#### Framework Detection (`theauditor/framework_detector.py`) +- Auto-detects Django, Flask, React, Vue, etc. 
+- Applies framework-specific rules + +#### Graph Analysis (`theauditor/commands/graph.py`) +- Build dependency graphs with `aud graph build` +- Analyze graph health with `aud graph analyze` +- Visualize with GraphViz output (optional) +- Detect circular dependencies and architectural issues + +#### Output Structure +``` +.pf/ +├── raw/ # Immutable tool outputs (ground truth) +├── readthis/ # AI-optimized chunks (<65KB each, max 3 chunks per file) +├── repo_index.db # SQLite database of code symbols +└── pipeline.log # Execution trace +``` + +### CLI Entry Points +- Main CLI: `theauditor/cli.py` +- Command modules: `theauditor/commands/` +- Each command is a separate module with standardized structure + +## Available Commands + +### Core Analysis Commands +- `aud index`: Build comprehensive code index +- `aud detect-patterns`: Run security pattern detection +- `aud taint-analyze`: Perform taint flow analysis +- `aud fce`: Run Factual Correlation Engine +- `aud report`: Generate final consolidated report + +### Graph Commands +- `aud graph build`: Build dependency graph +- `aud graph analyze`: Analyze graph health metrics +- `aud graph visualize`: Generate GraphViz visualization + +### Utility Commands +- `aud deps`: Analyze dependencies and vulnerabilities +- `aud docs`: Extract and analyze documentation +- `aud docker-analyze`: Analyze Docker configurations +- `aud lint`: Run code linters +- `aud workset`: Create critical file working set +- `aud impact `: Analyze change impact radius +- `aud structure`: Display project structure +- `aud insights`: Generate ML-powered insights (optional) +- `aud refactor `: Automated refactoring tools + +## How to Work with TheAuditor Effectively + +### The Correct Workflow +1. **Write specific requirements**: "Add JWT auth with httpOnly cookies, CSRF tokens, rate limiting" +2. **Let AI implement**: It will probably mess up due to context limits +3. **Run audit**: `aud full` +4. **Read the facts**: Check `.pf/readthis/` for issues +5. **Fix based on facts**: Address the specific inconsistencies found +6. **Repeat until clean**: Keep auditing and fixing until no issues + +### What NOT to Do +- ❌ Don't ask AI to "implement secure authentication" (too vague) +- ❌ Don't try to make TheAuditor understand your business logic +- ❌ Don't expect TheAuditor to write fixes (it only reports issues) +- ❌ Don't ignore the audit results and claim "done" + +### Understanding the Output +- **Truth Couriers** report facts: "JWT secret hardcoded at line 47" +- **Insights** (if installed) add interpretation: "HIGH severity" +- **Correlations** detect YOUR patterns: "Frontend expects old API structure" +- **Impact Analysis** shows blast radius: "Changing this affects 23 files" + +## Critical Development Patterns + +### Adding New Commands +1. Create module in `theauditor/commands/` with this structure: +```python +import click +from theauditor.utils.decorators import handle_exceptions +from theauditor.utils.logger import setup_logger + +logger = setup_logger(__name__) + +@click.command() +@click.option('--workset', is_flag=True, help='Use workset files') +@handle_exceptions +def command_name(workset): + """Command description.""" + logger.info("Starting command...") + # Implementation +``` + +2. 
Register in `theauditor/cli.py`: +```python +from theauditor.commands import your_command +cli.add_command(your_command.command_name) +``` + +### Adding Language Support +To add a new language, create an extractor in `theauditor/indexer/extractors/`: +```python +from theauditor.indexer.extractors import BaseExtractor, register_extractor + +@register_extractor +class YourLanguageExtractor(BaseExtractor): + @property + def supported_extensions(self): + return ['.ext', '.ext2'] + + def extract(self, file_info, content, tree): + # Return dict with symbols, imports, etc. +``` + +The extractor will be auto-discovered via the registry pattern. + +## CRITICAL: Reading Chunked Data + +**IMPORTANT**: When processing files from `.pf/readthis/`, you MUST check for truncation: + +```python +# Files may be split into chunks if >65KB +# Always check the 'chunk_info' field in JSON files: +chunk_info = data.get('chunk_info', {}) +if chunk_info.get('truncated', False): + # This means there were more findings but only 3 chunks were created + # The data is incomplete - warn the user + print("WARNING: Data was truncated at 3 chunks") +``` + +**Key Points**: +- Files larger than 65KB are split into chunks (configurable via `THEAUDITOR_LIMITS_MAX_CHUNK_SIZE`) +- Maximum 3 chunks per file by default (configurable via `THEAUDITOR_LIMITS_MAX_CHUNKS_PER_FILE`) +- Example: `patterns_chunk01.json`, `patterns_chunk02.json`, `patterns_chunk03.json` +- If `truncated: true` in `chunk_info`, there were more findings that couldn't fit +- Always process ALL chunk files for complete data + +## Critical Working Knowledge + +### Pipeline Execution Order +The `aud full` command runs 14 phases in 3 stages: +1. **Sequential**: index → framework_detect +2. **Parallel**: (deps, docs) || (workset, lint, patterns) || (graph_build) +3. **Sequential**: graph_analyze → taint → fce → report + +If modifying pipeline, maintain this dependency order. 
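A minimal sketch of that ordering follows, assuming a placeholder `run(phase)` callable; it shows the dependency structure only, not the real pipeline function signatures:

```python
from concurrent.futures import ThreadPoolExecutor


def run_full_pipeline(run) -> None:
    """Execute phases in the documented 3-stage order; `run(name)` runs one phase."""
    # Stage 1: foundation, strictly sequential.
    run("index")
    run("framework_detect")

    # Stage 2: three independent tracks executed concurrently.
    tracks = [
        ["deps", "doc_fetch", "doc_summary"],    # Track A: network I/O
        ["workset", "lint", "detect_patterns"],  # Track B: code analysis
        ["graph_build"],                         # Track C: graph build
    ]
    with ThreadPoolExecutor(max_workers=3) as pool:
        futures = [pool.submit(lambda t=t: [run(p) for p in t]) for t in tracks]
        for future in futures:
            future.result()  # surface the first track failure, if any

    # Stage 3: aggregation, depends on everything above.
    for phase in ("graph_analyze", "taint", "fce", "report"):
        run(phase)
```

Passing `print` as `run` is enough to eyeball the ordering; any new phase should be slotted into the stage whose inputs it actually needs.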
+ +### File Size and Memory Management +- Files >2MB are skipped by default (configurable) +- JavaScript files are batched for semantic parsing to avoid memory issues +- AST cache persists parsed trees to `.pf/.ast_cache/` +- Database operations batch at 200 records (configurable) + +### Monorepo Detection +The indexer automatically detects monorepo structures and applies intelligent filtering: +- Standard paths: `backend/src/`, `frontend/src/`, `packages/*/src/` +- Whitelist mode activated when monorepo detected +- Prevents analyzing test files, configs, migrations as source code + +### JavaScript/TypeScript Special Handling +- MUST run `aud setup-claude --target .` first +- Uses bundled Node.js v20.11.1 in `.auditor_venv/.theauditor_tools/` +- TypeScript semantic analysis requires `js_semantic_parser.py` +- ESLint runs in sandboxed environment, not project's node_modules + +### Environment Variables +Key environment variables for configuration: +- `THEAUDITOR_LIMITS_MAX_FILE_SIZE`: Maximum file size to analyze (default: 2MB) +- `THEAUDITOR_LIMITS_MAX_CHUNK_SIZE`: Maximum chunk size for readthis output (default: 65KB) +- `THEAUDITOR_LIMITS_MAX_CHUNKS_PER_FILE`: Maximum chunks per file (default: 3) +- `THEAUDITOR_DB_BATCH_SIZE`: Database batch insert size (default: 200) + +## Recent Fixes & Known Issues + +### Parser Integration (Fixed) +- **Previous Issue**: Configuration parsers (webpack, nginx, docker-compose) were orphaned +- **Root Cause**: Import paths in extractors didn't match actual parser module names +- **Fix Applied**: Corrected import paths in `generic.py` and `docker.py` extractors +- **Current Status**: All 5 parsers now functional for config security analysis + +### Extraction Budget & Taint Merging (Fixed) +- **Previous Issue**: Taint analysis only extracted 26 of 102 findings +- **Root Cause**: Only chunking `taint_paths`, missing `all_rule_findings` and `infrastructure_issues` +- **Fix Applied**: Extraction now merges all taint finding lists; budget increased to 1.5MB +- **Current Status**: All taint findings properly extracted and chunked + +### Migration Detection (Enhanced) +- **Previous Issue**: Only checked basic migration paths +- **Root Cause**: Missing common paths like `backend/migrations/` and `frontend/migrations/` +- **Fix Applied**: Added standard migration paths with validation for actual migration files +- **Current Status**: Auto-detects migrations with helpful warnings for non-standard locations + +### TypeScript Taint Analysis (Fixed) +- **Previous Issue**: Taint analysis reported 0 sources/sinks for TypeScript +- **Root Cause**: Text extraction was removed from `js_semantic_parser.py` (lines 275, 514) +- **Fix Applied**: Restored `result.text` field extraction +- **Current Status**: TypeScript taint analysis now working - detects req.body → res.send flows + +### Direct-Use Vulnerability Detection (Fixed) +- **Previous Issue**: Only detected vulnerabilities through variable assignments +- **Root Cause**: `trace_from_source()` required intermediate variables +- **Fix Applied**: Added direct-use detection for patterns like `res.send(req.body)` +- **Current Status**: Now detects both assignment-based and direct-use taint flows + +### Known Limitations +- Maximum 2MB file size for analysis (configurable) +- TypeScript decorator metadata not fully parsed +- Some advanced ES2024+ syntax may not be recognized +- GraphViz visualization requires separate installation + +## Common Misconceptions to Avoid + +### TheAuditor is NOT: +- ❌ A semantic understanding 
tool (doesn't understand what your code "means") +- ❌ A business logic validator (doesn't know your business rules) +- ❌ An AI enhancement tool (doesn't make AI "smarter") +- ❌ A code generator (only reports issues, doesn't fix them) + +### TheAuditor IS: +- ✅ A consistency checker (finds where code doesn't match itself) +- ✅ A fact reporter (provides ground truth about your code) +- ✅ A context provider (gives AI the full picture across all files) +- ✅ An audit trail (immutable record of what tools found) + +## Troubleshooting + +### TypeScript Analysis Fails +Solution: Run `aud setup-claude --target .` + +### Taint Analysis Reports 0 Vulnerabilities on TypeScript +- Check that `js_semantic_parser.py` has text extraction enabled (lines 275, 514) +- Verify symbols table contains property accesses: `SELECT * FROM symbols WHERE name LIKE '%req.body%'` +- Ensure you run `aud index` before `aud taint-analyze` + +### Pipeline Failures +Check `.pf/error.log` and `.pf/pipeline.log` for details + +### Linting No Results +Ensure linters installed: `pip install -e ".[linters]"` + +### Graph Commands Not Working +- Ensure `aud index` has been run first +- Check that NetworkX is installed: `pip install -e ".[all]"` + +## Testing Vulnerable Code +Test projects are in `fakeproj/` directory. Always use `--exclude-self` when analyzing them to avoid false positives from TheAuditor's own configuration. + +## Project Dependencies + +### Required Dependencies (Core) +- click==8.2.1 - CLI framework +- PyYAML==6.0.2 - YAML parsing +- jsonschema==4.25.1 - JSON validation +- ijson==3.4.0 - Incremental JSON parsing + +### Optional Dependencies +Install with `pip install -e ".[group]"`: +- **[linters]**: ruff, mypy, black, bandit, pylint +- **[ml]**: scikit-learn, numpy, scipy, joblib +- **[ast]**: tree-sitter, sqlparse, dockerfile-parse +- **[all]**: Everything including NetworkX for graphs + +## Performance Expectations +- Small project (< 5K LOC): ~2 minutes +- Medium project (20K LOC): ~30 minutes +- Large monorepo (100K+ LOC): 1-2 hours +- Memory usage: ~500MB-2GB depending on codebase size +- Disk space: ~100MB for .pf/ output directory \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..d4972f7 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,429 @@ +# Contributing to TheAuditor + +Thank you for your interest in contributing to TheAuditor! We're excited to have you join our mission to bring ground truth to AI-assisted development. This guide will help you get started with contributing to the project. + +## How to Get Involved + +### Reporting Bugs + +Found a bug? Please help us fix it! + +1. Check existing [GitHub Issues](https://github.com/TheAuditorTool/Auditor/issues) to see if it's already reported +2. If not, create a new issue with: + - Clear description of the bug + - Steps to reproduce + - Expected vs actual behavior + - Your environment details (OS, Python version, Node.js version) + +### Suggesting Enhancements + +Have an idea for improving TheAuditor? + +1. Review our [ROADMAP.md](ROADMAP.md) to see if it aligns with our vision +2. Check [GitHub Issues](https://github.com/TheAuditorTool/Auditor/issues) for similar suggestions +3. 
Create a new issue describing: + - The problem you're trying to solve + - Your proposed solution + - Why this would benefit TheAuditor users + +## Setting Up Your Development Environment + +Follow these steps to get TheAuditor running locally for development: + +```bash +# Clone the repository +git clone https://github.com/TheAuditorTool/Auditor.git +cd theauditor + +# Create a Python virtual environment +python -m venv .venv + +# Activate the virtual environment +# On Linux/macOS: +source .venv/bin/activate +# On Windows: +.venv\Scripts\activate + +# Install TheAuditor in development mode +pip install -e . + +# Optional: Install with ML capabilities +# pip install -e ".[ml]" + +# For development with all optional dependencies: +# pip install -e ".[all]" + +# MANDATORY: Set up the sandboxed environment +# This is required for TheAuditor to function at all +aud setup-claude --target . +``` + +The `aud setup-claude --target .` command creates an isolated environment at `.auditor_venv/.theauditor_tools/` with all necessary JavaScript and TypeScript analysis tools. This ensures consistent, reproducible results across all development environments. + +## Making Changes & Submitting a Pull Request + +### Development Workflow + +1. **Fork the repository** on GitHub +2. **Create a feature branch** from `main`: + ```bash + git checkout -b feature/your-feature-name + ``` +3. **Make your changes** following our code standards (see below) +4. **Write/update tests** if applicable +5. **Commit your changes** with clear, descriptive messages: + ```bash + git commit -m "Add GraphQL schema analyzer for type validation" + ``` +6. **Push to your fork**: + ```bash + git push origin feature/your-feature-name + ``` +7. **Create a Pull Request** on GitHub with: + - Clear description of changes + - Link to any related issues + - Test results or examples + +## Code Standards + +We use **ruff** for both linting and formatting Python code. Before submitting any code, you MUST run: + +```bash +# Fix any auto-fixable issues and check for remaining problems +ruff check . --fix + +# Format all Python code +ruff format . +``` + +Your pull request will not be merged if it fails these checks. + +### Additional Quality Checks + +For comprehensive code quality, you can also run: + +```bash +# Type checking (optional but recommended) +mypy theauditor --strict + +# Run tests +pytest tests/ + +# Full linting suite +make lint +``` + +### Code Style Guidelines + +- Follow PEP 8 for Python code +- Use descriptive variable and function names +- Add docstrings to all public functions and classes +- Keep functions focused and small (under 50 lines preferred) +- Write self-documenting code; minimize comments +- Never commit secrets, API keys, or credentials + +## Adding Support for New Languages + +TheAuditor's modular architecture makes it straightforward to add support for new programming languages. This section provides comprehensive guidance for contributors looking to expand our language coverage. 
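+
+Throughout the steps below, extractors and rules hand their results back in a common finding format. Based on the AST rule example later in this guide, a single finding looks roughly like the dictionary sketched here; treat the values as hypothetical and the keys as the convention to follow:
+
+```python
+# Illustrative shape of one finding (values are hypothetical; keys mirror the AST rule example below).
+finding = {
+    "pattern_name": "HARDCODED_SECRET",
+    "message": "Hardcoded secret detected",
+    "file": "src/app.ext",
+    "line": 42,
+    "column": 5,
+    "severity": "high",
+    "snippet": "api_key = '...'",
+    "category": "security",
+    "match_type": "ast",
+}
+```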
+ +### Overview + +Adding a new language to TheAuditor involves: +- Creating a parser for the language +- Adding framework detection patterns +- Creating security pattern rules +- Writing comprehensive tests +- Updating documentation + +### Prerequisites + +Before starting, ensure you have: +- Deep knowledge of the target language and its ecosystem +- Understanding of common security vulnerabilities in that language +- Familiarity with AST (Abstract Syntax Tree) concepts +- Python development experience + +### Step-by-Step Guide + +#### Step 1: Create the Language Extractor + +Create a new extractor in `theauditor/indexer/extractors/{language}.py` that inherits from `BaseExtractor`: + +```python +from . import BaseExtractor + +class {Language}Extractor(BaseExtractor): + def supported_extensions(self) -> List[str]: + """Return list of file extensions this extractor supports.""" + return ['.ext', '.ext2'] + + def extract(self, file_info: Dict[str, Any], content: str, + tree: Optional[Any] = None) -> Dict[str, Any]: + """Extract all relevant information from a file.""" + return { + 'imports': self.extract_imports(content, file_info['ext']), + 'routes': self.extract_routes(content), + 'symbols': [], # Add symbol extraction logic + 'assignments': [], # For taint analysis + 'function_calls': [], # For call graph + 'returns': [] # For data flow + } +``` + +The extractor will be automatically registered through the `BaseExtractor` inheritance pattern. + +#### Step 2: Create Configuration Parser (Optional) + +If your language has configuration files that need parsing, create a parser in `theauditor/parsers/{language}_parser.py`: + +```python +class {Language}Parser: + def parse_file(self, file_path: Path) -> Dict[str, Any]: + """Parse configuration file and extract security-relevant data.""" + # Parse and return structured data + return parsed_data +``` + +#### Step 3: Add Framework Detection + +Add your language's frameworks to `theauditor/framework_registry.py`: + +```python +# Add to FRAMEWORK_REGISTRY dictionary +"{framework_name}": { + "language": "{language}", + "detection_sources": { + # Package manifest files + "package.{ext}": [ + ["dependencies"], + ["devDependencies"], + ], + # Or for line-based search + "requirements.txt": "line_search", + # Or for content search + "build.file": "content_search", + }, + "package_pattern": "{framework_package_name}", + "import_patterns": ["import {framework}", "from {framework}"], + "file_markers": ["config.{ext}", "app.{ext}"], +} +``` + +#### Step 4: Create Language-Specific Patterns + +Create security patterns for your language in `theauditor/patterns/{language}.yml`: + +Example pattern structure: +```yaml +- name: hardcoded-secret-{language} + pattern: '(api[_-]?key|secret|token|password)\s*=\s*["\'][^"\']+["\']' + severity: critical + category: security + languages: ["{language}"] + description: "Hardcoded secret detected in {Language} code" + cwe: CWE-798 +``` + +#### Step 5: Create AST-Based Rules (Optional but Recommended) + +For complex security patterns, create AST-based rules in `theauditor/rules/{language}/`: + +```python +"""Security rules for {Language} using AST analysis.""" + +from typing import Any, Dict, List + +def find_{vulnerability}_issues(ast_tree: Any, file_path: str) -> List[Dict[str, Any]]: + """Find {vulnerability} issues in {Language} code. 
+ + Args: + ast_tree: Parsed AST from {language}_parser + file_path: Path to the source file + + Returns: + List of findings with standard format + """ + findings = [] + + # Implement AST traversal and pattern detection + for node in walk_ast(ast_tree): + if is_vulnerable_pattern(node): + findings.append({ + 'pattern_name': '{VULNERABILITY}_ISSUE', + 'message': 'Detailed description of the issue', + 'file': file_path, + 'line': node.line, + 'column': node.column, + 'severity': 'high', + 'snippet': extract_snippet(node), + 'category': 'security', + 'match_type': 'ast' + }) + + return findings +``` + +### Extractor Interface Specification + +All language extractors MUST inherit from `BaseExtractor` and implement: + +```python +from theauditor.indexer.extractors import BaseExtractor + +class LanguageExtractor(BaseExtractor): + """Extractor for {Language} files.""" + + def supported_extensions(self) -> List[str]: + """Return list of supported file extensions.""" + return ['.ext'] + + def extract(self, file_info: Dict[str, Any], content: str, + tree: Optional[Any] = None) -> Dict[str, Any]: + """Extract all relevant information from a file.""" + return { + 'imports': [], + 'routes': [], + 'symbols': [], + 'assignments': [], + 'function_calls': [], + 'returns': [] + } +``` + +### Testing Requirements + +#### Required Test Coverage + +1. **Extractor Tests** (`tests/test_{language}_extractor.py`): + - Test extracting from valid files + - Test handling of syntax errors + - Test symbol extraction + - Test import extraction + - Test file extension detection + +2. **Pattern Tests** (`tests/patterns/test_{language}_patterns.py`): + - Test security pattern detection + - Ensure patterns don't over-match (false positives) + +3. **Integration Tests** (`tests/integration/test_{language}_integration.py`): + - Test language in complete analysis pipeline + +#### Test Data + +Create test fixtures in `tests/fixtures/{language}/`: +- `valid_code.{ext}` - Valid code samples +- `vulnerable_code.{ext}` - Code with known vulnerabilities +- `edge_cases.{ext}` - Edge cases and corner scenarios + +### Submission Checklist + +Before submitting your PR, ensure: + +- [ ] Extractor inherits from `BaseExtractor` and implements required methods +- [ ] Extractor placed in `theauditor/indexer/extractors/{language}.py` +- [ ] Framework detection added to `framework_detector.py` (if applicable) +- [ ] At least 10 security patterns created in `patterns/{language}.yml` +- [ ] AST-based rules for complex patterns (if applicable) +- [ ] All tests passing with >80% coverage +- [ ] Documentation updated (extractor docstrings, pattern descriptions) +- [ ] Example vulnerable code provided in test fixtures +- [ ] No external dependencies without approval +- [ ] Code follows project style (run `ruff format`) + +## Adding New Analyzers + +### The Three-Tier Detection Architecture + +TheAuditor uses a hybrid approach to detection, prioritizing accuracy and context. When contributing a new rule, please adhere to the following "AST First, Regex as Fallback" philosophy: + +- **Tier 1: Multi-Language AST Rules (Preferred)** + For complex code patterns in source code (Python, JS/TS, etc.), extend or create a polymorphic AST-based rule in the `/rules` directory. These are the most powerful and accurate and should be the default choice for source code analysis. + +- **Tier 2: Language-Specific AST Rules** + If a multi-language backend is not feasible, a language-specific AST rule is the next best option. 
The corresponding regex pattern should then be scoped to exclude the language covered by the AST rule (see `db_issues.yml` for an example). + +- **Tier 3: Regex Patterns (YAML)** + Regex patterns in `/patterns` should be reserved for: + 1. Simple patterns where an AST is overkill. + 2. Configuration files where no AST parser exists (e.g., `.yml`, `.conf`). + 3. Providing baseline coverage for languages not yet supported by an AST rule. + +TheAuditor uses a modular architecture. To add new analysis capabilities: + +### Database-Aware Rules +For rules that query across multiple files: +```python +# theauditor/rules/category/new_analyzer.py +def find_new_issues(db_path: str) -> List[Dict[str, Any]]: + conn = sqlite3.connect(db_path) + # Query the repo_index.db + # Return findings in standard format +``` + +Example ORM analyzer: +```python +# theauditor/rules/orm/sequelize_detector.py +def find_sequelize_issues(db_path: str) -> List[Dict[str, Any]]: + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + cursor.execute( + "SELECT file, line, query_type, includes FROM orm_queries" + ) + # Analyze for N+1 queries, death queries, etc. +``` + +### AST-Based Rules +For semantic code analysis: +```python +# theauditor/rules/framework/new_detector.py +def find_framework_issues(tree: Any, file_path: str) -> List[Dict[str, Any]]: + # Traverse semantic AST + # Return findings in standard format +``` + +### Pattern-Based Rules +Add YAML patterns to `theauditor/patterns/`: +```yaml +name: insecure_api_key +severity: critical +category: security +pattern: 'api[_-]?key\s*=\s*["\'][^"\']+["\']' +description: "Hardcoded API key detected" +``` + +## Testing + +Write tests for any new functionality: + +```bash +# Run all tests +pytest + +# Run specific test file +pytest tests/test_your_feature.py + +# Run with coverage +pytest --cov=theauditor +``` + +## Documentation + +- Update relevant documentation when making changes +- Add docstrings to new functions and classes +- Update `README.md` if adding new commands or features +- Consider updating `howtouse.md` for user-facing changes + +## Getting Help + +- Check our [TeamSOP](teamsop.md) for our development workflow +- Review [CLAUDE.md](CLAUDE.md) for AI-assisted development guidelines +- Ask questions in GitHub Issues or Discussions +- Join our community chat (if available) + +## License + +By contributing to TheAuditor, you agree that your contributions will be licensed under the same license as the project. + +--- + +We're excited to see your contributions! Whether you're fixing bugs, adding features, or improving documentation, every contribution helps make TheAuditor better for everyone. \ No newline at end of file diff --git a/HOWTOUSE.md b/HOWTOUSE.md new file mode 100644 index 0000000..70afccb --- /dev/null +++ b/HOWTOUSE.md @@ -0,0 +1,1132 @@ +# How to Use TheAuditor + +This comprehensive guide covers everything you need to know about setting up, configuring, and using **TheAuditor** for code analysis and security auditing. Whether you're performing a one-time security audit or integrating continuous analysis into your development workflow, this guide will walk you through every step. 
+
+---
+
+## Prerequisites
+
+Before installing **TheAuditor**, ensure you have:
+
+- **Python 3.11 or higher** (3.12+ recommended)
+- **Git** (for repository operations)
+- **Operating System**: Linux, macOS, or Windows with WSL
+
+---
+
+## Installation & Setup
+
+### Step 1: Install TheAuditor
+
+```bash
+# Clone the repository
+git clone https://github.com/TheAuditorTool/Auditor.git
+cd theauditor
+
+# Install TheAuditor
+pip install -e .
+
+# Optional: Install with ML capabilities
+# pip install -e ".[ml]"
+
+# For development with all optional dependencies (includes the Insights module packages):
+# pip install -e ".[all]"
+```
+
+### Step 2: Sandboxed Toolchain Setup (MANDATORY)
+
+Run this inside the project directory:
+
+```bash
+aud setup-claude --target .
+```
+
+This command:
+- Creates **`.auditor_venv/.theauditor_tools/`** sandbox directory
+- Installs **TypeScript compiler** (`tsc`) in isolation
+- Installs **ESLint** and related tools
+- Updates all tools to latest versions
+- Configures the sandbox for TheAuditor's exclusive use
+
+**Why is this required?**
+- TheAuditor **NEVER** uses your global or project-installed tools
+- Ensures reproducible results across different environments
+- Prevents contamination between analysis tools and project dependencies
+- **Required for TheAuditor to function at all** - not just for JavaScript/TypeScript analysis
+
+**Expected output:**
+```
+Step 1: Setting up Python virtual environment...
+[OK] Venv already exists: C:\Users\user\Desktop\TheAuditor\.auditor_venv
+[OK] TheAuditor already installed in C:\Users\user\Desktop\TheAuditor\.auditor_venv
+  Upgrading to ensure latest version...
+Installing TheAuditor from C:\Users\user\Desktop\TheAuditor...
+[OK] Installed TheAuditor (editable) from C:\Users\user\Desktop\TheAuditor
+[OK] Executable available: C:\Users\user\Desktop\TheAuditor\.auditor_venv\Scripts\aud.exe
+
+Installing Python linting tools...
+  Checking for latest linter versions...
+  [OK] Updated to latest package versions
+  Installing linters from pyproject.toml...
+  [OK] Python linters installed (ruff, mypy, black, bandit, pylint)
+
+Setting up JavaScript/TypeScript tools in sandboxed environment...
+  Creating sandboxed tools directory: C:\Users\user\Desktop\TheAuditor\.auditor_venv\.theauditor_tools
+  [OK] ESLint v9 flat config copied to sandbox
+  [Track A] Checking for latest tool versions...
+  [Track B] Setting up portable Node.js runtime...
+  [OK] Node.js runtime already installed at C:\Users\user\Desktop\TheAuditor\.auditor_venv\.theauditor_tools\node-runtime
+  [OK] Updated @typescript-eslint/parser: 8.41.0 → ^8.42.0
+  [OK] Updated @typescript-eslint/eslint-plugin: 8.41.0 → ^8.42.0
+  Updated 2 packages to latest versions
+  Installing JS/TS linters using bundled Node.js...
+  [OK] JavaScript/TypeScript tools installed in sandbox
+  [OK] Tools isolated from project: C:\Users\user\Desktop\TheAuditor\.auditor_venv\.theauditor_tools
+  [OK] Using bundled Node.js - no system dependency!
+  [OK] ESLint verified at: C:\Users\user\Desktop\TheAuditor\.auditor_venv\.theauditor_tools\node_modules\.bin\eslint.cmd
+```
+
+---
+
+## Core Commands & Workflow
+
+### Complete Audit Pipeline
+
+On a medium 20k LOC node/react/vite stack, expect the analysis to take around 30 minutes.
+Progress bars for tracks B/C may display inconsistently on PowerShell.
+ +Run a comprehensive audit with all **14 analysis phases**: + +```bash +aud full + +# Skip network operations (deps, docs) for faster execution +aud full --offline +``` + +This executes in **parallel stages** for optimal performance: + +**Stage 1 - Foundation (Sequential):** +1. **Repository indexing** - Build manifest and symbol database +2. **Framework detection** - Identify technologies in use + +**Stage 2 - Concurrent Analysis (3 Parallel Tracks):** +- **Track A (Network I/O):** *(skipped with --offline)* + 3. **Dependency checking** - Scan for vulnerabilities + 4. **Documentation fetching** - Gather project docs + 5. **Documentation summarization** - Create AI-friendly summaries +- **Track B (Code Analysis):** + 6. **Workset creation** - Define analysis scope + 7. **Linting** - Run code quality checks + 8. **Pattern detection** - Apply security rules +- **Track C (Graph Build):** + 9. **Graph building** - Construct dependency graph + +**Stage 3 - Final Aggregation (Sequential):** +10. **Graph analysis** - Find architectural issues +11. **Taint analysis** - Track data flow +12. **Factual correlation engine** - Correlate findings across tools with 29 advanced rules +13. **Report generation** - Produce final output + +**Output**: Complete results in **`.pf/readthis/`** directory + +### Offline Mode + +When working on the same codebase repeatedly or when network access is limited, use offline mode to skip dependency checking and documentation phases: + +```bash +# Run full audit without network operations +aud full --offline + +# Combine with other flags +aud full --offline --quiet +aud full --offline --exclude-self # Only meant for dogfooding; in 9/10 projects, --exclude-self will correctly exclude the entire project, producing empty results +``` + +**Benefits:** +- **Faster execution** - Skips slow network operations +- **Air-gapped operation** - Works without internet access +- **Iterative development** - Perfect for repeated runs during development + +**What gets skipped:** +- Dependency vulnerability scanning +- Documentation fetching and summarization +- Latest version checks + +**What still runs:** +- All code analysis (indexing, linting, patterns) +- Graph building and analysis +- Taint analysis and FCE +- Report generation + +### Incremental Analysis (Workset-based) + +Analyze only changed files based on git diff: + +```bash +# Create workset from uncommitted changes +aud workset + +# Create workset from specific commit range +aud workset --diff "HEAD~3..HEAD" + +# Create workset for all files +aud workset --all +``` + +Then run targeted analysis: +```bash +aud lint --workset +aud detect-patterns --workset +``` + +### Linting with Auto-fix + +Run comprehensive linting across all supported languages: + +```bash +# Run linting on workset +aud lint --workset + +# Auto-fix issues where possible +aud lint --fix + +# Run on all files +aud lint --all +``` + +Supports: +- **Python**: **Ruff**, **MyPy**, **Black**, **Bandit**, **Pylint** +- **JavaScript/TypeScript**: **ESLint** with TypeScript parser +- **General**: **Prettier** for formatting + +### Security Analysis + +#### Taint Analysis + +Track data flow from **sources** (user input) to **sinks** (database, output): + +```bash +aud taint-analyze +``` + +Detects: +- **SQL injection** vulnerabilities +- **XSS** (Cross-site scripting) +- **Command injection** +- **Path traversal** +- Other injection attacks + +#### Pattern Detection + +Run pattern-based vulnerability scanning: + +```bash +aud detect-patterns +``` + +Uses **100+ 
YAML-defined patterns** across multiple categories: + +**Security Patterns:** +- Hardcoded secrets and API keys +- Insecure randomness (**Math.random** for security) +- Weak cryptographic algorithms +- Authentication bypasses +- Missing authentication decorators + +**Resource Management:** +- Socket, stream, and worker leaks +- File handles not closed properly +- Database connections left open +- Event listeners not removed + +**Concurrency Issues:** +- **Race conditions** (check-then-act) +- **Deadlocks** (nested locks, lock ordering) +- Shared state without synchronization +- Unsafe parallel writes + +**ORM & Database:** +- **Sequelize** death queries and N+1 patterns +- **Prisma** connection pool exhaustion +- **TypeORM** missing transactions +- Missing database indexes + +**Deployment & Infrastructure:** +- **Docker** security misconfigurations +- **nginx** exposed paths and weak SSL +- **docker-compose** privileged containers +- **webpack** source map exposure in production + +**Framework-Specific:** +- **Django**, **Flask**, **FastAPI** vulnerabilities +- **React** hooks dependency issues +- **Vue** reactivity problems +- **Angular**, **Next.js**, **Express.js** patterns +- Multi-tenant security violations + +### Docker Security Analysis + +Analyze Docker images for security misconfigurations and vulnerabilities: + +```bash +# Analyze all indexed Docker images +aud docker-analyze + +# Filter by severity level +aud docker-analyze --severity critical + +# Save results to JSON file +aud docker-analyze --output docker-security.json +``` + +Detects: +- **Containers running as root** - CIS Docker Benchmark violation +- **Exposed secrets in ENV/ARG** - Hardcoded passwords, API keys, tokens +- **High entropy values** - Potential secrets using Shannon entropy +- **Known secret patterns** - GitHub tokens, AWS keys, Slack tokens + +The command requires Docker images to be indexed first (`aud index`). It queries the `repo_index.db` for Docker metadata and performs security analysis. + +### Project Structure Report + +Generate comprehensive project structure and intelligence reports: + +```bash +# Generate default structure report +aud structure + +# Specify output location +aud structure --output PROJECT_OVERVIEW.md + +# Adjust directory tree depth +aud structure --max-depth 6 + +# Analyze different root directory +aud structure --root ./src +``` + +The report includes: +- **Directory tree visualization** - Smart file grouping and critical file(size/loc) highlighting +- **Project statistics** - Total files, LOC, estimated tokens +- **Language distribution** - Percentage breakdown by file type +- **Top 10 largest files** - By token count with percentage of codebase +- **Top 15 critical files** - Identified by naming conventions (auth.py, config.js, etc.) 
+- **AI context optimization** - Recommendations for reading order and token budget +- **Symbol counts** - Functions, classes, imports from database + +Useful for: +- Getting quick project overview +- Understanding codebase structure +- Planning AI assistant interactions +- Identifying critical components +- Token budget management for LLMs + +### Impact Analysis + +Assess the blast radius of a specific code change: + +```bash +# Analyze impact of changes to a specific function +aud impact --file "src/auth/login.py" --line 42 + +# Analyze impact with depth limit +aud impact --file "src/database.py" --line 100 --depth 3 + +# Trace frontend to backend dependencies +aud impact --file "frontend/api.ts" --line 50 --trace-to-backend +``` + +Shows: +- Dependent functions and modules +- Call chain analysis +- Affected test files +- Risk assessment +- Cross-stack impact (frontend → backend tracing) + +### Refactoring Analysis + +Detect and analyze refactoring issues such as data model changes, API contract mismatches, and incomplete migrations: + +```bash +# Analyze impact from a specific model change +aud refactor --file "models/Product.ts" --line 42 + +# Auto-detect refactoring from database migrations +aud refactor --auto-detect --migration-dir backend/migrations + +# Analyze current workset for refactoring issues +aud refactor --workset + +# Generate detailed report +aud refactor --auto-detect --output refactor_report.json +``` + +Detects: +- **Data Model Changes**: Fields moved between tables (e.g., `product.price` → `variant.price`) +- **Foreign Key Changes**: References updated (e.g., `product_id` → `product_variant_id`) +- **API Contract Mismatches**: Frontend expects old structure, backend provides new +- **Missing Updates**: Code still using old field/table names +- **Cross-Stack Inconsistencies**: TypeScript interfaces not matching backend models + +The refactor command uses: +- Impact analysis to trace affected files +- Migration file analysis to detect schema changes +- Pattern detection with refactoring-specific rules +- FCE correlation to find related issues +- Risk assessment based on blast radius + +### Insights Analysis (Optional) + +Run optional interpretive analysis on top of factual audit data: + +```bash +# Run all insights modules +aud insights --mode all + +# ML-powered insights (requires pip install -e ".[ml]") +aud insights --mode ml --ml-train + +# Graph health metrics and recommendations +aud insights --mode graph + +# Taint vulnerability scoring +aud insights --mode taint + +# Impact analysis insights +aud insights --mode impact + +# Generate comprehensive report +aud insights --output insights_report.json + +# Train ML model on your codebase patterns +aud insights --mode ml --ml-train --training-data .pf/raw/ + +# Get ML-powered suggestions +aud insights --mode ml --ml-suggest +``` + +Modes: +- **ml**: Machine learning predictions and pattern recognition +- **graph**: Health scores, architectural recommendations +- **taint**: Vulnerability severity scoring and classification +- **impact**: Change impact assessment and risk scoring +- **all**: Run all available insights modules + +The insights command: +- Reads existing audit data from `.pf/raw/` +- Applies interpretive scoring and classification +- Generates actionable recommendations +- Outputs to `.pf/insights/` for separation from facts +- Provides technical scoring without crossing into semantic interpretation + +### Graph Visualization + +Generate rich visual intelligence from dependency graphs: + +```bash +# 
Build dependency graphs first +aud graph build + +# Basic visualization +aud graph viz + +# Show only dependency cycles +aud graph viz --view cycles --include-analysis + +# Top 10 hotspots (most connected nodes) +aud graph viz --view hotspots --top-hotspots 10 + +# Architectural layers visualization +aud graph viz --view layers --format svg + +# Impact analysis visualization +aud graph viz --view impact --impact-target "src/auth/login.py" + +# Call graph instead of import graph +aud graph viz --graph-type call --view full + +# Generate SVG for AI analysis +aud graph viz --format svg --include-analysis --title "System Architecture" + +# Custom output location +aud graph viz --out-dir ./architecture/ --format png +``` + +View Modes: +- **full**: Complete graph with all nodes and edges +- **cycles**: Only nodes/edges involved in dependency cycles (red highlighting) +- **hotspots**: Top N most connected nodes with gradient coloring +- **layers**: Architectural layers as subgraphs with clear hierarchy +- **impact**: Highlight impact radius with color-coded upstream/downstream + +Visual Encoding: +- **Node Color**: Programming language (Python=blue, JavaScript=yellow, TypeScript=blue) +- **Node Size**: Importance/connectivity (larger = more dependencies) +- **Edge Color**: Red for cycles, gray for normal dependencies +- **Border Width**: Code churn (thicker = more changes) +- **Node Shape**: Module=box, Function=ellipse, Class=diamond + +The graph viz command: +- Generates Graphviz DOT format files +- Optionally creates SVG/PNG images (requires Graphviz installation) +- Supports filtered views for focusing on specific concerns +- Includes analysis data for cycle and hotspot highlighting +- Produces AI-readable SVG output for LLM analysis + +### Dependency Management + +Check for outdated or vulnerable dependencies: + +```bash +# Check for latest versions +aud deps --check-latest + +# Scan for known vulnerabilities +aud deps --vuln-scan + +# Update all dependencies to latest +aud deps --upgrade-all +``` + +--- + +## Architecture: Truth Courier vs Insights + +### Understanding the Separation of Concerns + +TheAuditor implements a strict architectural separation between **factual observation** (Truth Courier modules) and **optional interpretation** (Insights modules). This design ensures the tool remains an objective source of ground truth while offering actionable intelligence when needed. + +### The Core Philosophy + +TheAuditor doesn't try to understand your business logic or make your AI "smarter." Instead, it solves the real problem: **LLMs lose context and make inconsistent changes across large codebases.** + +The workflow: +1. **You tell AI**: "Add JWT auth with CSRF tokens and password complexity" +2. **AI writes code**: Probably inconsistent due to context limits +3. **You run**: `aud full` +4. **TheAuditor reports**: All the inconsistencies and security holes +5. **AI reads the report**: Now sees the complete picture across all files +6. **AI fixes issues**: With full visibility of what's broken +7. 
**Repeat until clean** + +### Truth Courier Modules (Core) + +These modules report verifiable facts without judgment: + +```python +# What Truth Couriers Report - Just Facts +{ + "taint_analyzer": "Data from req.body flows to res.send at line 45", + "pattern_detector": "Line 45 matches pattern 'unsanitized-output'", + "impact_analyzer": "Changing handleRequest() affects 12 downstream functions", + "graph_analyzer": "Module A imports B, B imports C, C imports A" +} +``` + +**Key Truth Couriers:** +- **Indexer**: Maps all code symbols and their locations +- **Taint Analyzer**: Traces data flow through the application +- **Impact Analyzer**: Maps dependency chains and change blast radius +- **Graph Analyzer**: Detects cycles and architectural patterns +- **Pattern Detector**: Matches code against security patterns + +### Insights Modules (Optional Scoring) + +These optional modules add technical scoring and classification: + +```python +# What Insights Add - Technical Classifications +{ + "taint/insights": { + "vulnerability_type": "Cross-Site Scripting", + "severity": "HIGH" + }, + "graph/insights": { + "health_score": 70, + "recommendation": "Reduce coupling" + } +} +``` + +**Installation:** +```bash +# Base installation (Truth Couriers only) +pip install -e . + +# With ML insights (optional) +pip install -e ".[ml]" + +# Development with all dependencies (not for general users) +# pip install -e ".[all]" +``` + +### Correlation Rules: Detecting YOUR Patterns + +Correlation rules detect when multiple facts indicate an inconsistency in YOUR codebase: + +```yaml +# Example: Detecting incomplete refactoring +- name: "PRODUCT_VARIANT_REFACTOR" + co_occurring_facts: + - tool: "grep" + pattern: "ProductVariant.*retail_price" # Backend changed + - tool: "grep" + pattern: "product\\.unit_price" # Frontend didn't +``` + +This isn't "understanding" that products have prices. It's detecting that you moved a field from one model to another and some code wasn't updated. Pure consistency checking. + +The correlation engine loads rules from `/correlations/rules/`. We provide common patterns, but many are project-specific. You write rules that detect YOUR patterns, YOUR refactorings, YOUR inconsistencies. + +### Why This Works + +**What doesn't work:** +- Making AI "understand" your business domain +- Adding semantic layers to guess what you mean +- Complex context management systems + +**What does work:** +- Accept that AI will make inconsistent changes +- Detect those inconsistencies after the fact +- Give AI the full picture so it can fix them + +TheAuditor doesn't try to prevent mistakes. It finds them so they can be fixed. + +### Practical Example + +```bash +# You ask AI to implement authentication +Human: "Add JWT auth with CSRF protection" + +# AI writes code (probably with issues due to context limits) +AI: *implements auth across 15 files* + +# You audit it +$ aud full + +# TheAuditor finds issues +- "JWT secret hardcoded at auth.js:47" +- "CSRF token generated but never validated" +- "Auth middleware missing on /api/admin/*" + +# You can also check impact of changes +$ aud impact --file "auth.js" --line 47 +# Shows: "Changing this affects 23 files, 47 functions" + +# AI reads the audit and can now see ALL issues +AI: *reads .pf/readthis/* +AI: "I see 5 security issues across auth flow. Fixing..." + +# AI fixes with complete visibility +AI: *fixes all issues because it can see the full picture* +``` + +### Key Points + +1. 
**No Business Logic Understanding**: TheAuditor doesn't need to know what your app does +2. **Just Consistency Checking**: It finds where your code doesn't match itself +3. **Facts, Not Opinions**: Reports what IS, not what SHOULD BE +4. **Complete Dependency Tracing**: Impact analyzer shows exactly what's affected by changes +5. **AI + Audit Loop**: Write → Audit → Fix → Repeat until clean + +This is why TheAuditor works where semantic understanding fails - it's not trying to read your mind, just verify your code's consistency. + +--- + +## Understanding the Output + +### Directory Structure + +After running analyses, results are organized in **`.pf/`**: + +``` +.pf/ +├── raw/ # Raw, unmodified tool outputs (Truth Couriers) +│ ├── linting.json # Raw linter results +│ ├── patterns.json # Pattern detection findings +│ ├── taint_analysis.json # Taint analysis results +│ ├── graph.json # Dependency graph data +│ └── graph_analysis.json # Graph analysis (cycles, hotspots) +│ +├── insights/ # Optional interpretive analysis (Insights modules) +│ ├── ml_suggestions.json # ML predictions and patterns +│ ├── taint_insights.json # Vulnerability severity scoring +│ └── graph_insights.json # Health scores and recommendations +│ +├── readthis/ # AI-consumable chunks +│ ├── manifest.md # Repository overview +│ ├── patterns_001.md # Chunked findings (65KB max) +│ ├── patterns_002.md +│ ├── taint_001.md # Chunked taint results +│ ├── tickets_001.md # Actionable issue tickets +│ └── summary.md # Executive summary +│ +├── graphs/ # Graph visualizations +│ ├── import_graph.dot # Dependency graph DOT file +│ ├── import_graph_cycles.dot # Cycles-only view +│ └── import_graph.svg # SVG visualization (if generated) +│ +├── pipeline.log # Complete execution log +├── error.log # Error details (if failures occur) +├── findings.json # Consolidated findings +├── risk_scores.json # Risk analysis results +└── report.md # Human-readable report +``` + +### Key Output Files + +#### `.pf/raw/` +Contains **unmodified outputs** from each tool. These files preserve the exact format and data from linters, scanners, and analyzers. **Never modified** after creation. This is the source of ground truth. + +#### `.pf/insights/` +Contains **optional interpretive analysis** from Insights modules. These files add technical scoring and classification on top of raw data. Only created when insights commands are run. + +#### `.pf/graphs/` +Contains **graph visualizations** in DOT and image formats. Generated by `aud graph viz` command with various view modes for focusing on specific concerns. + +#### `.pf/readthis/` +Contains processed, **chunked data optimized for AI consumption**: +- Each file is under **65KB** by default (configurable via `THEAUDITOR_LIMITS_MAX_CHUNK_SIZE`) +- Maximum 3 chunks per file by default (configurable via `THEAUDITOR_LIMITS_MAX_CHUNKS_PER_FILE`) +- Structured with clear headers and sections +- Includes context, evidence, and suggested fixes +- Ready for direct consumption by **Claude**, **GPT-4**, etc. + +#### `.pf/pipeline.log` +Complete execution log showing: +- Each phase's **execution time** +- **Success/failure** status +- Key statistics and findings +- Error messages if any + +#### `.pf/error.log` +Created only when errors occur. 
Contains: +- Full **stack traces** +- Detailed error messages +- Phase-specific failure information +- Debugging information + +--- + +## Advanced Usage + +### Custom Pattern Rules + +Create custom detection patterns in **`.pf/patterns/`**: + +```yaml +# .pf/patterns/custom_auth.yaml +name: weak_password_check +severity: high +category: security +pattern: 'password\s*==\s*["\']' +description: "Hardcoded password comparison" +test_template: | + def test_weak_password(): + assert password != "admin" +``` + +### ML-Powered Suggestions + +Train models on your codebase patterns: + +```bash +# Initial training +aud learn + +# Get improvement suggestions +aud suggest + +# Provide feedback for continuous learning +aud learn-feedback --accept +``` + +### Development-Specific Flags + +#### Excluding TheAuditor's Own Files + +When testing or developing within TheAuditor's repository (e.g., analyzing `fakeproj/project_anarchy/`), use the `--exclude-self` flag to prevent false positives from TheAuditor's own files: + +```bash +# Exclude all TheAuditor files from analysis +aud index --exclude-self +aud full --exclude-self +``` + +This flag excludes: +- All TheAuditor source code directories (`theauditor/`, `tests/`, etc.) +- Root configuration files (`pyproject.toml`, `package-template.json`, `Dockerfile`) +- Documentation and build files + +**Use case:** Testing vulnerable projects within TheAuditor's repository without framework detection picking up TheAuditor's own configuration files. + +### CI/CD Integration + +#### GitHub Actions Example + +```yaml +name: Security Audit +on: [push, pull_request] + +jobs: + audit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.12' + + - name: Set up Node.js + uses: actions/setup-node@v2 + with: + node-version: '18' + + - name: Install TheAuditor + run: | + pip install -e ".[all]" + aud setup-claude --target . + + - name: Run Audit + run: aud full + + - name: Upload Results + if: always() + uses: actions/upload-artifact@v2 + with: + name: audit-results + path: .pf/ +``` + +### Running TheAuditor on Its Own Codebase (Dogfooding) + +When developing TheAuditor or testing it on itself, you need a special dual-environment setup: + +#### Understanding the Dual-Environment Architecture + +TheAuditor maintains strict separation between: +1. **Primary Environment** (`.venv/`) - Where TheAuditor runs from +2. **Sandboxed Environment** (`.auditor_venv/.theauditor_tools/`) - Tools TheAuditor uses for analysis + +This ensures reproducibility and prevents TheAuditor from analyzing its own analysis tools. + +#### Setup Procedure for Dogfooding + +```bash +# 1. Clone and set up development environment +git clone https://github.com/TheAuditorTool/Auditor.git +cd theauditor +python -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate +pip install -e . + +# 2. CRITICAL: Create the sandboxed analysis environment +aud setup-claude --target . + +# 3. Verify setup +aud full --quick-test + +# 4. 
Run full analysis on TheAuditor itself +aud full +``` + +#### Analyzing Test Projects Within TheAuditor + +When analyzing test projects like `fakeproj/` from within TheAuditor's repository: + +```bash +cd fakeproj/project_anarchy +aud full --exclude-self # Excludes TheAuditor's own files +``` + +The `--exclude-self` flag prevents: +- Framework detection from identifying TheAuditor's `pyproject.toml` +- False positives from TheAuditor's configuration files +- Contamination from TheAuditor's source code + +--- + +## Refactoring Detection + +TheAuditor includes sophisticated capabilities for detecting incomplete refactorings, data model changes, and cross-stack inconsistencies. + +### Understanding Refactoring Issues + +Common refactoring problems TheAuditor detects: + +1. **Data Model Evolution** - Fields moved between models (e.g., `product.price` → `variant.price`) +2. **Foreign Key Changes** - References updated in database but not in code +3. **API Contract Mismatches** - Frontend expects old structure, backend provides new +4. **Cross-Stack Inconsistencies** - TypeScript interfaces not matching backend models +5. **Incomplete Migrations** - Some code still using old field/table names + +### How Refactoring Detection Works + +TheAuditor uses multiple techniques: + +#### Migration Analysis +Analyzes database migration files to understand schema changes: +```javascript +// Migration detected: Field moved from products to product_variants +removeColumn('products', 'unit_price'); +addColumn('product_variants', 'retail_price', DataTypes.DECIMAL); +``` + +#### Impact Analysis +Traces dependencies to find all affected code: +```bash +aud impact --file "models/Product.ts" --line 42 +# Shows: 47 files need updating +``` + +#### Pattern Detection +Over 30 refactoring-specific patterns detect common issues: +```yaml +- name: "PRODUCT_PRICE_FIELD_REMOVED" + description: "Code accessing price on Product after migration to ProductVariant" +``` + +#### Cross-Stack Tracing +Matches frontend API calls to backend endpoints to detect contract mismatches. + +### Using Refactoring Detection + +#### Quick Detection +```bash +# Auto-detect from migrations +aud refactor --auto-detect + +# Analyze specific change +aud refactor --file "models/Product.ts" --line 42 + +# Use with workset +aud refactor --workset + +# Generate detailed report +aud refactor --auto-detect --output refactor_report.json +``` + +#### Best Practices for Refactoring + +**Before Refactoring:** +1. Run impact analysis: `aud impact --file "model.ts" --line 42` +2. Create workset: `aud workset --from-impact` +3. Baseline analysis: `aud refactor --workset` + +**During Refactoring:** +- Run incremental checks: `aud refactor --workset` +- Validate cross-stack: `aud impact --trace-to-backend` + +**After Refactoring:** +- Full validation: `aud unified --mode refactor` +- Generate report: `aud report --format refactoring` + +### Real-World Example + +A product variant refactoring might be detected as: + +``` +PRODUCT_PRICE_FIELD_REMOVED +- Frontend: 23 files accessing product.unit_price +- Backend: Field moved to ProductVariant.retail_price +- Impact: POS system cannot display prices + +ORDER_ITEMS_WRONG_REFERENCE +- Database: order_items.product_variant_id (new) +- Code: Still using order_items.product_id (old) +- Impact: Orders cannot be created +``` + +### Custom Refactoring Rules + +TheAuditor uses YAML-based correlation rules to detect refactoring issues. 
These rules are YOUR business logic - you define what patterns indicate problems in YOUR codebase. + +#### How It Works + +1. **Rules Location**: `/theauditor/correlations/rules/refactoring.yaml` +2. **Rule Structure**: Each rule defines co-occurring facts that must ALL match +3. **Detection**: When all facts match, TheAuditor reports the issue +4. **No Code Changes**: Just edit YAML to define new patterns + +#### Creating Your Own Rules + +Edit `/theauditor/correlations/rules/refactoring.yaml` or create new YAML files: + +```yaml +rules: + - name: "MY_FIELD_MIGRATION" + description: "Detect when price field moved but old code remains" + co_occurring_facts: + - tool: "grep" + pattern: "removeColumn.*price" # Migration removed field + - tool: "grep" + pattern: "product\\.price" # Code still uses old field + confidence: 0.92 + + - name: "API_VERSION_MISMATCH" + description: "Frontend calling v1 API but backend is v2" + co_occurring_facts: + - tool: "grep" + pattern: "/api/v1/" # Frontend uses v1 + - tool: "grep" + pattern: "router.*'/v2/'" # Backend only has v2 + confidence: 0.95 +``` + +#### Available Tools for Facts + +- **grep**: Pattern matching in files +- **patterns**: Matches from pattern detection +- **taint_analyzer**: Taint flow findings +- **lint**: Linter findings + +#### Real Example from Production + +```yaml +- name: "PRODUCT_VARIANT_REFACTOR" + description: "Product fields moved to ProductVariant but frontend still uses old structure" + co_occurring_facts: + - tool: "grep" + pattern: "ProductVariant.*retail_price.*Sequelize" # Backend changed + - tool: "grep" + pattern: "product\\.unit_price|product\\.retail_price" # Frontend didn't + confidence: 0.92 +``` + +This detects when you moved price fields from Product to ProductVariant model but frontend still expects the old structure. + +--- + +## Troubleshooting + +### Common Issues + +#### "TypeScript compiler not available in TheAuditor sandbox" + +**Solution**: Run **`aud setup-claude --target .`** to set up the sandbox. + +#### "Coverage < 90% - run `aud capsules` first" + +**Solution**: Generate code capsules for better analysis coverage: +```bash +aud index +aud workset --all +``` + +#### Linting produces no results + +**Solution**: Ensure linters are installed: +```bash +# For Python +pip install -e ".[linters]" + +# For JavaScript/TypeScript +aud setup-claude --target . 
+```
+
+#### Pipeline fails at specific phase
+
+**Solution**: Check **`.pf/error.log`** for details:
+```bash
+cat .pf/error.log
+# Or check a phase-specific error log
+cat .pf/error_phase_08.log
+```
+
+### Performance Optimization
+
+For large repositories:
+
+```bash
+# Limit analysis scope
+aud workset --paths "src/critical/**/*.py"
+
+# Skip documentation phases
+aud full --skip-docs
+
+# Run specific phases only
+aud index && aud lint && aud detect-patterns
+
+# Adjust chunking for larger context windows
+export THEAUDITOR_LIMITS_MAX_CHUNK_SIZE=100000  # 100KB chunks
+export THEAUDITOR_LIMITS_MAX_CHUNKS_PER_FILE=5  # Allow up to 5 chunks
+```
+
+### Runtime Configuration
+
+TheAuditor supports environment variable overrides for runtime configuration:
+
+```bash
+# Chunking configuration
+export THEAUDITOR_LIMITS_MAX_CHUNKS_PER_FILE=5   # Default: 3
+export THEAUDITOR_LIMITS_MAX_CHUNK_SIZE=100000   # Default: 65000 (bytes)
+
+# File size limits
+export THEAUDITOR_LIMITS_MAX_FILE_SIZE=5242880   # Default: 2097152 (2MB)
+
+# Timeout configuration
+export THEAUDITOR_TIMEOUTS_LINT_TIMEOUT=600      # Default: 300 (seconds)
+export THEAUDITOR_TIMEOUTS_FCE_TIMEOUT=1200      # Default: 600 (seconds)
+
+# Batch processing
+export THEAUDITOR_LIMITS_DEFAULT_BATCH_SIZE=500  # Default: 200
+```
+
+Configuration can also be set via `.pf/config.json` for project-specific overrides.
+
+---
+
+## Best Practices
+
+1. **Always run `aud init` first** in a new project
+2. **Set up the sandbox** with **`aud setup-claude --target .`** (required for every project, not just JavaScript/TypeScript)
+3. **Use worksets** for incremental analysis during development
+4. **Run `aud full`** before releases for comprehensive analysis
+5. **Review `.pf/readthis/`** for AI-friendly issue summaries
+6. **Check exit codes** in CI/CD for automated pass/fail decisions
+7. **Archive results** with timestamps for audit trails
+
+---
+
+## Exit Codes for Automation
+
+**TheAuditor** uses specific exit codes for CI/CD integration:
+
+- **`0`** - Success, no critical/high issues
+- **`1`** - High severity findings
+- **`2`** - Critical severity findings
+- **`3`** - Pipeline/task incomplete
+
+Use these in scripts:
+```bash
+aud full
+if [ $? -eq 2 ]; then
+  echo "Critical vulnerabilities found - blocking deployment"
+  exit 1
+fi
+```
+
+---
+
+## Getting Help
+
+- Run **`aud --help`** for a command overview
+- Run **`aud <command> --help`** for help on a specific command
+- Check **`.pf/pipeline.log`** for execution details
+- Review **`.pf/error.log`** for troubleshooting
+- Refer to **`teamsop.md`** for development workflow
+
+---
+
+## Next Steps
+
+1. Initialize your first project with **`aud init`**
+2. Run **`aud full`** to see TheAuditor in action
+3. Explore the results in **`.pf/readthis/`**
+4. Integrate into your CI/CD pipeline
+5. Customize patterns for your specific needs
+
+---
+
+**Remember**: TheAuditor is designed to work **offline**, maintain **data integrity**, and produce **AI-ready outputs**. All analysis is **deterministic** and **reproducible**.
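+
+One closing example for automation: a minimal sketch combining the exit codes above with the archive-results best practice. The script, paths, and archive layout are illustrative assumptions rather than anything TheAuditor ships:
+
+```python
+import shutil
+import subprocess
+import sys
+from datetime import datetime
+
+# Run the full audit; the return code follows the exit-code table above.
+result = subprocess.run(["aud", "full"], check=False)
+
+# Archive the .pf/ output with a timestamp for the audit trail (Best Practice 7); path is illustrative.
+stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+shutil.copytree(".pf", f"audit-archive/{stamp}", dirs_exist_ok=True)
+
+if result.returncode == 2:
+    print("Critical vulnerabilities found - blocking deployment")
+    sys.exit(1)
+elif result.returncode == 3:
+    print("Audit pipeline incomplete - check .pf/pipeline.log and .pf/error.log")
+    sys.exit(1)
+sys.exit(0)
+```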
\ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d94fa60 --- /dev/null +++ b/LICENSE @@ -0,0 +1,687 @@ +GNU AFFERO GENERAL PUBLIC LICENSE +Version 3, 19 November 2007 + +Copyright (C) 2024-2025 TheAuditor Team + +For commercial licensing inquiries, please contact via GitHub: +https://github.com/TheAuditorTool/Auditor + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . + +The complete text of the GNU Affero General Public License version 3 +can be found at: https://www.gnu.org/licenses/agpl-3.0.txt + + + + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. 
Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. 
+ + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/README.md b/README.md new file mode 100644 index 0000000..5125912 --- /dev/null +++ b/README.md @@ -0,0 +1,313 @@ +Personal note from me: +Its taken me over a week just to get the courage to upload this. Ive never coded a single line of this, I cant stress that enough... Yes, I build architecture, infrastructure all the things that made the code and components come out this way but uggh… the potential shame and humiliation is real lol... So don't be a dick and poop on my parade... 
I've done my best... Take it or leave it...
+
+It's become a complex, advanced monster of a system that is honestly clean af but hard to get an overview of anymore.
+It isn't unlikely that you'll find oddities such as finished components that were never wired up or exposed in the pipeline...
+I'm doing my best here, I'm only one person with one brain lol... :P
+
+### The Search for Ground Truth in an Age of AI
+
+My background is in systems architecture/infrastructure, not professional software development. I have only been "coding/developing" for a little over 3 months. This gives me a unique perspective: I can see the forest, but I'm blind to the individual trees of the code. After immersing myself for 500+ hours in AI-assisted development, I concluded that the entire ecosystem is built on a fundamentally flawed premise: it lacks a source of **ground truth**.
+
+From start to launch on GitHub took me about a month, across 250 active hours in front of the computer, for anyone who wonders or cares :P
+---
+
+### The Problem: A Cascade of Corrupted Context
+
+Most AI development tools try to solve the wrong problem. They focus on perfecting the *input*—better prompts, more context—but they ignore the critical issue of **compounding deviation**.
+
+An LLM is a powerful statistical engine, but it doesn't *understand*. The modern AI workflow forces this engine to play a high-stakes game of "telephone," where the original intent is corrupted at every step:
+
+1. A human has an idea.
+2. An AI refines it into a prompt.
+3. Other tools add their own interpretive layers.
+4. The primary AI assistant (e.g., Claude Opus) interprets the final, distorted prompt to generate code.
+
+As a rookie "developer," the only thing I could trust was the raw output: the code and its errors. In a vacuum of deep programming knowledge, these facts were my only anchors.
+
+This architectural flaw is amplified by two dangerous behaviours inherent to AI assistants:
+
+* **Security Theater**: AI assistants are optimized to "make it work," which often means introducing rampant security anti-patterns like hardcoded credentials, disabled authentication, and the pervasive use of `as any` in TypeScript. This creates a dangerous illusion of progress.
+* **Context Blindness**: With aggressive context compaction, an AI never sees the full picture. It works with fleeting snapshots of code, forcing it to make assumptions instead of decisions based on facts.
+
+---
+
+### The Solution: `TheAuditor`
+
+`TheAuditor` is the antidote. It was built to stop "vibe coding" your way into security and quality assurance nightmares. Its mission is to provide an incorruptible source of **ground truth** for both the developer and their AI assistant.
+
+Its philosophy is a direct rejection of the current trend:
+
+* **It Orchestrates Verifiable Data.** The tool runs a suite of industry-standard linters and security scanners, preserving the raw, unfiltered output from each. It does not summarize or interpret this core data.
+* **It's Built for AI Consumption.** The tool's primary engineering challenge is to adapt this raw truth into structured, AI-digestible chunks. It ensures the AI works with facts, not faulty summaries.
+* **It's Focused and Extensible.** The initial focus is on Python and the Node.js ecosystem, but the modular, pattern-based architecture is designed to invite contributions for other languages and frameworks.
+
+`TheAuditor` is not a replacement for a formal third-party audit.
It is an engineering tool designed to catch the vast majority of glaring issues—from the OWASP Top 10 to common framework anti-patterns. **Its core commitment is to never cross the line from verifiable truth into semantic interpretation.** + + Every AI assistant - Claude Code, Cursor, Windsurf, Copilot - they're all blind. They can write code but can't + verify it's secure, correct, or complete. TheAuditor gives them eyes. + + Why This Matters + + 1. Tool Agnostic - Works with ANY AI assistant or IDE + - aud full from any terminal + - Results in .pf/readthis/ ready for any LLM + 2. AI Becomes Self-Correcting + - AI writes code + - AI runs aud full + - AI reads the ground truth + - AI fixes its own mistakes + - Recursive loop until actually correct + 3. No Human Intervention Required + - You never touch the terminal + - The AI runs everything + - You just review and approve + + The Genius Architecture + + Human: "Add authentication to my app" + ↓ + AI: *writes auth code* + ↓ + AI: `aud full` + ↓ + AI: *reads .pf/readthis/* + ↓ + AI: "Found 3 security issues, fixing..." + ↓ + AI: *fixes issues* + ↓ + AI: `aud full` + ↓ + AI: "Clean. Authentication complete." + + Market Reality Check + + Every developer using AI assistants has this problem: + - AI writes insecure code + - AI introduces bugs + - AI doesn't see the full picture + - AI can't verify its work + + TheAuditor solves ALL of this. It's not a "nice to have" - it's the missing piece that makes AI development + actually trustworthy. + + I've built the tool that makes AI assistants production-ready. + This isn't competing with SonarQube/SemGrep. This is creating an entirely new category: AI Development Verification + Tools. + +--- + +### Important: Antivirus Software Interaction + +#### Why TheAuditor Triggers Antivirus Software + +TheAuditor is a security scanner that identifies vulnerabilities in your code. By its very nature, it must: + +1. **Read and analyze security vulnerabilities** - SQL injection, XSS attacks, hardcoded passwords +2. **Write these findings to disk** - Creating reports with exact code snippets as evidence +3. **Process files rapidly** - Scanning entire codebases in parallel for efficiency + +This creates an inherent conflict with antivirus software, which sees these exact same behaviours as potentially malicious. When TheAuditor finds and documents a SQL injection vulnerability in your code, your antivirus sees us writing "malicious SQL injection patterns" to disk - because that's literally what we're doing, just for legitimate security analysis purposes. + +#### Performance Impact You May Experience + +When running TheAuditor, you may notice: + +- **Increased antivirus CPU usage** - Your AV will scan every file we read AND every finding we write +- **Approximately 10-50% performance reduction, depending on software.** - Both TheAuditor and your AV are reading the same files simultaneously +- **Occasional delays or pauses** - Your AV may temporarily quarantine our output files for deeper inspection + +This is not a bug or inefficiency in TheAuditor - it's the unavoidable consequence of two security tools doing their jobs simultaneously. + +#### Our Stance on Antivirus + +**We do NOT recommend:** +- ❌ Disabling your antivirus software +- ❌ Adding TheAuditor to your exclusion/whitelist +- ❌ Reducing your system's security in any way + +Your antivirus is correctly identifying that we're writing security vulnerability patterns to disk. That's exactly what we do - we find vulnerabilities and document them. 
The fact that your AV is suspicious of this behavior means it's working properly.
+
+#### What We've Done to Minimize Impact
+
+1. **Intelligent resource management** - We automatically reduce parallel workers when system resources are constrained
+2. **Pattern defanging** - We insert invisible characters into dangerous patterns to reduce false positives
+3. **Adaptive performance** - We monitor CPU and RAM usage to avoid overwhelming your system
+
+#### The Industry Reality
+
+This is not a problem unique to TheAuditor. Every legitimate security scanner faces this same issue:
+- **GitHub Advanced Security** runs in isolated cloud containers to avoid this
+- **Commercial SAST tools** require enterprise AV exceptions
+- **Popular scanners** explicitly document AV conflicts in their installation guides
+
+The fundamental paradox: a tool that finds security vulnerabilities must write those vulnerabilities to disk, which makes it indistinguishable from malware to an antivirus. There is no technical solution to this - it's the inherent nature of security analysis tools.
+
+#### What This Means for You
+
+- Run TheAuditor when system load is low for best performance
+- Expect the analysis to take longer than the raw processing time due to AV overhead
+- If your AV quarantines output files in `.pf/`, you may need to restore them manually
+- Consider running TheAuditor in a controlled environment if performance is critical
+
+We believe in complete transparency about these limitations. This interaction with antivirus software is not a flaw in TheAuditor - it's proof that both your AV and our scanner are doing exactly what they're designed to do: identify and handle potentially dangerous code patterns.
+
+---
+
+# TheAuditor
+
+Offline-First, AI-Centric SAST & Code Intelligence Platform
+
+## What TheAuditor Does
+
+TheAuditor is a comprehensive code analysis platform that:
+
+- **Finds Security Vulnerabilities**: Detects OWASP Top 10, injection attacks, authentication issues, and framework-specific vulnerabilities
+- **Tracks Data Flow**: Follows untrusted data from sources to sinks to identify injection points
+- **Analyzes Architecture**: Builds dependency graphs, detects cycles, and measures code complexity
+- **Detects Refactoring Issues**: Identifies incomplete migrations, API contract mismatches, and cross-stack inconsistencies
+- **Runs Industry-Standard Tools**: Orchestrates ESLint, Ruff, MyPy, and other trusted linters
+- **Produces AI-Ready Reports**: Generates chunked, structured output optimized for LLM consumption
+
+Unlike traditional SAST tools, TheAuditor is designed specifically for AI-assisted development workflows, providing ground truth that both developers and AI assistants can trust.
+
+## Quick Start
+
+```bash
+# Install TheAuditor
+pip install -e .
+
+# MANDATORY: Set up the TheAuditor environment (required for all functionality).
+# This installs .auditor_venv into the project you want to analyze.
+aud setup-claude --target .
+
+# Initialize your project
+aud init
+
+# Run comprehensive analysis
+aud full
+
+# Check results
+ls .pf/readthis/
+```
+
+That's it! TheAuditor will analyze your codebase and generate AI-ready reports in `.pf/readthis/`.
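+
+If you prefer to script the loop yourself rather than have an AI assistant read the reports directly, the chunks can also be consumed programmatically. The sketch below is illustrative only: the `.pf/readthis/` path comes from the Quick Start above, but the file layout and any structure inside the chunks are assumptions here, not a documented API.
+
+```python
+from pathlib import Path
+
+# Hypothetical helper: collect TheAuditor's chunked reports so each piece can be
+# handed to an LLM in order. Only the directory name is taken from the Quick Start;
+# the ordering and encoding handling below are assumptions for illustration.
+READTHIS_DIR = Path(".pf/readthis")
+
+def load_report_chunks(report_dir: Path = READTHIS_DIR) -> list[str]:
+    """Return report chunks in a stable order, one string per file."""
+    if not report_dir.is_dir():
+        raise FileNotFoundError(f"{report_dir} not found - run `aud full` first")
+    return [
+        chunk.read_text(encoding="utf-8", errors="replace")
+        for chunk in sorted(report_dir.iterdir())
+        if chunk.is_file()
+    ]
+
+if __name__ == "__main__":
+    for i, text in enumerate(load_report_chunks(), start=1):
+        # In a real workflow, each chunk would be passed to the AI assistant verbatim.
+        print(f"chunk {i}: {len(text)} characters")
+```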
+ + +## Documentation + +- **[How to Use](HOWTOUSE.md)** - Complete installation and usage guide +- **[Architecture](ARCHITECTURE.md)** - Technical architecture and design patterns +- **[Contributing](CONTRIBUTING.md)** - How to contribute to TheAuditor +- **[Roadmap](ROADMAP.md)** - Future development plans + +## Key Features + +### Refactoring Detection & Analysis + +TheAuditor detects incomplete refactorings and cross-stack inconsistencies using correlation rules: + +```bash +# Analyze refactoring impact +aud refactor --file models/Product.ts --line 42 + +# Auto-detect from migrations +aud refactor --auto-detect + +# Analyze workset +aud refactor --workset --output refactor_report.json +``` + +Detects: +- **Data Model Changes**: Fields moved between tables +- **API Contract Mismatches**: Frontend/backend inconsistencies +- **Foreign Key Updates**: Incomplete reference changes +- **Cross-Stack Issues**: TypeScript interfaces not matching models + +Users define custom rules in `/correlations/rules/`, example provided in refactoring.yaml to detect project-specific patterns. + +### Dependency Graph Visualization + +TheAuditor now includes rich visual intelligence for dependency graphs using Graphviz: + +- **Multiple View Modes**: Full graph, cycles-only, hotspots, architectural layers, impact analysis +- **Visual Intelligence Encoding**: + - Node colors indicate programming language (Python=blue, JS=yellow, TypeScript=blue) + - Node size shows importance based on connectivity + - Red highlighting for dependency cycles + - Border thickness encodes code churn +- **Actionable Insights**: Focus on what matters with filtered views +- **AI-Readable Output**: Generate SVG visualizations that LLMs can analyze + +```bash +# Basic visualization +aud graph viz + +# Show only dependency cycles +aud graph viz --view cycles --include-analysis + +# Top 5 hotspots with connections +aud graph viz --view hotspots --top-hotspots 5 + +# Architectural layers visualization +aud graph viz --view layers --format svg + +# Impact analysis for a specific file +aud graph viz --view impact --impact-target "src/auth.py" +``` + +### Insights Analysis (Optional) + +Separate from the core Truth Courier modules, TheAuditor offers optional Insights for technical scoring: + +```bash +# Run insights analysis on existing audit data +aud insights --mode all + +# ML-powered insights (requires: pip install -e ".[ml]") +aud insights --mode ml --ml-train + +# Graph health metrics and recommendations +aud insights --mode graph + +# Generate comprehensive insights report +aud insights --output insights_report.json +``` + +Insights modules add interpretive scoring on top of factual data: +- **Health Scores**: Architecture quality metrics +- **Severity Classification**: Risk assessment beyond raw findings +- **Recommendations**: Actionable improvement suggestions +- **ML Predictions**: Pattern-based issue prediction + +## Contributing + +We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for: +- How to add new language support +- Creating security patterns +- Adding framework-specific rules +- Development guidelines + +We especially need help with: +- **GraphQL** analysis +- **Java/Spring** support +- **Go** patterns +- **Ruby on Rails** detection +- **C#/.NET** analysis + +## License + +AGPL-3.0 + +## Commercial Licensing + +TheAuditor is AGPL-3.0 licensed. For commercial use, SaaS deployment, or integration into proprietary systems, please contact via GitHub for licensing options. 
+
+## Support
+
+For issues, questions, or feature requests, please open an issue on our [GitHub repository](https://github.com/TheAuditorTool/Auditor).
+
+---
+
+*TheAuditor: Bringing ground truth to AI-assisted development*
\ No newline at end of file
diff --git a/ROADMAP.md b/ROADMAP.md
new file mode 100644
index 0000000..418833a
--- /dev/null
+++ b/ROADMAP.md
@@ -0,0 +1,71 @@
+# TheAuditor Project Roadmap
+
+TheAuditor's mission is to provide an incorruptible source of ground truth for AI-assisted development. This roadmap outlines our vision for evolving the platform while maintaining our commitment to verifiable, uninterpreted data that both developers and AI assistants can trust.
+
+## Guiding Principles
+
+All future development must adhere to these architectural rules:
+
+* **Never Interpret Truth**: TheAuditor preserves raw, verifiable data from industry-standard tools. We orchestrate and structure, but never summarize or interpret the core evidence.
+* **AI-First Output**: All new reports and findings must be structured for LLM consumption, with outputs chunked to fit context windows and formatted for machine parsing.
+* **Industry-Standard Tooling**: We prioritize integrating battle-tested, widely-adopted tools over building custom analyzers. The community trusts ESLint, Ruff, and similar tools—we leverage that trust.
+* **Offline-First Operation**: All analysis must run without network access, ensuring data privacy and reproducible results.
+* **Sandboxed Execution**: Analysis tools remain isolated from project dependencies to prevent cross-contamination and ensure consistent results.
+
+## Development Priorities
+
+### Tier 1: Core Engine Enhancements (Maintained by TheAuditorTool)
+
+These are our primary focus areas where we will lead development:
+
+* **Improve & Expand Existing Components**: Enhance current extractors (Python, JavaScript/TypeScript), expand pattern coverage beyond basic regex, add more AST-based rules for deeper semantic analysis, and improve parser accuracy for configuration files
+* **Performance Improvements**: Optimize analysis speed for large codebases, improve parallel processing, and reduce memory footprint during graph analysis
+* **Deeper Taint Analysis**: Enhance data-flow tracking to detect more complex injection patterns, improve inter-procedural analysis, and add support for asynchronous code flows
+* **Advanced Pattern Detection**: Expand YAML-based rule engine capabilities, add support for semantic patterns beyond regex, and improve cross-file correlation
+* **Improved AI Output Formatting**: Optimize chunk generation for newer LLM context windows, add structured output formats (JSON-LD), and enhance evidence presentation
+* **FCE Optimization**: Carefully push the FCE (Factual Correlation Engine) toward slightly more "actionable grouping intelligence" behaviour. It's a tricky balance to strike without falling into endless error mapping, guessing, or interpretation...
+
+### Tier 2: Expanding Coverage (Community Contributions Welcome)
+
+We actively seek community expertise to expand TheAuditor's capabilities in these areas:
+
+* **GraphQL Support**: Add comprehensive GraphQL schema analysis, query complexity detection, and authorization pattern verification
+
+* **Framework-Specific Rules** (Currently Limited to Basic Regex Patterns):
+
+  **Note**: We currently have very basic framework detection (outside the Python/Node ecosystem) and minimal framework-specific patterns. Most are simple regex patterns in `/patterns` with no real AST-based rules in `/rules`.
The architecture supports expansion, but substantial work is needed: + + * Django: Enhanced ORM analysis, middleware security patterns, template injection detection + * Ruby on Rails: ActiveRecord anti-patterns, authentication bypass detection, mass assignment vulnerabilities + * Angular: Dependency injection issues, template security, change detection problems + * Laravel: Eloquent ORM patterns, blade template security, middleware analysis + * Spring Boot: Bean configuration issues, security annotations, JPA query analysis + * Next.js: Server-side rendering security, API route protection, data fetching patterns + * FastAPI: Pydantic validation gaps, dependency injection security, async patterns + * Express.js: Middleware ordering issues, CORS misconfigurations, session handling + +* **Language Support Expansion** (Top 10 Languages Outside Python/Node Ecosystem): + + **Current State**: Full support for Python and JavaScript/TypeScript only. The modular architecture supports adding new languages via extractors, but each requires significant implementation effort: + + 1. **Java**: JVM bytecode analysis, Spring/Spring Boot integration, Maven/Gradle dependency scanning, Android-specific patterns + 2. **C#**: .NET CLR analysis, ASP.NET Core patterns, Entity Framework queries, NuGet vulnerability scanning + 3. **Go**: Goroutine leak detection, error handling patterns, module security analysis, interface compliance + 4. **Rust**: Unsafe block analysis, lifetime/borrow checker integration, cargo dependency scanning, memory safety patterns + 5. **PHP**: Composer dependency analysis, Laravel/Symfony patterns, SQL injection detection, legacy code patterns + 6. **Ruby**: Gem vulnerability scanning, Rails-specific patterns, metaprogramming analysis, DSL parsing + 7. **Swift**: iOS security patterns, memory management issues, Objective-C interop, CocoaPods scanning + 8. **Kotlin**: Coroutine analysis, null safety violations, Android-specific patterns, Gradle integration + 9. **C/C++**: Memory safety issues, buffer overflow detection, undefined behavior patterns, CMake/Make analysis + 10. **Scala**: Akka actor patterns, implicit resolution issues, SBT dependency analysis, functional pattern detection + +### Tier 3: Docs sync ### + +Its a nightmare keeping track of everything and "AI compilations" never reflect the actual code, its surface level guessing, at best :( + +## Conclusion + +TheAuditor's strength lies in its unwavering commitment to ground truth. Whether you're interested in performance optimization, security analysis, or framework support, we welcome contributions that align with our core principles. + +Join the discussion on [GitHub Issues](https://github.com/TheAuditorTool/Auditor/issues) to share ideas, report bugs, or propose enhancements. Ready to contribute? See our [CONTRIBUTING.md](CONTRIBUTING.md) for detailed setup instructions and development guidelines. + diff --git a/agent_templates/generic-template.md b/agent_templates/generic-template.md new file mode 100644 index 0000000..128fa8d --- /dev/null +++ b/agent_templates/generic-template.md @@ -0,0 +1,30 @@ +--- +name: {AGENT_NAME} +description: {AGENT_DESC} +tools: Bash, Glob, Grep, LS, Read, Edit, WebFetch, TodoWrite, WebSearch, BashOutput, KillBash +model: opus +color: blue +--- + +# {AGENT_NAME} + +{AGENT_DESC} + +## Core Responsibilities + +{AGENT_BODY} + +## Working Directory + +You operate from the project root directory. 
+ +## Key Commands + +When using project tools, always use the project-local wrapper: +- Use `{PROJECT_AUD}` instead of `aud` + +## Communication Style + +- Be concise and focused +- Report findings clearly +- Suggest actionable next steps \ No newline at end of file diff --git a/agent_templates/sopmanager.md b/agent_templates/sopmanager.md new file mode 100644 index 0000000..f187033 --- /dev/null +++ b/agent_templates/sopmanager.md @@ -0,0 +1,47 @@ +--- +name: sopmanager +description: Manages team SOPs and ensures compliance with development standards +tools: Bash, Glob, Grep, LS, Read +model: opus +color: blue +--- + +# SOP Manager + +Manages team SOPs and ensures compliance with development standards. + +## Core Responsibilities + +- Monitor adherence to team standard operating procedures +- Review code changes for SOP compliance +- Identify deviations from established patterns +- Report on team conventions and best practices +- Ensure documentation standards are met +- Track technical debt and code quality metrics + +## Working Directory + +You operate from the project root directory. + +## Key Commands + +When using project tools, always use the project-local wrapper: +- Use `./python.exe -m theauditor.cli` or `aud` depending on environment + +## Communication Style + + +## SOP Focus Areas + + + +## Reporting Format + +When reviewing code, provide structured reports: + +## Important Notes + +- This agent has READ-ONLY access (no Write/Edit tools) +- Cannot modify code directly, only report findings +- Focuses on objective standards, not subjective preferences +- Works alongside other agents to maintain quality \ No newline at end of file diff --git a/package-template.json b/package-template.json new file mode 100644 index 0000000..d921563 --- /dev/null +++ b/package-template.json @@ -0,0 +1,15 @@ +{ + "name": "project-linters", + "version": "1.0.0", + "private": true, + "description": "JavaScript/TypeScript linting tools for TheAuditor", + "devDependencies": { + "eslint": "^9.34.0", + "prettier": "^3.6.2", + "typescript": "^5.9.2", + "@typescript-eslint/parser": "^8.41.0", + "@typescript-eslint/eslint-plugin": "^8.41.0", + "eslint-config-prettier": "^10.1.8", + "eslint-plugin-prettier": "^5.5.4" + } +} \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..e304a6f --- /dev/null +++ b/package.json @@ -0,0 +1,15 @@ +{ + "private": true, + "devDependencies": { + "eslint": "9.35.0", + "@typescript-eslint/parser": "8.42.0", + "@typescript-eslint/eslint-plugin": "8.42.0", + "typescript": "5.9.2", + "prettier": "3.6.2" + }, + "scripts": { + "lint": "eslint .", + "typecheck": "tsc --noEmit", + "format": "prettier -c ." 
+ } +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..42ed5ba --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,113 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "theauditor" +version = "1.0.1" +description = "Offline, air-gapped CLI for repo indexing, evidence checking, and task running" +readme = "README.md" +requires-python = ">=3.11" +license = {text = "AGPL-3.0"} +authors = [ + {name = "TheAuditor Team"} +] +dependencies = [ + "click==8.2.1", + "PyYAML==6.0.2", + "jsonschema==4.25.1", + "ijson==3.4.0", +] + +[project.optional-dependencies] +dev = [ + "pytest==8.4.2", + "ruff==0.12.12", + "black==25.1.0", +] +linters = [ + "ruff==0.12.12", + "mypy==1.17.1", + "black==25.1.0", + "bandit==1.8.6", + "pylint==3.3.8", +] +ml = [ + "scikit-learn==1.7.1", + "numpy==2.3.2", + "scipy==1.16.1", + "joblib==1.5.2", +] +ast = [ + "tree-sitter==0.25.1", + "tree-sitter-language-pack==0.9.0", + "sqlparse==0.5.3", + "dockerfile-parse==2.0.1", +] +all = [ + # Dev tools + "pytest==8.4.2", + # Linters + "ruff==0.12.12", + "mypy==1.17.1", + "black==25.1.0", + "bandit==1.8.6", + "pylint==3.3.8", + # ML features + "scikit-learn==1.7.1", + "numpy==2.3.2", + "scipy==1.16.1", + "joblib==1.5.2", + # AST parsing + "tree-sitter==0.25.1", + "tree-sitter-language-pack==0.9.0", + # SQL parsing + "sqlparse==0.5.3", + # Docker parsing + "dockerfile-parse==2.0.1", +] + +[project.scripts] +aud = "theauditor.cli:main" + +[tool.hatch.build.targets.wheel] +packages = ["theauditor"] + +[tool.ruff] +line-length = 100 +target-version = "py311" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "N", # pep8-naming + "UP", # pyupgrade + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "SIM", # flake8-simplify +] +ignore = [ + "E501", # line too long - handled by black + "SIM105", # contextlib.suppress - can be less readable + "SIM117", # multiple with statements - can be less readable +] + +[tool.ruff.lint.isort] +known-first-party = ["theauditor"] + +[tool.black] +line-length = 100 +target-version = ["py311"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["."] +addopts = "-v" + +[tool.mypy] +python_version = "3.12" +strict = true +warn_unused_configs = true \ No newline at end of file diff --git a/theauditor/.gitattributes b/theauditor/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/theauditor/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/theauditor/__init__.py b/theauditor/__init__.py new file mode 100644 index 0000000..c30ba90 --- /dev/null +++ b/theauditor/__init__.py @@ -0,0 +1,3 @@ +"""TheAuditor - Offline, air-gapped CLI for repo indexing and evidence checking.""" + +__version__ = "0.1.0" diff --git a/theauditor/agent_template_validator.py b/theauditor/agent_template_validator.py new file mode 100644 index 0000000..52e76c3 --- /dev/null +++ b/theauditor/agent_template_validator.py @@ -0,0 +1,347 @@ +"""Agent template validator - ensures templates comply with SOP permissions.""" + +import json +import re +from pathlib import Path +from typing import Dict, List, Any, Tuple, Optional +import yaml + + +class TemplateValidator: + """Validates agent templates for SOP compliance and structure.""" + + # Tools that allow code modification + WRITE_TOOLS = {"Write", "Edit", "MultiEdit", "NotebookEdit"} + + # Agents allowed to modify code + 
ALLOWED_EDITOR_AGENTS = {"coder", "documentation-manager", "implementation-specialist"} + + # Required frontmatter fields + REQUIRED_FIELDS = {"name", "description", "tools", "model"} + + def __init__(self, template_dir: str = None): + """Initialize validator with template directory.""" + if template_dir: + self.template_dir = Path(template_dir) + else: + # Default to agent_templates relative to module + self.template_dir = Path(__file__).parent.parent / "agent_templates" + + self.violations = [] + self.warnings = [] + + def _extract_frontmatter(self, content: str) -> Optional[Dict[str, Any]]: + """Extract YAML frontmatter from markdown file. + + Args: + content: File content + + Returns: + Parsed frontmatter dict or None if not found + """ + # Match frontmatter between --- markers + pattern = r'^---\s*\n(.*?)\n---\s*\n' + match = re.match(pattern, content, re.DOTALL) + + if not match: + return None + + try: + frontmatter_text = match.group(1) + return yaml.safe_load(frontmatter_text) + except yaml.YAMLError as e: + self.violations.append(f"Invalid YAML frontmatter: {e}") + return None + + def _parse_tools(self, tools_value: Any) -> List[str]: + """Parse tools from frontmatter value. + + Args: + tools_value: Tools field from frontmatter + + Returns: + List of tool names + """ + if isinstance(tools_value, str): + # Comma-separated string + return [t.strip() for t in tools_value.split(',')] + elif isinstance(tools_value, list): + return tools_value + else: + return [] + + def _check_sop_permissions( + self, + template_name: str, + frontmatter: Dict[str, Any] + ) -> List[str]: + """Check SOP permission rules. + + Args: + template_name: Name of template file + frontmatter: Parsed frontmatter + + Returns: + List of violations found + """ + violations = [] + + # Get name and description, ensuring they're strings + agent_name = frontmatter.get("name", "") + if not isinstance(agent_name, str): + agent_name = str(agent_name) if agent_name else "" + # Skip validation for templates with placeholders + if "{" in agent_name or "}" in agent_name: + # This is a template with placeholders, not a real agent + return [] + agent_name = agent_name.lower() + + description = frontmatter.get("description", "") + if not isinstance(description, str): + description = str(description) if description else "" + description = description.lower() + + tools = self._parse_tools(frontmatter.get("tools", "")) + + # Check if agent has write tools + has_write_tools = any(tool in self.WRITE_TOOLS for tool in tools) + + # Check compliance/legal agents first (they have stricter rules) + is_compliance_agent = ( + "compliance" in agent_name or + "compliance" in description or + "legal" in agent_name or + "legal" in description + ) + + if is_compliance_agent and has_write_tools: + violations.append( + f"Compliance/legal agent '{agent_name}' must not have write tools, " + f"found: {self.WRITE_TOOLS & set(tools)}" + ) + elif has_write_tools: + # For non-compliance agents, check if they're allowed to have write tools + is_allowed_editor = any( + allowed in agent_name + for allowed in self.ALLOWED_EDITOR_AGENTS + ) + + if not is_allowed_editor: + violations.append( + f"Agent '{agent_name}' has write tools ({self.WRITE_TOOLS & set(tools)}) " + f"but is not in allowed editor list: {self.ALLOWED_EDITOR_AGENTS}" + ) + + return violations + + def _check_internal_links( + self, + content: str, + template_path: Path + ) -> List[str]: + """Check internal repository links are valid. 
+ + Args: + content: Template content + template_path: Path to template file + + Returns: + List of broken links + """ + broken_links = [] + + # Find markdown links and references to repo paths + link_patterns = [ + r'\[.*?\]\((\/[^)]+)\)', # Markdown links with absolute paths + r'`(\/[^`]+)`', # Code blocks with paths + r'"(\/[^"]+)"', # Quoted paths + r"'(\/[^']+)'", # Single-quoted paths + ] + + for pattern in link_patterns: + for match in re.finditer(pattern, content): + path_str = match.group(1) + + # Skip URLs and anchors + if path_str.startswith('http') or path_str.startswith('#'): + continue + + # Check if path exists relative to repo root + repo_root = template_path.parent.parent + full_path = repo_root / path_str.lstrip('/') + + if not full_path.exists(): + broken_links.append(f"Broken internal link: {path_str}") + + return broken_links + + def validate_template(self, template_path: Path) -> Dict[str, Any]: + """Validate a single template file. + + Args: + template_path: Path to template markdown file + + Returns: + Validation result dict + """ + result = { + "path": str(template_path), + "valid": True, + "violations": [], + "warnings": [] + } + + try: + with open(template_path, 'r', encoding='utf-8') as f: + content = f.read() + except IOError as e: + result["valid"] = False + result["violations"].append(f"Cannot read file: {e}") + return result + + # Extract frontmatter + frontmatter = self._extract_frontmatter(content) + + if frontmatter is None: + result["valid"] = False + result["violations"].append("No valid frontmatter found") + return result + + # Check required fields + missing_fields = self.REQUIRED_FIELDS - set(frontmatter.keys()) + if missing_fields: + result["valid"] = False + result["violations"].append( + f"Missing required frontmatter fields: {missing_fields}" + ) + + # Check SOP permissions + sop_violations = self._check_sop_permissions( + template_path.name, + frontmatter + ) + if sop_violations: + result["valid"] = False + result["violations"].extend(sop_violations) + + # Check internal links + broken_links = self._check_internal_links(content, template_path) + if broken_links: + result["warnings"].extend(broken_links) + + # Check for tool typos/inconsistencies + tools = self._parse_tools(frontmatter.get("tools", "")) + known_tools = { + "Bash", "Glob", "Grep", "LS", "Read", "Edit", "Write", + "MultiEdit", "NotebookEdit", "WebFetch", "TodoWrite", + "WebSearch", "BashOutput", "KillBash", "Task", "ExitPlanMode" + } + + unknown_tools = set(tools) - known_tools + if unknown_tools: + result["warnings"].append( + f"Unknown tools found: {unknown_tools}" + ) + + return result + + def validate_all(self, source_dir: Optional[str] = None) -> Dict[str, Any]: + """Validate all templates in directory. 
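+ Only top-level *.md files are scanned (glob("*.md"), no recursion). Typical use,
+ with an illustrative path: TemplateValidator().validate_all("agent_templates") returns
+ a summary dict with "valid", "templates_checked", "total_violations",
+ "total_warnings" and the per-template results under "templates".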
+ + Args: + source_dir: Directory containing templates (default: self.template_dir) + + Returns: + Validation summary + """ + if source_dir: + template_dir = Path(source_dir) + else: + template_dir = self.template_dir + + if not template_dir.exists(): + return { + "valid": False, + "error": f"Template directory not found: {template_dir}", + "templates": [] + } + + results = [] + all_valid = True + total_violations = 0 + total_warnings = 0 + + # Find all .md files + for template_path in template_dir.glob("*.md"): + result = self.validate_template(template_path) + results.append(result) + + if not result["valid"]: + all_valid = False + + total_violations += len(result["violations"]) + total_warnings += len(result["warnings"]) + + return { + "valid": all_valid, + "templates_checked": len(results), + "total_violations": total_violations, + "total_warnings": total_warnings, + "templates": results + } + + def generate_report( + self, + validation_results: Dict[str, Any], + format: str = "json" + ) -> str: + """Generate validation report. + + Args: + validation_results: Results from validate_all() + format: Output format ('json' or 'text') + + Returns: + Formatted report string + """ + if format == "json": + return json.dumps(validation_results, indent=2, sort_keys=True) + + # Text format + lines = [] + lines.append("=== Agent Template Validation Report ===\n") + lines.append(f"Templates checked: {validation_results['templates_checked']}") + lines.append(f"Total violations: {validation_results['total_violations']}") + lines.append(f"Total warnings: {validation_results['total_warnings']}") + lines.append(f"Overall status: {'PASS' if validation_results['valid'] else 'FAIL'}\n") + + for template in validation_results.get("templates", []): + lines.append(f"\n{template['path']}:") + lines.append(f" Status: {'✓' if template['valid'] else '✗'}") + + if template["violations"]: + lines.append(" Violations:") + for v in template["violations"]: + lines.append(f" - {v}") + + if template["warnings"]: + lines.append(" Warnings:") + for w in template["warnings"]: + lines.append(f" - {w}") + + return "\n".join(lines) + + +# Module-level convenience function +def validate_templates(source_dir: str) -> Tuple[bool, Dict[str, Any]]: + """Validate all templates in directory. + + Args: + source_dir: Directory containing agent templates + + Returns: + Tuple of (all_valid, validation_results) + """ + validator = TemplateValidator() + results = validator.validate_all(source_dir) + return results["valid"], results \ No newline at end of file diff --git a/theauditor/ast_extractors/__init__.py b/theauditor/ast_extractors/__init__.py new file mode 100644 index 0000000..bd7529c --- /dev/null +++ b/theauditor/ast_extractors/__init__.py @@ -0,0 +1,348 @@ +"""AST Data Extraction Engine - Package Router. + +This module provides the main ASTExtractorMixin class that routes extraction +requests to the appropriate language-specific implementation. +""" + +import os +from typing import Any, List, Dict, Optional, TYPE_CHECKING +from dataclasses import dataclass +from pathlib import Path + +# Import all implementations +from . 
import python_impl, typescript_impl, treesitter_impl +from .base import detect_language + +# Import semantic parser if available +try: + from ..js_semantic_parser import get_semantic_ast_batch +except ImportError: + get_semantic_ast_batch = None + +if TYPE_CHECKING: + # For type checking only, avoid circular import + from ..ast_parser import ASTMatch +else: + # At runtime, ASTMatch will be available from the parent class + @dataclass + class ASTMatch: + """Represents an AST pattern match.""" + node_type: str + start_line: int + end_line: int + start_col: int + snippet: str + metadata: Dict[str, Any] = None + + +class ASTExtractorMixin: + """Mixin class providing data extraction capabilities for AST analysis. + + This class acts as a pure router, delegating all extraction logic to + language-specific implementation modules. + """ + + def extract_functions(self, tree: Any, language: str = None) -> List[Dict]: + """Extract function definitions from AST. + + Args: + tree: AST tree. + language: Programming language. + + Returns: + List of function info dictionaries. + """ + if not tree: + return [] + + # Route to appropriate implementation + if isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + return python_impl.extract_python_functions(tree, self) + elif tree_type == "semantic_ast": + return typescript_impl.extract_typescript_functions(tree, self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + return treesitter_impl.extract_treesitter_functions(tree, self, language) + + return [] + + def extract_classes(self, tree: Any, language: str = None) -> List[Dict]: + """Extract class definitions from AST.""" + if not tree: + return [] + + if isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + return python_impl.extract_python_classes(tree, self) + elif tree_type == "semantic_ast": + return typescript_impl.extract_typescript_classes(tree, self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + return treesitter_impl.extract_treesitter_classes(tree, self, language) + + return [] + + def extract_calls(self, tree: Any, language: str = None) -> List[Dict]: + """Extract function calls from AST.""" + if not tree: + return [] + + if isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + return python_impl.extract_python_calls(tree, self) + elif tree_type == "semantic_ast": + return typescript_impl.extract_typescript_calls(tree, self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + return treesitter_impl.extract_treesitter_calls(tree, self, language) + + return [] + + def extract_imports(self, tree: Any, language: str = None) -> List[Dict[str, Any]]: + """Extract import statements from AST.""" + if not tree: + return [] + + if isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + return python_impl.extract_python_imports(tree, self) + elif tree_type == "semantic_ast": + return typescript_impl.extract_typescript_imports(tree, self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + return treesitter_impl.extract_treesitter_imports(tree, self, language) + + return [] + + def extract_exports(self, tree: Any, language: str = None) -> List[Dict[str, Any]]: + """Extract export statements from AST.""" + if not tree: + return [] + + if 
isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + return python_impl.extract_python_exports(tree, self) + elif tree_type == "semantic_ast": + return typescript_impl.extract_typescript_exports(tree, self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + return treesitter_impl.extract_treesitter_exports(tree, self, language) + + return [] + + def extract_properties(self, tree: Any, language: str = None) -> List[Dict]: + """Extract property accesses from AST (e.g., req.body, req.query). + + This is critical for taint analysis to find JavaScript property access patterns. + """ + if not tree: + return [] + + if isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + return python_impl.extract_python_properties(tree, self) + elif tree_type == "semantic_ast": + return typescript_impl.extract_typescript_properties(tree, self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + return treesitter_impl.extract_treesitter_properties(tree, self, language) + + return [] + + def extract_assignments(self, tree: Any, language: str = None) -> List[Dict[str, Any]]: + """Extract variable assignments for data flow analysis.""" + if not tree: + return [] + + if isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + return python_impl.extract_python_assignments(tree, self) + elif tree_type == "semantic_ast": + # The semantic result is nested in tree["tree"] + return typescript_impl.extract_typescript_assignments(tree.get("tree", {}), self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + return treesitter_impl.extract_treesitter_assignments(tree, self, language) + + return [] + + def extract_function_calls_with_args(self, tree: Any, language: str = None) -> List[Dict[str, Any]]: + """Extract function calls with argument mapping for data flow analysis. + + This is a two-pass analysis: + 1. First pass: Find all function definitions and their parameters + 2. Second pass: Find all function calls and map arguments to parameters + """ + if not tree: + return [] + + # First pass: Get all function definitions with their parameters + function_params = self._extract_function_parameters(tree, language) + + # Second pass: Extract calls with argument mapping + calls_with_args = [] + + if isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + calls_with_args = python_impl.extract_python_calls_with_args(tree, function_params, self) + elif tree_type == "semantic_ast": + calls_with_args = typescript_impl.extract_typescript_calls_with_args(tree, function_params, self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + calls_with_args = treesitter_impl.extract_treesitter_calls_with_args( + tree, function_params, self, language + ) + + return calls_with_args + + def _extract_function_parameters(self, tree: Any, language: str = None) -> Dict[str, List[str]]: + """Extract function definitions and their parameter names. 
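+ Serves as the first pass of extract_function_calls_with_args: the returned mapping
+ is handed to the language implementations so call-site arguments can be paired with
+ parameter names.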
+ + Returns: + Dict mapping function_name -> list of parameter names + """ + if not tree: + return {} + + if isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + return python_impl.extract_python_function_params(tree, self) + elif tree_type == "semantic_ast": + return typescript_impl.extract_typescript_function_params(tree, self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + return treesitter_impl.extract_treesitter_function_params(tree, self, language) + + return {} + + def extract_returns(self, tree: Any, language: str = None) -> List[Dict[str, Any]]: + """Extract return statements for data flow analysis.""" + if not tree: + return [] + + if isinstance(tree, dict): + tree_type = tree.get("type") + language = tree.get("language", language) + + if tree_type == "python_ast": + return python_impl.extract_python_returns(tree, self) + elif tree_type == "semantic_ast": + return typescript_impl.extract_typescript_returns(tree, self) + elif tree_type == "tree_sitter" and self.has_tree_sitter: + return treesitter_impl.extract_treesitter_returns(tree, self, language) + + return [] + + def parse_files_batch(self, file_paths: List[Path], root_path: str = None) -> Dict[str, Any]: + """Parse multiple files into ASTs in batch for performance. + + This method dramatically improves performance for JavaScript/TypeScript projects + by processing multiple files in a single TypeScript compiler invocation. + + Args: + file_paths: List of paths to source files + root_path: Absolute path to project root (for sandbox resolution) + + Returns: + Dictionary mapping file paths to their AST trees + """ + results = {} + + # Separate files by language + js_ts_files = [] + python_files = [] + other_files = [] + + for file_path in file_paths: + language = self._detect_language(file_path) + if language in ["javascript", "typescript"]: + js_ts_files.append(file_path) + elif language == "python": + python_files.append(file_path) + else: + other_files.append(file_path) + + # Batch process JavaScript/TypeScript files if in a JS or polyglot project + project_type = self._detect_project_type() + if js_ts_files and project_type in ["javascript", "polyglot"] and get_semantic_ast_batch: + try: + # Convert paths to strings for the semantic parser with normalized separators + js_ts_paths = [str(f).replace("\\", "/") for f in js_ts_files] + + # Use batch processing for JS/TS files + batch_results = get_semantic_ast_batch(js_ts_paths, project_root=root_path) + + # Process batch results + for file_path in js_ts_files: + file_str = str(file_path).replace("\\", "/") # Normalize for matching + if file_str in batch_results: + semantic_result = batch_results[file_str] + if semantic_result.get("success"): + # Read file content for inclusion + try: + with open(file_path, "rb") as f: + content = f.read() + + results[str(file_path).replace("\\", "/")] = { + "type": "semantic_ast", + "tree": semantic_result, + "language": self._detect_language(file_path), + "content": content.decode("utf-8", errors="ignore"), + "has_types": semantic_result.get("hasTypes", False), + "diagnostics": semantic_result.get("diagnostics", []), + "symbols": semantic_result.get("symbols", []) + } + except Exception as e: + print(f"Warning: Failed to read {file_path}: {e}, falling back to individual parsing") + # CRITICAL FIX: Fall back to individual parsing on read failure + individual_result = self.parse_file(file_path, root_path=root_path) + 
results[str(file_path).replace("\\", "/")] = individual_result + else: + print(f"Warning: Semantic parser failed for {file_path}: {semantic_result.get('error')}, falling back to individual parsing") + # CRITICAL FIX: Fall back to individual parsing instead of None + individual_result = self.parse_file(file_path, root_path=root_path) + results[str(file_path).replace("\\", "/")] = individual_result + else: + # CRITICAL FIX: Fall back to individual parsing instead of None + print(f"Warning: No batch result for {file_path}, falling back to individual parsing") + individual_result = self.parse_file(file_path, root_path=root_path) + results[str(file_path).replace("\\", "/")] = individual_result + + except Exception as e: + print(f"Warning: Batch processing failed for JS/TS files: {e}") + # Fall back to individual processing + for file_path in js_ts_files: + results[str(file_path).replace("\\", "/")] = self.parse_file(file_path, root_path=root_path) + else: + # Process JS/TS files individually if not in JS project or batch failed + for file_path in js_ts_files: + results[str(file_path).replace("\\", "/")] = self.parse_file(file_path, root_path=root_path) + + # Process Python files individually (they're fast enough) + for file_path in python_files: + results[str(file_path).replace("\\", "/")] = self.parse_file(file_path, root_path=root_path) + + # Process other files individually + for file_path in other_files: + results[str(file_path).replace("\\", "/")] = self.parse_file(file_path, root_path=root_path) + + return results \ No newline at end of file diff --git a/theauditor/ast_extractors/base.py b/theauditor/ast_extractors/base.py new file mode 100644 index 0000000..2e0e08b --- /dev/null +++ b/theauditor/ast_extractors/base.py @@ -0,0 +1,173 @@ +"""Base utilities and shared helpers for AST extraction. + +This module contains utility functions shared across all language implementations. +""" + +import ast +import re +from typing import Any, List, Optional +from pathlib import Path + + +def get_node_name(node: Any) -> str: + """Get the name from an AST node, handling different node types. + + Works with Python's built-in AST nodes. + """ + if isinstance(node, ast.Name): + return node.id + elif isinstance(node, ast.Attribute): + return f"{get_node_name(node.value)}.{node.attr}" + elif isinstance(node, ast.Call): + return get_node_name(node.func) + elif isinstance(node, str): + return node + else: + return "unknown" + + +def extract_vars_from_expr(node: ast.AST) -> List[str]: + """Extract all variable names from a Python expression. + + Walks the AST to find all Name and Attribute nodes. + """ + vars_list = [] + for subnode in ast.walk(node): + if isinstance(subnode, ast.Name): + vars_list.append(subnode.id) + elif isinstance(subnode, ast.Attribute): + # For x.y.z, get the full chain + chain = [] + current = subnode + while isinstance(current, ast.Attribute): + chain.append(current.attr) + current = current.value + if isinstance(current, ast.Name): + chain.append(current.id) + vars_list.append(".".join(reversed(chain))) + return vars_list + + +def extract_vars_from_tree_sitter_expr(expr: str) -> List[str]: + """Extract variable names from a JavaScript/TypeScript expression string. + + Uses regex to find identifiers that aren't keywords. 
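+ For example, "req.body.name || defaultName" yields ["req", "body", "name", "defaultName"],
+ while reserved words such as "return" or "new" are dropped by the negative lookahead.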
+ """ + # Match identifiers that are not keywords + pattern = r'\b(?!(?:const|let|var|function|return|if|else|for|while|true|false|null|undefined|new|this)\b)[a-zA-Z_$][a-zA-Z0-9_$]*\b' + return re.findall(pattern, expr) + + +def find_containing_function_python(tree: ast.AST, line: int) -> Optional[str]: + """Find the function containing a given line in Python AST.""" + containing_func = None + + for node in ast.walk(tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + if hasattr(node, "lineno") and hasattr(node, "end_lineno"): + if node.lineno <= line <= (node.end_lineno or node.lineno): + # Check if this is more specific than current containing_func + if containing_func is None or node.lineno > containing_func[1]: + containing_func = (node.name, node.lineno) + + return containing_func[0] if containing_func else None + + +def find_containing_function_tree_sitter(node: Any, content: str, language: str) -> Optional[str]: + """Find the function containing a node in Tree-sitter AST. + + Walks up the tree to find parent function, handling all modern JS/TS patterns. + """ + # Walk up the tree to find parent function + current = node + while current and hasattr(current, 'parent') and current.parent: + current = current.parent + if language in ["javascript", "typescript"]: + # CRITICAL FIX: Handle ALL function patterns in modern JS/TS + function_types = [ + "function_declaration", # function foo() {} + "function_expression", # const foo = function() {} + "arrow_function", # const foo = () => {} + "method_definition", # class { foo() {} } + "generator_function", # function* foo() {} + "async_function", # async function foo() {} + ] + + if current.type in function_types: + # Special handling for arrow functions FIRST + # They need different logic than regular functions + if current.type == "arrow_function": + # Arrow functions don't have names directly, check parent + parent = current.parent if hasattr(current, 'parent') else None + if parent: + # Check if it's assigned to a variable: const foo = () => {} + if parent.type == "variable_declarator": + # Use field-based API to get the name + if hasattr(parent, 'child_by_field_name'): + name_node = parent.child_by_field_name('name') + if name_node and name_node.text: + return name_node.text.decode("utf-8", errors="ignore") + # Fallback to child iteration + for child in parent.children: + if child.type == "identifier" and child != current: + return child.text.decode("utf-8", errors="ignore") + # Check if it's a property: { foo: () => {} } + elif parent.type == "pair": + for child in parent.children: + if child.type in ["property_identifier", "identifier", "string"] and child != current: + text = child.text.decode("utf-8", errors="ignore") + # Remove quotes from string keys + return text.strip('"\'') + # CRITICAL FIX (Lead Auditor feedback): Don't return anything here! 
+ # Continue searching upward for containing named function + # This handles cases like: function outer() { arr.map(() => {}) } + # The arrow function should be tracked as within "outer", not "anonymous" + # Let the while loop continue to find outer function + continue # Skip the rest and continue searching upward + + # For non-arrow functions, try field-based API first + if hasattr(current, 'child_by_field_name'): + name_node = current.child_by_field_name('name') + if name_node and name_node.text: + return name_node.text.decode("utf-8", errors="ignore") + + # Fallback to child iteration for regular functions + for child in current.children: + if child.type in ["identifier", "property_identifier"]: + return child.text.decode("utf-8", errors="ignore") + + # If still no name found for this regular function, it's anonymous + return "anonymous" + + elif language == "python": + if current.type == "function_definition": + # Try field-based API first + if hasattr(current, 'child_by_field_name'): + name_node = current.child_by_field_name('name') + if name_node and name_node.text: + return name_node.text.decode("utf-8", errors="ignore") + # Fallback to child iteration + for child in current.children: + if child.type == "identifier": + return child.text.decode("utf-8", errors="ignore") + + # If no function found, return "global" instead of None for better tracking + return "global" + + +def detect_language(file_path: Path) -> str: + """Detect language from file extension. + + Returns empty string for unsupported languages. + """ + ext_map = { + ".py": "python", + ".js": "javascript", + ".jsx": "javascript", + ".ts": "typescript", + ".tsx": "typescript", + ".mjs": "javascript", + ".cjs": "javascript", + ".vue": "javascript", # Vue SFCs contain JavaScript/TypeScript + } + return ext_map.get(file_path.suffix.lower(), "") \ No newline at end of file diff --git a/theauditor/ast_extractors/python_impl.py b/theauditor/ast_extractors/python_impl.py new file mode 100644 index 0000000..1449950 --- /dev/null +++ b/theauditor/ast_extractors/python_impl.py @@ -0,0 +1,327 @@ +"""Python AST extraction implementations. + +This module contains all Python-specific extraction logic using the built-in ast module. +""" + +import ast +from typing import Any, List, Dict, Optional + +from .base import ( + get_node_name, + extract_vars_from_expr, + find_containing_function_python +) + + +def extract_python_functions(tree: Dict, parser_self) -> List[Dict]: + """Extract function definitions from Python AST. 
+ + Args: + tree: AST tree dictionary with 'tree' containing the actual AST + parser_self: Reference to the parser instance for accessing methods + + Returns: + List of function info dictionaries + """ + functions = [] + actual_tree = tree.get("tree") + + if not actual_tree: + return functions + + for node in ast.walk(actual_tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + functions.append({ + "name": node.name, + "line": node.lineno, + "async": isinstance(node, ast.AsyncFunctionDef), + "args": [arg.arg for arg in node.args.args], + }) + + return functions + + +def extract_python_classes(tree: Dict, parser_self) -> List[Dict]: + """Extract class definitions from Python AST.""" + classes = [] + actual_tree = tree.get("tree") + + if not actual_tree: + return classes + + for node in ast.walk(actual_tree): + if isinstance(node, ast.ClassDef): + classes.append({ + "name": node.name, + "line": node.lineno, + "column": node.col_offset, + "bases": [get_node_name(base) for base in node.bases], + }) + + return classes + + +def extract_python_calls(tree: Dict, parser_self) -> List[Dict]: + """Extract function calls from Python AST.""" + calls = [] + actual_tree = tree.get("tree") + + if not actual_tree: + return calls + + for node in ast.walk(actual_tree): + if isinstance(node, ast.Call): + func_name = get_node_name(node.func) + if func_name: + calls.append({ + "name": func_name, + "line": node.lineno, + "column": node.col_offset, + "args_count": len(node.args), + }) + + return calls + + +def extract_python_imports(tree: Dict, parser_self) -> List[Dict[str, Any]]: + """Extract import statements from Python AST.""" + imports = [] + actual_tree = tree.get("tree") + + if not actual_tree: + return imports + + for node in ast.walk(actual_tree): + if isinstance(node, ast.Import): + for alias in node.names: + imports.append({ + "source": "import", + "target": alias.name, + "type": "import", + "line": node.lineno, + "as": alias.asname, + "specifiers": [] + }) + elif isinstance(node, ast.ImportFrom): + module = node.module or "" + for alias in node.names: + imports.append({ + "source": "from", + "target": module, + "type": "from", + "line": node.lineno, + "imported": alias.name, + "as": alias.asname, + "specifiers": [alias.name] + }) + + return imports + + +def extract_python_exports(tree: Dict, parser_self) -> List[Dict[str, Any]]: + """Extract export statements from Python AST. + + In Python, all top-level functions, classes, and assignments are "exported". 
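+ "Top level" is approximated by checking col_offset == 0 on the node, so indented
+ (e.g. conditionally defined) names are not reported.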
+ """ + exports = [] + actual_tree = tree.get("tree") + + if not actual_tree: + return exports + + for node in ast.walk(actual_tree): + if isinstance(node, ast.FunctionDef) and node.col_offset == 0: + exports.append({ + "name": node.name, + "type": "function", + "line": node.lineno, + "default": False + }) + elif isinstance(node, ast.ClassDef) and node.col_offset == 0: + exports.append({ + "name": node.name, + "type": "class", + "line": node.lineno, + "default": False + }) + elif isinstance(node, ast.Assign) and node.col_offset == 0: + for target in node.targets: + if isinstance(target, ast.Name): + exports.append({ + "name": target.id, + "type": "variable", + "line": node.lineno, + "default": False + }) + + return exports + + +def extract_python_assignments(tree: Dict, parser_self) -> List[Dict[str, Any]]: + """Extract variable assignments from Python AST for data flow analysis.""" + import os + assignments = [] + actual_tree = tree.get("tree") + + if os.environ.get("THEAUDITOR_DEBUG"): + import sys + print(f"[AST_DEBUG] extract_python_assignments called", file=sys.stderr) + + if not actual_tree: + return assignments + + for node in ast.walk(actual_tree): + if isinstance(node, ast.Assign): + # Extract target variable(s) + for target in node.targets: + target_var = get_node_name(target) + source_expr = ast.unparse(node.value) if hasattr(ast, "unparse") else str(node.value) + + # Find containing function + in_function = find_containing_function_python(actual_tree, node.lineno) + + # CRITICAL FIX: Check if this is a class instantiation + # BeautifulSoup(html) is ast.Call with func.id = "BeautifulSoup" + is_instantiation = isinstance(node.value, ast.Call) + + assignments.append({ + "target_var": target_var, + "source_expr": source_expr, + "line": node.lineno, + "in_function": in_function or "global", + "source_vars": extract_vars_from_expr(node.value), + "is_instantiation": is_instantiation # Track for taint analysis + }) + + elif isinstance(node, ast.AnnAssign) and node.value: + # Handle annotated assignments (x: int = 5) + target_var = get_node_name(node.target) + source_expr = ast.unparse(node.value) if hasattr(ast, "unparse") else str(node.value) + + in_function = find_containing_function_python(actual_tree, node.lineno) + + assignments.append({ + "target_var": target_var, + "source_expr": source_expr, + "line": node.lineno, + "in_function": in_function or "global", + "source_vars": extract_vars_from_expr(node.value) + }) + + return assignments + + +def extract_python_function_params(tree: Dict, parser_self) -> Dict[str, List[str]]: + """Extract function definitions and their parameter names from Python AST.""" + func_params = {} + actual_tree = tree.get("tree") + + if not actual_tree: + return func_params + + for node in ast.walk(actual_tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + params = [arg.arg for arg in node.args.args] + func_params[node.name] = params + + return func_params + + +def extract_python_calls_with_args(tree: Dict, function_params: Dict[str, List[str]], parser_self) -> List[Dict[str, Any]]: + """Extract Python function calls with argument mapping.""" + calls = [] + actual_tree = tree.get("tree") + + if not actual_tree: + return calls + + # Find containing function for each call + function_ranges = {} + for node in ast.walk(actual_tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + if hasattr(node, "lineno") and hasattr(node, "end_lineno"): + function_ranges[node.name] = (node.lineno, node.end_lineno or node.lineno) + + for 
node in ast.walk(actual_tree): + if isinstance(node, ast.Call): + func_name = get_node_name(node.func) + + # Find caller function + caller_function = "global" + for fname, (start, end) in function_ranges.items(): + if start <= node.lineno <= end: + caller_function = fname + break + + # Get callee parameters + callee_params = function_params.get(func_name.split(".")[-1], []) + + # Map arguments to parameters + for i, arg in enumerate(node.args): + arg_expr = ast.unparse(arg) if hasattr(ast, "unparse") else str(arg) + param_name = callee_params[i] if i < len(callee_params) else f"arg{i}" + + calls.append({ + "line": node.lineno, + "caller_function": caller_function, + "callee_function": func_name, + "argument_index": i, + "argument_expr": arg_expr, + "param_name": param_name + }) + + return calls + + +def extract_python_returns(tree: Dict, parser_self) -> List[Dict[str, Any]]: + """Extract return statements from Python AST.""" + returns = [] + actual_tree = tree.get("tree") + + if not actual_tree: + return returns + + # First, map all functions + function_ranges = {} + for node in ast.walk(actual_tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + if hasattr(node, "lineno") and hasattr(node, "end_lineno"): + function_ranges[node.name] = (node.lineno, node.end_lineno or node.lineno) + + # Extract return statements + for node in ast.walk(actual_tree): + if isinstance(node, ast.Return): + # Find containing function + function_name = "global" + for fname, (start, end) in function_ranges.items(): + if start <= node.lineno <= end: + function_name = fname + break + + # Extract return expression + if node.value: + return_expr = ast.unparse(node.value) if hasattr(ast, "unparse") else str(node.value) + return_vars = extract_vars_from_expr(node.value) + else: + return_expr = "None" + return_vars = [] + + returns.append({ + "function_name": function_name, + "line": node.lineno, + "return_expr": return_expr, + "return_vars": return_vars + }) + + return returns + + +# Python doesn't have property accesses in the same way as JS +# This is a placeholder for consistency +def extract_python_properties(tree: Dict, parser_self) -> List[Dict]: + """Extract property accesses from Python AST. + + In Python, these would be attribute accesses. + Currently returns empty list for consistency. + """ + return [] \ No newline at end of file diff --git a/theauditor/ast_extractors/treesitter_impl.py b/theauditor/ast_extractors/treesitter_impl.py new file mode 100644 index 0000000..17ca6f2 --- /dev/null +++ b/theauditor/ast_extractors/treesitter_impl.py @@ -0,0 +1,711 @@ +"""Tree-sitter generic AST extraction implementations. + +This module contains Tree-sitter extraction logic that works across multiple languages. 
+""" + +from typing import Any, List, Dict, Optional + +from .base import ( + find_containing_function_tree_sitter, + extract_vars_from_tree_sitter_expr +) + + +def extract_treesitter_functions(tree: Dict, parser_self, language: str) -> List[Dict]: + """Extract function definitions from Tree-sitter AST.""" + actual_tree = tree.get("tree") + if not actual_tree: + return [] + + if not parser_self.has_tree_sitter: + return [] + + return _extract_tree_sitter_functions(actual_tree.root_node, language) + + +def _extract_tree_sitter_functions(node: Any, language: str) -> List[Dict]: + """Extract functions from Tree-sitter AST.""" + functions = [] + + if node is None: + return functions + + # Function node types per language + function_types = { + "python": ["function_definition"], + "javascript": ["function_declaration", "arrow_function", "function_expression", "method_definition"], + "typescript": ["function_declaration", "arrow_function", "function_expression", "method_definition"], + } + + node_types = function_types.get(language, []) + + if node.type in node_types: + # Extract function name + name = "anonymous" + for child in node.children: + if child.type in ["identifier", "property_identifier"]: + name = child.text.decode("utf-8", errors="ignore") + break + + functions.append({ + "name": name, + "line": node.start_point[0] + 1, + "type": node.type, + }) + + # Recursively search children + for child in node.children: + functions.extend(_extract_tree_sitter_functions(child, language)) + + return functions + + +def extract_treesitter_classes(tree: Dict, parser_self, language: str) -> List[Dict]: + """Extract class definitions from Tree-sitter AST.""" + actual_tree = tree.get("tree") + if not actual_tree: + return [] + + if not parser_self.has_tree_sitter: + return [] + + return _extract_tree_sitter_classes(actual_tree.root_node, language) + + +def _extract_tree_sitter_classes(node: Any, language: str) -> List[Dict]: + """Extract classes from Tree-sitter AST.""" + classes = [] + + if node is None: + return classes + + # Class node types per language + class_types = { + "python": ["class_definition"], + "javascript": ["class_declaration"], + "typescript": ["class_declaration", "interface_declaration"], + } + + node_types = class_types.get(language, []) + + if node.type in node_types: + # Extract class name + name = "anonymous" + for child in node.children: + if child.type in ["identifier", "type_identifier"]: + name = child.text.decode("utf-8", errors="ignore") + break + + classes.append({ + "name": name, + "line": node.start_point[0] + 1, + "column": node.start_point[1], + "type": node.type, + }) + + # Recursively search children + for child in node.children: + classes.extend(_extract_tree_sitter_classes(child, language)) + + return classes + + +def extract_treesitter_calls(tree: Dict, parser_self, language: str) -> List[Dict]: + """Extract function calls from Tree-sitter AST.""" + actual_tree = tree.get("tree") + if not actual_tree: + return [] + + if not parser_self.has_tree_sitter: + return [] + + return _extract_tree_sitter_calls(actual_tree.root_node, language) + + +def _extract_tree_sitter_calls(node: Any, language: str) -> List[Dict]: + """Extract function calls from Tree-sitter AST.""" + calls = [] + + if node is None: + return calls + + # Call node types per language + call_types = { + "python": ["call"], + "javascript": ["call_expression"], + "typescript": ["call_expression"], + } + + node_types = call_types.get(language, []) + + if node.type in node_types: + # Extract function name 
being called + name = "unknown" + for child in node.children: + if child.type in ["identifier", "member_expression", "attribute"]: + name = child.text.decode("utf-8", errors="ignore") + break + # Also handle property access patterns for methods like res.send() + elif child.type == "member_access_expression": + name = child.text.decode("utf-8", errors="ignore") + break + + calls.append({ + "name": name, + "line": node.start_point[0] + 1, + "column": node.start_point[1], + "type": "call", # Always use "call" type for database consistency + }) + + # Recursively search children + for child in node.children: + calls.extend(_extract_tree_sitter_calls(child, language)) + + return calls + + +def extract_treesitter_imports(tree: Dict, parser_self, language: str) -> List[Dict[str, Any]]: + """Extract import statements from Tree-sitter AST.""" + actual_tree = tree.get("tree") + if not actual_tree: + return [] + + if not parser_self.has_tree_sitter: + return [] + + return _extract_tree_sitter_imports(actual_tree.root_node, language) + + +def _extract_tree_sitter_imports(node: Any, language: str) -> List[Dict[str, Any]]: + """Extract imports from Tree-sitter AST with language-specific handling.""" + imports = [] + + if node is None: + return imports + + # Import node types per language + import_types = { + "javascript": ["import_statement", "import_clause", "require_call"], + "typescript": ["import_statement", "import_clause", "require_call", "import_type"], + "python": ["import_statement", "import_from_statement"], + } + + node_types = import_types.get(language, []) + + if node.type in node_types: + # Parse based on node type + if node.type == "import_statement": + # Handle: import foo from 'bar' + source_node = None + specifiers = [] + + for child in node.children: + if child.type == "string": + source_node = child.text.decode("utf-8", errors="ignore").strip("\"'") + elif child.type == "import_clause": + # Extract imported names + for spec_child in child.children: + if spec_child.type == "identifier": + specifiers.append(spec_child.text.decode("utf-8", errors="ignore")) + + if source_node: + imports.append({ + "source": "import", + "target": source_node, + "type": "import", + "line": node.start_point[0] + 1, + "specifiers": specifiers + }) + + elif node.type == "require_call": + # Handle: const foo = require('bar') + for child in node.children: + if child.type == "string": + target = child.text.decode("utf-8", errors="ignore").strip("\"'") + imports.append({ + "source": "require", + "target": target, + "type": "require", + "line": node.start_point[0] + 1, + "specifiers": [] + }) + + # Recursively search children + for child in node.children: + imports.extend(_extract_tree_sitter_imports(child, language)) + + return imports + + +def extract_treesitter_exports(tree: Dict, parser_self, language: str) -> List[Dict[str, Any]]: + """Extract export statements from Tree-sitter AST.""" + actual_tree = tree.get("tree") + if not actual_tree: + return [] + + if not parser_self.has_tree_sitter: + return [] + + return _extract_tree_sitter_exports(actual_tree.root_node, language) + + +def _extract_tree_sitter_exports(node: Any, language: str) -> List[Dict[str, Any]]: + """Extract exports from Tree-sitter AST.""" + exports = [] + + if node is None: + return exports + + # Export node types per language + export_types = { + "javascript": ["export_statement", "export_default_declaration"], + "typescript": ["export_statement", "export_default_declaration", "export_type"], + } + + node_types = export_types.get(language, 
[]) + + if node.type in node_types: + is_default = "default" in node.type + + # Extract exported name + name = "unknown" + export_type = "unknown" + + for child in node.children: + if child.type in ["identifier", "type_identifier"]: + name = child.text.decode("utf-8", errors="ignore") + elif child.type == "function_declaration": + export_type = "function" + for subchild in child.children: + if subchild.type == "identifier": + name = subchild.text.decode("utf-8", errors="ignore") + break + elif child.type == "class_declaration": + export_type = "class" + for subchild in child.children: + if subchild.type in ["identifier", "type_identifier"]: + name = subchild.text.decode("utf-8", errors="ignore") + break + + exports.append({ + "name": name, + "type": export_type, + "line": node.start_point[0] + 1, + "default": is_default + }) + + # Recursively search children + for child in node.children: + exports.extend(_extract_tree_sitter_exports(child, language)) + + return exports + + +def extract_treesitter_properties(tree: Dict, parser_self, language: str) -> List[Dict]: + """Extract property accesses from Tree-sitter AST.""" + actual_tree = tree.get("tree") + if not actual_tree: + return [] + + if not parser_self.has_tree_sitter: + return [] + + return _extract_tree_sitter_properties(actual_tree.root_node, language) + + +def _extract_tree_sitter_properties(node: Any, language: str) -> List[Dict]: + """Extract property accesses from Tree-sitter AST.""" + properties = [] + + if node is None: + return properties + + # Property access node types per language + property_types = { + "javascript": ["member_expression", "property_access_expression"], + "typescript": ["member_expression", "property_access_expression"], + "python": ["attribute"], + } + + node_types = property_types.get(language, []) + + if node.type in node_types: + # Extract the full property access chain + prop_text = node.text.decode("utf-8", errors="ignore") if node.text else "" + + # Filter for patterns that look like taint sources (req.*, request.*, ctx.*, etc.) 
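+ # Note: this is a plain substring test on the rendered expression text, so accesses
+ # such as "session.cookies" or "config.body" are kept as well; the taint analyzer
+ # downstream is expected to apply the precise source filtering.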
+ if any(pattern in prop_text for pattern in ["req.", "request.", "ctx.", "body", "query", "params", "headers", "cookies"]): + properties.append({ + "name": prop_text, + "line": node.start_point[0] + 1, + "column": node.start_point[1], + "type": "property" + }) + + # Recursively search children + for child in node.children: + properties.extend(_extract_tree_sitter_properties(child, language)) + + return properties + + +def extract_treesitter_assignments(tree: Dict, parser_self, language: str) -> List[Dict[str, Any]]: + """Extract variable assignments from Tree-sitter AST.""" + actual_tree = tree.get("tree") + content = tree.get("content", "") + + if not actual_tree: + return [] + + if not parser_self.has_tree_sitter: + return [] + + return _extract_tree_sitter_assignments(actual_tree.root_node, language, content) + + +def _extract_tree_sitter_assignments(node: Any, language: str, content: str) -> List[Dict[str, Any]]: + """Extract assignments from Tree-sitter AST.""" + import os + import sys + debug = os.environ.get("THEAUDITOR_DEBUG") + assignments = [] + + if node is None: + return assignments + + # Assignment node types per language + assignment_types = { + # Don't include variable_declarator - it's handled inside lexical_declaration/variable_declaration + "javascript": ["assignment_expression", "lexical_declaration", "variable_declaration"], + "typescript": ["assignment_expression", "lexical_declaration", "variable_declaration"], + "python": ["assignment"], + } + + node_types = assignment_types.get(language, []) + + if node.type in node_types: + target_var = None + source_expr = None + source_vars = [] + + if node.type in ["lexical_declaration", "variable_declaration"]: + # Handle lexical_declaration (const/let) and variable_declaration (var) + # Both contain variable_declarator children + # Process all variable_declarators within (const a = 1, b = 2) + for child in node.children: + if child.type == "variable_declarator": + name_node = child.child_by_field_name('name') + value_node = child.child_by_field_name('value') + + if name_node and value_node: + in_function = find_containing_function_tree_sitter(child, content, language) or "global" + if debug: + print(f"[DEBUG] Found assignment: {name_node.text.decode('utf-8')} = {value_node.text.decode('utf-8')[:50]}", file=sys.stderr) + assignments.append({ + "target_var": name_node.text.decode("utf-8", errors="ignore"), + "source_expr": value_node.text.decode("utf-8", errors="ignore"), + "line": child.start_point[0] + 1, + "in_function": in_function, + "source_vars": extract_vars_from_tree_sitter_expr( + value_node.text.decode("utf-8", errors="ignore") + ) + }) + + elif node.type == "assignment_expression": + # x = value (JavaScript/TypeScript) - Use field-based API + left_node = node.child_by_field_name('left') + right_node = node.child_by_field_name('right') + + if left_node: + target_var = left_node.text.decode("utf-8", errors="ignore") + if right_node: + source_expr = right_node.text.decode("utf-8", errors="ignore") + source_vars = extract_vars_from_tree_sitter_expr(source_expr) + + elif node.type == "assignment": + # x = value (Python) + # Python assignment has structure: [target, "=", value] + left_node = None + right_node = None + for child in node.children: + if child.type != "=" and left_node is None: + left_node = child + elif child.type != "=" and left_node is not None: + right_node = child + + if left_node: + target_var = left_node.text.decode("utf-8", errors="ignore") if left_node.text else "" + if right_node: + source_expr = 
right_node.text.decode("utf-8", errors="ignore") if right_node.text else "" + + # Only create assignment record if we have both target and source + # (Skip lexical_declaration/variable_declaration as they're handled above with their children) + if target_var and source_expr and node.type not in ["lexical_declaration", "variable_declaration"]: + # Find containing function + in_function = find_containing_function_tree_sitter(node, content, language) + + assignments.append({ + "target_var": target_var, + "source_expr": source_expr, + "line": node.start_point[0] + 1, + "in_function": in_function or "global", + "source_vars": source_vars if source_vars else extract_vars_from_tree_sitter_expr(source_expr) + }) + + # Recursively search children + for child in node.children: + assignments.extend(_extract_tree_sitter_assignments(child, language, content)) + + return assignments + + +def extract_treesitter_function_params(tree: Dict, parser_self, language: str) -> Dict[str, List[str]]: + """Extract function parameters from Tree-sitter AST.""" + actual_tree = tree.get("tree") + if not actual_tree: + return {} + + if not parser_self.has_tree_sitter: + return {} + + return _extract_tree_sitter_function_params(actual_tree.root_node, language) + + +def _extract_tree_sitter_function_params(node: Any, language: str) -> Dict[str, List[str]]: + """Extract function parameters from Tree-sitter AST.""" + func_params = {} + + if node is None: + return func_params + + # Function definition node types + if language in ["javascript", "typescript"]: + if node.type in ["function_declaration", "function_expression", "arrow_function", "method_definition"]: + func_name = "anonymous" + params = [] + + # Use field-based API for function nodes + name_node = node.child_by_field_name('name') + params_node = node.child_by_field_name('parameters') + + if name_node: + func_name = name_node.text.decode("utf-8", errors="ignore") + + # Fall back to child iteration if field access fails + if not params_node: + for child in node.children: + if child.type in ["formal_parameters", "parameters"]: + params_node = child + break + + if params_node: + # Extract parameter names + for param_child in params_node.children: + if param_child.type in ["identifier", "required_parameter", "optional_parameter"]: + if param_child.type == "identifier": + params.append(param_child.text.decode("utf-8", errors="ignore")) + else: + # For required/optional parameters, use field API + pattern_node = param_child.child_by_field_name('pattern') + if pattern_node and pattern_node.type == "identifier": + params.append(pattern_node.text.decode("utf-8", errors="ignore")) + + if func_name and params: + func_params[func_name] = params + + elif language == "python": + if node.type == "function_definition": + func_name = None + params = [] + + for child in node.children: + if child.type == "identifier": + func_name = child.text.decode("utf-8", errors="ignore") + elif child.type == "parameters": + for param_child in child.children: + if param_child.type == "identifier": + params.append(param_child.text.decode("utf-8", errors="ignore")) + + if func_name: + func_params[func_name] = params + + # Recursively search children + for child in node.children: + func_params.update(_extract_tree_sitter_function_params(child, language)) + + return func_params + + +def extract_treesitter_calls_with_args( + tree: Dict, function_params: Dict[str, List[str]], parser_self, language: str +) -> List[Dict[str, Any]]: + """Extract function calls with arguments from Tree-sitter AST.""" + 
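+ # function_params is the product of the first pass (extract_treesitter_function_params):
+ # a map of function name -> declared parameter names. The recursive walk below pairs each
+ # positional argument at a call site with the matching name, falling back to "argN".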
actual_tree = tree.get("tree") + content = tree.get("content", "") + + if not actual_tree: + return [] + + if not parser_self.has_tree_sitter: + return [] + + return _extract_tree_sitter_calls_with_args( + actual_tree.root_node, language, content, function_params + ) + + +def _extract_tree_sitter_calls_with_args( + node: Any, language: str, content: str, function_params: Dict[str, List[str]] +) -> List[Dict[str, Any]]: + """Extract function calls with arguments from Tree-sitter AST.""" + calls = [] + + if node is None: + return calls + + # Call expression node types + if language in ["javascript", "typescript"] and node.type == "call_expression": + # Extract function name using field-based API + func_node = node.child_by_field_name('function') + func_name = "unknown" + + if func_node: + func_name = func_node.text.decode("utf-8", errors="ignore") if func_node.text else "unknown" + else: + # Fallback to child iteration + for child in node.children: + if child.type in ["identifier", "member_expression"]: + func_name = child.text.decode("utf-8", errors="ignore") if child.text else "unknown" + break + + # Find caller function + caller_function = find_containing_function_tree_sitter(node, content, language) or "global" + + # Get callee parameters + callee_params = function_params.get(func_name.split(".")[-1], []) + + # Extract arguments using field-based API + args_node = node.child_by_field_name('arguments') + arg_index = 0 + + if args_node: + for arg_child in args_node.children: + if arg_child.type not in ["(", ")", ","]: + arg_expr = arg_child.text.decode("utf-8", errors="ignore") if arg_child.text else "" + param_name = callee_params[arg_index] if arg_index < len(callee_params) else f"arg{arg_index}" + + calls.append({ + "line": node.start_point[0] + 1, + "caller_function": caller_function, + "callee_function": func_name, + "argument_index": arg_index, + "argument_expr": arg_expr, + "param_name": param_name + }) + arg_index += 1 + + elif language == "python" and node.type == "call": + # Similar logic for Python + func_name = "unknown" + for child in node.children: + if child.type in ["identifier", "attribute"]: + func_name = child.text.decode("utf-8", errors="ignore") if child.text else "unknown" + break + + caller_function = find_containing_function_tree_sitter(node, content, language) or "global" + callee_params = function_params.get(func_name.split(".")[-1], []) + + arg_index = 0 + for child in node.children: + if child.type == "argument_list": + for arg_child in child.children: + if arg_child.type not in ["(", ")", ","]: + arg_expr = arg_child.text.decode("utf-8", errors="ignore") if arg_child.text else "" + param_name = callee_params[arg_index] if arg_index < len(callee_params) else f"arg{arg_index}" + + calls.append({ + "line": node.start_point[0] + 1, + "caller_function": caller_function, + "callee_function": func_name, + "argument_index": arg_index, + "argument_expr": arg_expr, + "param_name": param_name + }) + arg_index += 1 + + # Recursively search children + for child in node.children: + calls.extend(_extract_tree_sitter_calls_with_args(child, language, content, function_params)) + + return calls + + +def extract_treesitter_returns(tree: Dict, parser_self, language: str) -> List[Dict[str, Any]]: + """Extract return statements from Tree-sitter AST.""" + actual_tree = tree.get("tree") + content = tree.get("content", "") + + if not actual_tree: + return [] + + if not parser_self.has_tree_sitter: + return [] + + return _extract_tree_sitter_returns(actual_tree.root_node, language, 
content) + + +def _extract_tree_sitter_returns(node: Any, language: str, content: str) -> List[Dict[str, Any]]: + """Extract return statements from Tree-sitter AST.""" + returns = [] + + if node is None: + return returns + + # Return statement node types + if language in ["javascript", "typescript"] and node.type == "return_statement": + # Find containing function + function_name = find_containing_function_tree_sitter(node, content, language) or "global" + + # Extract return expression + return_expr = "" + for child in node.children: + if child.type != "return": + return_expr = child.text.decode("utf-8", errors="ignore") if child.text else "" + break + + if not return_expr: + return_expr = "undefined" + + returns.append({ + "function_name": function_name, + "line": node.start_point[0] + 1, + "return_expr": return_expr, + "return_vars": extract_vars_from_tree_sitter_expr(return_expr) + }) + + elif language == "python" and node.type == "return_statement": + # Find containing function + function_name = find_containing_function_tree_sitter(node, content, language) or "global" + + # Extract return expression + return_expr = "" + for child in node.children: + if child.type != "return": + return_expr = child.text.decode("utf-8", errors="ignore") if child.text else "" + break + + if not return_expr: + return_expr = "None" + + returns.append({ + "function_name": function_name, + "line": node.start_point[0] + 1, + "return_expr": return_expr, + "return_vars": extract_vars_from_tree_sitter_expr(return_expr) + }) + + # Recursively search children + for child in node.children: + returns.extend(_extract_tree_sitter_returns(child, language, content)) + + return returns \ No newline at end of file diff --git a/theauditor/ast_extractors/typescript_impl.py b/theauditor/ast_extractors/typescript_impl.py new file mode 100644 index 0000000..6fa4069 --- /dev/null +++ b/theauditor/ast_extractors/typescript_impl.py @@ -0,0 +1,674 @@ +"""TypeScript/JavaScript semantic AST extraction implementations. + +This module contains all TypeScript compiler API extraction logic for semantic analysis. +""" + +import os +from typing import Any, List, Dict, Optional + +from .base import extract_vars_from_tree_sitter_expr + + +def extract_semantic_ast_symbols(node, depth=0): + """Extract symbols from TypeScript semantic AST including property accesses. + + This is a helper used by multiple extraction functions. + """ + symbols = [] + if depth > 100 or not isinstance(node, dict): + return symbols + + kind = node.get("kind") + + # PropertyAccessExpression: req.body, req.params, res.send, etc. 
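+ # The TypeScript compiler already emits the full access chain in node["text"]
+ # (e.g. "req.body.email"); the manual chain reconstruction below is only a fallback
+ # for nodes whose "text" field is missing.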
+ if kind == "PropertyAccessExpression": + # Use the authoritative text from TypeScript compiler (now restored) + full_name = node.get("text", "").strip() + + # Only fall back to reconstruction if text is missing (shouldn't happen now) + if not full_name: + # Build the full property access chain + name_parts = [] + current = node + while current and isinstance(current, dict): + if current.get("name"): + if isinstance(current["name"], dict) and current["name"].get("name"): + name_parts.append(str(current["name"]["name"])) + elif isinstance(current["name"], str): + name_parts.append(current["name"]) + # Look for the expression part + if current.get("children"): + for child in current["children"]: + if isinstance(child, dict) and child.get("kind") == "Identifier": + if child.get("text"): + name_parts.append(child["text"]) + current = current.get("expression") + + if name_parts: + full_name = ".".join(reversed(name_parts)) + else: + full_name = None + + if full_name: + # CRITICAL FIX: Extract ALL property accesses for taint analysis + # The taint analyzer will filter for the specific sources it needs + # This ensures we capture req.body, req.query, request.params, etc. + + # Default all property accesses as "property" type + db_type = "property" + + # Override only for known sink patterns that should be "call" type + if any(sink in full_name for sink in ["res.send", "res.render", "res.json", "response.write", "innerHTML", "outerHTML", "exec", "eval", "system", "spawn"]): + db_type = "call" # Taint analyzer looks for sinks as calls + + symbols.append({ + "name": full_name, + "line": node.get("line", 0), + "column": node.get("column", 0), + "type": db_type + }) + + # CallExpression: function calls including method calls + elif kind == "CallExpression": + # Use text field first if available (now restored) + name = None + if node.get("text"): + # Extract function name from text + text = node["text"] + if "(" in text: + name = text.split("(")[0].strip() + elif node.get("name"): + name = node["name"] + + # Also check for method calls on children + if not name and node.get("children"): + for child in node["children"]: + if isinstance(child, dict): + if child.get("kind") == "PropertyAccessExpression": + name = child.get("text", "").split("(")[0].strip() + break + elif child.get("text") and "." in child.get("text", ""): + name = child["text"].split("(")[0].strip() + break + + if name: + symbols.append({ + "name": name, + "line": node.get("line", 0), + "column": node.get("column", 0), + "type": "call" + }) + + # Identifier nodes that might be property accesses or function references + elif kind == "Identifier": + text = node.get("text", "") + # Check if it looks like a property access pattern + if "." 
in text: + # Determine type based on pattern + db_type = "property" + # Check for sink patterns + if any(sink in text for sink in ["res.send", "res.render", "res.json", "response.write"]): + db_type = "call" + + symbols.append({ + "name": text, + "line": node.get("line", 0), + "column": node.get("column", 0), + "type": db_type + }) + + # Recurse through children + for child in node.get("children", []): + symbols.extend(extract_semantic_ast_symbols(child, depth + 1)) + + return symbols + + +def extract_typescript_functions(tree: Dict, parser_self) -> List[Dict]: + """Extract function definitions from TypeScript semantic AST.""" + functions = [] + + # Common parameter names that should NEVER be marked as functions + PARAMETER_NAMES = {"req", "res", "next", "err", "error", "ctx", "request", "response", "callback", "done", "cb"} + + # CRITICAL FIX: Symbols are at tree["symbols"], not tree["tree"]["symbols"] + for symbol in tree.get("symbols", []): + ts_kind = symbol.get("kind", 0) + symbol_name = symbol.get("name", "") + + if not symbol_name or symbol_name == "anonymous": + continue + + # CRITICAL FIX: Skip known parameter names that are incorrectly marked as functions + if symbol_name in PARAMETER_NAMES: + continue # These are parameters, not function definitions + + # Check if this is a function symbol + is_function = False + if isinstance(ts_kind, str): + if "Function" in ts_kind or "Method" in ts_kind: + is_function = True + elif isinstance(ts_kind, (int, float)): + # TypeScript SymbolFlags: Function = 16, Method = 8192, Constructor = 16384 + # Parameter = 8388608 (0x800000) - SKIP THIS + if ts_kind == 8388608: + continue # This is a parameter, not a function + elif ts_kind in [16, 8192, 16384]: + is_function = True + + if is_function and symbol_name not in PARAMETER_NAMES: + functions.append({ + "name": symbol_name, + "line": symbol.get("line", 0), + "type": "function", + "kind": ts_kind + }) + + return functions + + +def extract_typescript_classes(tree: Dict, parser_self) -> List[Dict]: + """Extract class definitions from TypeScript semantic AST.""" + classes = [] + + # CRITICAL FIX: Symbols are at tree["symbols"], not tree["tree"]["symbols"] + for symbol in tree.get("symbols", []): + ts_kind = symbol.get("kind", 0) + symbol_name = symbol.get("name", "") + + if not symbol_name or symbol_name == "anonymous": + continue + + # Check if this is a class symbol + is_class = False + if isinstance(ts_kind, str): + if "Class" in ts_kind or "Interface" in ts_kind: + is_class = True + elif isinstance(ts_kind, (int, float)): + # TypeScript SymbolFlags: Class = 32, Interface = 64 + if ts_kind in [32, 64]: + is_class = True + + if is_class: + classes.append({ + "name": symbol_name, + "line": symbol.get("line", 0), + "column": 0, + "type": "class", + "kind": ts_kind + }) + + return classes + + +def extract_typescript_calls(tree: Dict, parser_self) -> List[Dict]: + """Extract function calls from TypeScript semantic AST.""" + calls = [] + + # Common parameter names that should NEVER be marked as functions + PARAMETER_NAMES = {"req", "res", "next", "err", "error", "ctx", "request", "response", "callback", "done", "cb"} + + # Use the symbols already extracted by TypeScript compiler + # CRITICAL FIX: Symbols are at tree["symbols"], not tree["tree"]["symbols"] + for symbol in tree.get("symbols", []): + symbol_name = symbol.get("name", "") + ts_kind = symbol.get("kind", 0) + + # Skip empty/anonymous symbols + if not symbol_name or symbol_name == "anonymous": + continue + + # CRITICAL FIX: Skip known parameter 
names that are incorrectly marked as functions + # These are function parameters, not function definitions + if symbol_name in PARAMETER_NAMES: + # These should be marked as properties/variables for taint analysis + if symbol_name in ["req", "request", "ctx"]: + calls.append({ + "name": symbol_name, + "line": symbol.get("line", 0), + "column": 0, + "type": "property" # Mark as property for taint source detection + }) + continue # Skip further processing for parameters + + # CRITICAL FIX: Properly categorize based on TypeScript SymbolFlags + # The 'kind' field from TypeScript can be: + # - A string like "Function", "Method", "Property" (when ts.SymbolFlags mapping works) + # - A number representing the flag value (when mapping fails) + # TypeScript SymbolFlags values: + # Function = 16, Method = 8192, Property = 98304, Variable = 3, etc. + + db_type = "call" # Default for unknown types + + # Check if kind is a string (successful mapping in helper script) + if isinstance(ts_kind, str): + # Only mark as function if it's REALLY a function and not a parameter + if ("Function" in ts_kind or "Method" in ts_kind) and symbol_name not in PARAMETER_NAMES: + db_type = "function" + elif "Property" in ts_kind: + db_type = "property" + elif "Variable" in ts_kind or "Let" in ts_kind or "Const" in ts_kind: + # Variables could be sources if they match patterns + if any(pattern in symbol_name for pattern in ["req", "request", "ctx", "body", "params", "query", "headers"]): + db_type = "property" + else: + db_type = "call" + # Check numeric flags (when string mapping failed) + elif isinstance(ts_kind, (int, float)): + # TypeScript SymbolFlags from typescript.d.ts: + # Function = 16, Method = 8192, Constructor = 16384 + # Property = 98304, Variable = 3, Let = 1, Const = 2 + # Parameter = 8388608 (0x800000) + + # CRITICAL: Skip parameter flag (8388608) + if ts_kind == 8388608: + # This is a parameter, not a function + if symbol_name in ["req", "request", "ctx"]: + db_type = "property" # Mark as property for taint analysis + else: + continue # Skip other parameters + elif ts_kind in [16, 8192, 16384] and symbol_name not in PARAMETER_NAMES: # Function, Method, Constructor + db_type = "function" + elif ts_kind in [98304, 4, 1048576]: # Property, EnumMember, Accessor + db_type = "property" + elif ts_kind in [3, 1, 2]: # Variable, Let, Const + # Check if it looks like a source + if any(pattern in symbol_name for pattern in ["req", "request", "ctx", "body", "params", "query", "headers"]): + db_type = "property" + + # Override based on name patterns (for calls and property accesses) + if "." 
in symbol_name: + # Source patterns (user input) + if any(pattern in symbol_name for pattern in ["req.", "request.", "ctx.", "event.", "body", "params", "query", "headers", "cookies"]): + db_type = "property" + # Sink patterns (dangerous functions) + elif any(pattern in symbol_name for pattern in ["res.send", "res.render", "res.json", "response.write", "exec", "eval"]): + db_type = "call" + + calls.append({ + "name": symbol_name, + "line": symbol.get("line", 0), + "column": 0, + "type": db_type + }) + + # Also traverse AST for specific patterns + actual_tree = tree.get("tree") if isinstance(tree.get("tree"), dict) else tree + if actual_tree and actual_tree.get("success"): + ast_root = actual_tree.get("ast") + if ast_root: + calls.extend(extract_semantic_ast_symbols(ast_root)) + + return calls + + +def extract_typescript_imports(tree: Dict, parser_self) -> List[Dict[str, Any]]: + """Extract import statements from TypeScript semantic AST.""" + imports = [] + + # Use TypeScript compiler API data + for imp in tree.get("imports", []): + imports.append({ + "source": imp.get("kind", "import"), + "target": imp.get("module"), + "type": imp.get("kind", "import"), + "line": imp.get("line", 0), + "specifiers": imp.get("specifiers", []) + }) + + return imports + + +def extract_typescript_exports(tree: Dict, parser_self) -> List[Dict[str, Any]]: + """Extract export statements from TypeScript semantic AST. + + Currently returns empty list - exports aren't extracted by semantic parser yet. + """ + return [] + + +def extract_typescript_properties(tree: Dict, parser_self) -> List[Dict]: + """Extract property accesses from TypeScript semantic AST.""" + properties = [] + + # Already handled in extract_calls via extract_semantic_ast_symbols + # But we can also extract them specifically here + actual_tree = tree.get("tree") if isinstance(tree.get("tree"), dict) else tree + if actual_tree and actual_tree.get("success"): + ast_root = actual_tree.get("ast") + if ast_root: + symbols = extract_semantic_ast_symbols(ast_root) + # Filter for property accesses only + properties = [s for s in symbols if s.get("type") == "property"] + + return properties + + +def extract_typescript_assignments(tree: Dict, parser_self) -> List[Dict[str, Any]]: + """Extract ALL assignment patterns from TypeScript semantic AST, including destructuring.""" + assignments = [] + + if not tree or not tree.get("success"): + if os.environ.get("THEAUDITOR_DEBUG"): + import sys + print(f"[AST_DEBUG] extract_typescript_assignments: No success in tree", file=sys.stderr) + return assignments + + if os.environ.get("THEAUDITOR_DEBUG"): + import sys + print(f"[AST_DEBUG] extract_typescript_assignments: Starting extraction", file=sys.stderr) + + def traverse(node, current_function="global", depth=0): + if depth > 100 or not isinstance(node, dict): + return + + try: + kind = node.get("kind", "") + + # DEBUG: Log ALL node kinds we see to understand structure + if os.environ.get("THEAUDITOR_DEBUG"): + import sys + if depth < 5: # Log more depth + print(f"[AST_DEBUG] Depth {depth}: kind='{kind}'", file=sys.stderr) + if "Variable" in kind or "Assignment" in kind or "Binary" in kind or "=" in str(node.get("text", "")): + print(f"[AST_DEBUG] *** POTENTIAL ASSIGNMENT at depth {depth}: {kind}, text={str(node.get('text', ''))[:50]} ***", file=sys.stderr) + + # --- Function Context Tracking --- + new_function = current_function + if kind in ["FunctionDeclaration", "MethodDeclaration", "ArrowFunction", "FunctionExpression"]: + name_node = node.get("name") + if 
name_node and isinstance(name_node, dict): + new_function = name_node.get("text", "anonymous") + else: + new_function = "anonymous" + + # --- Assignment Extraction --- + # 1. Standard Assignments: const x = y; or x = y; + # NOTE: TypeScript AST has VariableDeclaration nested under FirstStatement->VariableDeclarationList + if kind in ["VariableDeclaration", "BinaryExpression"]: + # For BinaryExpression, check if it's an assignment (=) operator + is_assignment = True + if kind == "BinaryExpression": + op_token = node.get("operatorToken", {}) + if not (isinstance(op_token, dict) and op_token.get("kind") == "EqualsToken"): + # Not an assignment, just a comparison or arithmetic expression + is_assignment = False + + if is_assignment: + # TypeScript AST structure is different - use children and text + if kind == "VariableDeclaration": + # For TypeScript VariableDeclaration, extract from text or children + full_text = node.get("text", "") + if "=" in full_text: + parts = full_text.split("=", 1) + target_var = parts[0].strip() + source_expr = parts[1].strip() + if target_var and source_expr: + if os.environ.get("THEAUDITOR_DEBUG"): + import sys + print(f"[AST_DEBUG] Found TS assignment: {target_var} = {source_expr[:30]}... at line {node.get('line', 0)}", file=sys.stderr) + assignments.append({ + "target_var": target_var, + "source_expr": source_expr, + "line": node.get("line", 0), + "in_function": current_function, + "source_vars": extract_vars_from_tree_sitter_expr(source_expr) + }) + else: + # BinaryExpression - use the original logic + target_node = node.get("left") + source_node = node.get("right") + + if isinstance(target_node, dict) and isinstance(source_node, dict): + # --- ENHANCEMENT: Handle Destructuring --- + if target_node.get("kind") in ["ObjectBindingPattern", "ArrayBindingPattern"]: + source_expr = source_node.get("text", "unknown_source") + # For each element in the destructuring, create a separate assignment + for element in target_node.get("elements", []): + if isinstance(element, dict) and element.get("name"): + target_var = element.get("name", {}).get("text") + if target_var: + assignments.append({ + "target_var": target_var, + "source_expr": source_expr, # CRITICAL: Source is the original object/array + "line": element.get("line", node.get("line", 0)), + "in_function": current_function, + "source_vars": extract_vars_from_tree_sitter_expr(source_expr) + }) + else: + # --- Standard, non-destructured assignment --- + target_var = target_node.get("text", "") + source_expr = source_node.get("text", "") + if target_var and source_expr: + if os.environ.get("THEAUDITOR_DEBUG"): + import sys + print(f"[AST_DEBUG] Found assignment: {target_var} = {source_expr[:50]}... 
at line {node.get('line', 0)}", file=sys.stderr) + assignments.append({ + "target_var": target_var, + "source_expr": source_expr, + "line": node.get("line", 0), + "in_function": current_function, + "source_vars": extract_vars_from_tree_sitter_expr(source_expr) + }) + + # Recurse with updated function context + for child in node.get("children", []): + traverse(child, new_function, depth + 1) + + except Exception: + # This safety net catches any unexpected AST structures + pass + + ast_root = tree.get("ast", {}) + traverse(ast_root) + + if os.environ.get("THEAUDITOR_DEBUG"): + import sys + print(f"[AST_DEBUG] extract_typescript_assignments: Found {len(assignments)} assignments", file=sys.stderr) + if assignments and len(assignments) < 5: + for a in assignments[:3]: + print(f"[AST_DEBUG] Example: {a['target_var']} = {a['source_expr'][:30]}...", file=sys.stderr) + + return assignments + + +def extract_typescript_function_params(tree: Dict, parser_self) -> Dict[str, List[str]]: + """Extract function parameters from TypeScript semantic AST.""" + func_params = {} + + if not tree or not tree.get("success"): + return func_params + + def traverse(node, depth=0): + if depth > 100 or not isinstance(node, dict): + return + + kind = node.get("kind") + + if kind in ["FunctionDeclaration", "MethodDeclaration", "ArrowFunction", "FunctionExpression"]: + # Get function name + name_node = node.get("name") + func_name = "anonymous" + if isinstance(name_node, dict): + func_name = name_node.get("text", "anonymous") + elif isinstance(name_node, str): + func_name = name_node + elif not name_node: + # Look for Identifier child (TypeScript AST structure) + for child in node.get("children", []): + if isinstance(child, dict) and child.get("kind") == "Identifier": + func_name = child.get("text", "anonymous") + break + + # Extract parameter names + # FIX: In TypeScript AST, parameters are direct children with kind="Parameter" + params = [] + + # Look in children for Parameter nodes + for child in node.get("children", []): + if isinstance(child, dict) and child.get("kind") == "Parameter": + # Found a parameter - get its text directly + param_text = child.get("text", "") + if param_text: + params.append(param_text) + + # Fallback to old structure if no parameters found + if not params: + param_nodes = node.get("parameters", []) + for param in param_nodes: + if isinstance(param, dict) and param.get("name"): + param_name_node = param.get("name") + if isinstance(param_name_node, dict): + params.append(param_name_node.get("text", "")) + elif isinstance(param_name_node, str): + params.append(param_name_node) + + if func_name != "anonymous" and params: + func_params[func_name] = params + + # Recurse through children + for child in node.get("children", []): + traverse(child, depth + 1) + + ast_root = tree.get("ast", {}) + traverse(ast_root) + + return func_params + + +def extract_typescript_calls_with_args(tree: Dict, function_params: Dict[str, List[str]], parser_self) -> List[Dict[str, Any]]: + """Extract function calls with arguments from TypeScript semantic AST.""" + calls = [] + + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"[DEBUG] extract_typescript_calls_with_args: tree type={type(tree)}, success={tree.get('success') if tree else 'N/A'}") + + if not tree or not tree.get("success"): + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"[DEBUG] extract_typescript_calls_with_args: Returning early - no tree or no success") + return calls + + def traverse(node, current_function="global", depth=0): + if depth > 100 or not 
isinstance(node, dict): + return + + try: + kind = node.get("kind", "") + + # Track function context + new_function = current_function + if kind in ["FunctionDeclaration", "MethodDeclaration", "ArrowFunction", "FunctionExpression"]: + name_node = node.get("name") + if name_node and isinstance(name_node, dict): + new_function = name_node.get("text", "anonymous") + else: + new_function = "anonymous" + + # CallExpression: function calls + if kind == "CallExpression": + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"[DEBUG] Found CallExpression at line {node.get('line', 0)}") + + # FIX: In TypeScript AST, the function and arguments are in children array + children = node.get("children", []) + if not children: + # Fallback to old structure + expression = node.get("expression", {}) + arguments = node.get("arguments", []) + else: + # New structure: first child is function, rest are arguments + expression = children[0] if len(children) > 0 else {} + arguments = children[1:] if len(children) > 1 else [] + + # Get function name from expression + callee_name = "unknown" + if isinstance(expression, dict): + callee_name = expression.get("text", "unknown") + + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"[DEBUG] CallExpression: callee={callee_name}, args={len(arguments)}") + if arguments: + print(f"[DEBUG] First arg: {arguments[0].get('text', 'N/A') if isinstance(arguments[0], dict) else arguments[0]}") + + # Get parameters for this function if we know them + callee_params = function_params.get(callee_name.split(".")[-1], []) + + # Process arguments + for i, arg in enumerate(arguments): + if isinstance(arg, dict): + arg_text = arg.get("text", "") + param_name = callee_params[i] if i < len(callee_params) else f"arg{i}" + + calls.append({ + "line": node.get("line", 0), + "caller_function": current_function, + "callee_function": callee_name, + "argument_index": i, + "argument_expr": arg_text, + "param_name": param_name + }) + + # Recurse with updated function context + for child in node.get("children", []): + traverse(child, new_function, depth + 1) + + except Exception as e: + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"[DEBUG] Error in extract_typescript_calls_with_args: {e}") + + ast_root = tree.get("ast", {}) + traverse(ast_root) + + # Debug output + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"[DEBUG] Extracted {len(calls)} function calls with args from semantic AST") + + return calls + + +def extract_typescript_returns(tree: Dict, parser_self) -> List[Dict[str, Any]]: + """Extract return statements from TypeScript semantic AST.""" + returns = [] + + if not tree or not tree.get("success"): + return returns + + # Traverse AST looking for return statements + def traverse(node, current_function="global", depth=0): + if depth > 100 or not isinstance(node, dict): + return + + kind = node.get("kind") + + # Track current function context + if kind in ["FunctionDeclaration", "FunctionExpression", "ArrowFunction", "MethodDeclaration"]: + # Extract function name if available + name_node = node.get("name") + if name_node and isinstance(name_node, dict): + current_function = name_node.get("text", "anonymous") + else: + current_function = "anonymous" + + # ReturnStatement + elif kind == "ReturnStatement": + expr_node = node.get("expression", {}) + if isinstance(expr_node, dict): + return_expr = expr_node.get("text", "") + else: + return_expr = str(expr_node) if expr_node else "undefined" + + returns.append({ + "function_name": current_function, + "line": node.get("line", 0), + "return_expr": 
return_expr, + "return_vars": extract_vars_from_tree_sitter_expr(return_expr) + }) + + # Recurse through children + for child in node.get("children", []): + traverse(child, current_function, depth + 1) + + ast_root = tree.get("ast", {}) + traverse(ast_root) + + return returns \ No newline at end of file diff --git a/theauditor/ast_parser.py b/theauditor/ast_parser.py new file mode 100644 index 0000000..bb5e5de --- /dev/null +++ b/theauditor/ast_parser.py @@ -0,0 +1,323 @@ +"""AST parser using Tree-sitter for multi-language support. + +This module provides true structural code analysis using Tree-sitter, +enabling high-fidelity pattern detection that understands code semantics +rather than just text matching. +""" + +import ast +import hashlib +import json +import os +import re +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path +from typing import Any, Optional, List, Dict, Union + +from theauditor.js_semantic_parser import get_semantic_ast, get_semantic_ast_batch +from theauditor.ast_patterns import ASTPatternMixin +from theauditor.ast_extractors import ASTExtractorMixin + + +@dataclass +class ASTMatch: + """Represents an AST pattern match.""" + + node_type: str + start_line: int + end_line: int + start_col: int + snippet: str + metadata: Dict[str, Any] = None + + +class ASTParser(ASTPatternMixin, ASTExtractorMixin): + """Multi-language AST parser using Tree-sitter for structural analysis.""" + + def __init__(self): + """Initialize parser with Tree-sitter language support.""" + self.has_tree_sitter = False + self.parsers = {} + self.languages = {} + self.project_type = None # Cache project type detection + + # Try to import tree-sitter and language bindings + try: + import tree_sitter + self.tree_sitter = tree_sitter + self.has_tree_sitter = True + self._init_tree_sitter_parsers() + except ImportError: + print("Warning: Tree-sitter not available. 
Install with: pip install tree-sitter tree-sitter-python tree-sitter-javascript tree-sitter-typescript") + + def _init_tree_sitter_parsers(self): + """Initialize Tree-sitter language parsers with proper bindings.""" + if not self.has_tree_sitter: + return + + # Use tree-sitter-language-pack for all languages + try: + from tree_sitter_language_pack import get_language, get_parser + + # Python parser + try: + python_lang = get_language("python") + python_parser = get_parser("python") + self.parsers["python"] = python_parser + self.languages["python"] = python_lang + except Exception as e: + # Python has built-in fallback, so we can continue with a warning + print(f"Warning: Failed to initialize Python parser: {e}") + print(" AST analysis for Python will use built-in parser as fallback.") + + # JavaScript parser (CRITICAL - must fail fast) + try: + js_lang = get_language("javascript") + js_parser = get_parser("javascript") + self.parsers["javascript"] = js_parser + self.languages["javascript"] = js_lang + except Exception as e: + raise RuntimeError( + f"Failed to load tree-sitter grammar for JavaScript: {e}\n" + "This is often due to missing build tools or corrupted installation.\n" + "Please try: pip install --force-reinstall tree-sitter-language-pack\n" + "Or install with AST support: pip install -e '.[ast]'" + ) + + # TypeScript parser (CRITICAL - must fail fast) + try: + ts_lang = get_language("typescript") + ts_parser = get_parser("typescript") + self.parsers["typescript"] = ts_parser + self.languages["typescript"] = ts_lang + except Exception as e: + raise RuntimeError( + f"Failed to load tree-sitter grammar for TypeScript: {e}\n" + "This is often due to missing build tools or corrupted installation.\n" + "Please try: pip install --force-reinstall tree-sitter-language-pack\n" + "Or install with AST support: pip install -e '.[ast]'" + ) + + except ImportError as e: + # If tree-sitter is installed but language pack is not, this is a critical error + # The user clearly intends to use tree-sitter, so we should fail loudly + print(f"ERROR: tree-sitter is installed but tree-sitter-language-pack is not: {e}") + print("This means tree-sitter AST analysis cannot work properly.") + print("Please install with: pip install tree-sitter-language-pack") + print("Or install TheAuditor with full AST support: pip install -e '.[ast]'") + # Set flags to indicate no language support + self.has_tree_sitter = False + # Don't raise - allow fallback to regex-based parsing + + def _detect_project_type(self) -> str: + """Detect the primary project type based on manifest files. 
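A standalone sketch of the same initialization path, assuming `tree-sitter-language-pack` is installed (`pip install tree-sitter-language-pack`):

```python
from tree_sitter_language_pack import get_language, get_parser

js_language = get_language("javascript")
js_parser = get_parser("javascript")

# Parse a small snippet; the root node of a JavaScript source file is "program".
tree = js_parser.parse(b"const x = eval(userInput);")
print(tree.root_node.type)
```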
+ + Returns: + 'polyglot' if multiple language manifest files exist + 'javascript' if only package.json exists + 'python' if only Python manifest files exist + 'go' if only go.mod exists + 'unknown' otherwise + """ + if self.project_type is not None: + return self.project_type + + # Check all manifest files first + has_js = Path("package.json").exists() + has_python = (Path("requirements.txt").exists() or + Path("pyproject.toml").exists() or + Path("setup.py").exists()) + has_go = Path("go.mod").exists() + + # Determine project type based on combinations + if has_js and has_python: + self.project_type = "polyglot" # NEW: Properly handle mixed projects + elif has_js and has_go: + self.project_type = "polyglot" + elif has_python and has_go: + self.project_type = "polyglot" + elif has_js: + self.project_type = "javascript" + elif has_python: + self.project_type = "python" + elif has_go: + self.project_type = "go" + else: + self.project_type = "unknown" + + return self.project_type + + def parse_file(self, file_path: Path, language: str = None, root_path: str = None) -> Any: + """Parse a file into an AST. + + Args: + file_path: Path to the source file. + language: Programming language (auto-detected if None). + root_path: Absolute path to project root (for sandbox resolution). + + Returns: + AST tree object or None if parsing fails. + """ + if language is None: + language = self._detect_language(file_path) + + try: + with open(file_path, "rb") as f: + content = f.read() + + # Compute content hash for caching + content_hash = hashlib.md5(content).hexdigest() + + # For JavaScript/TypeScript, try semantic parser first + # CRITICAL FIX: Include None and polyglot project types + # When project_type is None (not detected yet) or polyglot, still try semantic parsing + project_type = self._detect_project_type() + if language in ["javascript", "typescript"] and project_type in ["javascript", "polyglot", None, "unknown"]: + try: + # Attempt to use the TypeScript Compiler API for semantic analysis + # Normalize path for cross-platform compatibility + normalized_path = str(file_path).replace("\\", "/") + semantic_result = get_semantic_ast(normalized_path, project_root=root_path) + + if semantic_result.get("success"): + # Return the semantic AST with full type information + return { + "type": "semantic_ast", + "tree": semantic_result, + "language": language, + "content": content.decode("utf-8", errors="ignore"), + "has_types": semantic_result.get("hasTypes", False), + "diagnostics": semantic_result.get("diagnostics", []), + "symbols": semantic_result.get("symbols", []) + } + else: + # Log but continue to Tree-sitter/regex fallback + error_msg = semantic_result.get('error', 'Unknown error') + print(f"Warning: Semantic parser failed for {file_path}: {error_msg}") + print(f" Falling back to Tree-sitter/regex parser.") + # Continue to fallback options below + + except Exception as e: + # Log but continue to Tree-sitter/regex fallback + print(f"Warning: Exception in semantic parser for {file_path}: {e}") + print(f" Falling back to Tree-sitter/regex parser.") + # Continue to fallback options below + + # Use Tree-sitter if available + if self.has_tree_sitter and language in self.parsers: + try: + # Use cached parser + tree = self._parse_treesitter_cached(content_hash, content, language) + return {"type": "tree_sitter", "tree": tree, "language": language, "content": content} + except Exception as e: + print(f"Warning: Tree-sitter parsing failed for {file_path}: {e}") + print(f" Falling back to alternative parser 
if available.") + # Continue to fallback options below + + # Fallback to built-in parsers for Python + if language == "python": + decoded = content.decode("utf-8", errors="ignore") + python_ast = self._parse_python_cached(content_hash, decoded) + if python_ast: + return {"type": "python_ast", "tree": python_ast, "language": language, "content": decoded} + + # Return minimal structure to signal regex fallback for JS/TS + if language in ["javascript", "typescript"]: + print(f"Warning: AST parsing unavailable for {file_path}. Using regex fallback.") + decoded = content.decode("utf-8", errors="ignore") + return {"type": "regex_fallback", "tree": None, "language": language, "content": decoded} + + # Return None for unsupported languages + return None + + except Exception as e: + print(f"Warning: Failed to parse {file_path}: {e}") + return None + + def _detect_language(self, file_path: Path) -> str: + """Detect language from file extension.""" + ext_map = { + ".py": "python", + ".js": "javascript", + ".jsx": "javascript", + ".ts": "typescript", + ".tsx": "typescript", + ".mjs": "javascript", + ".cjs": "javascript", + ".vue": "javascript", # Vue SFCs contain JavaScript/TypeScript + } + return ext_map.get(file_path.suffix.lower(), "") # Empty not unknown + + def _parse_python_builtin(self, content: str) -> Optional[ast.AST]: + """Parse Python code using built-in ast module.""" + try: + return ast.parse(content) + except SyntaxError: + return None + + @lru_cache(maxsize=500) + def _parse_python_cached(self, content_hash: str, content: str) -> Optional[ast.AST]: + """Parse Python code with caching based on content hash. + + Args: + content_hash: MD5 hash of the file content + content: The actual file content + + Returns: + Parsed AST or None if parsing fails + """ + return self._parse_python_builtin(content) + + @lru_cache(maxsize=500) + def _parse_treesitter_cached(self, content_hash: str, content: bytes, language: str) -> Any: + """Parse code using Tree-sitter with caching based on content hash. + + Args: + content_hash: MD5 hash of the file content + content: The actual file content as bytes + language: The programming language + + Returns: + Parsed Tree-sitter tree + """ + parser = self.parsers[language] + return parser.parse(content) + + + def supports_language(self, language: str) -> bool: + """Check if a language is supported for AST parsing. + + Args: + language: Programming language name. + + Returns: + True if AST parsing is supported. + """ + # Python is always supported via built-in ast module + if language == "python": + return True + + # JavaScript and TypeScript are always supported via fallback + if language in ["javascript", "typescript"]: + return True + + # Check Tree-sitter support for other languages + if self.has_tree_sitter and language in self.parsers: + return True + + return False + + def get_supported_languages(self) -> List[str]: + """Get list of supported languages. + + Returns: + List of language names. + """ + # Always supported via built-in or fallback + languages = ["python", "javascript", "typescript"] + + if self.has_tree_sitter: + languages.extend(self.parsers.keys()) + + return sorted(set(languages)) diff --git a/theauditor/ast_patterns.py b/theauditor/ast_patterns.py new file mode 100644 index 0000000..967788d --- /dev/null +++ b/theauditor/ast_patterns.py @@ -0,0 +1,401 @@ +"""AST Pattern Matching Engine. + +This module contains all pattern matching and query logic for the AST parser. +It provides pattern-based search capabilities across different AST types. 
+""" + +import ast +from typing import Any, Optional, List, Dict, TYPE_CHECKING +from dataclasses import dataclass + +if TYPE_CHECKING: + # For type checking only, avoid circular import + from .ast_parser import ASTMatch +else: + # At runtime, ASTMatch will be available from the parent class + @dataclass + class ASTMatch: + """Represents an AST pattern match.""" + node_type: str + start_line: int + end_line: int + start_col: int + snippet: str + metadata: Dict[str, Any] = None + + +class ASTPatternMixin: + """Mixin class providing pattern matching capabilities for AST analysis.""" + + def query_ast(self, tree: Any, query_string: str) -> List[ASTMatch]: + """Execute a Tree-sitter query on the AST. + + Args: + tree: AST tree object from parse_file. + query_string: Tree-sitter query in S-expression format. + + Returns: + List of ASTMatch objects. + """ + matches = [] + + if not tree: + return matches + + # Handle Tree-sitter AST with queries + if tree.get("type") == "tree_sitter" and self.has_tree_sitter: + language = tree["language"] + if language in self.languages: + try: + # CRITICAL FIX: Use correct tree-sitter API with QueryCursor + # Per tree-sitter 0.25.1 documentation, must: + # 1. Create Query with Query() constructor + # 2. Create QueryCursor from the query + # 3. Call matches() on the cursor, not the query + from tree_sitter import Query, QueryCursor + + # Create Query object using the language and query string + query = Query(self.languages[language], query_string) + + # Create QueryCursor from the query + query_cursor = QueryCursor(query) + + # Call matches() on the cursor (not the query!) + query_matches = query_cursor.matches(tree["tree"].root_node) + + for match in query_matches: + # Each match is a tuple: (pattern_index, captures_dict) + pattern_index, captures = match + + # Process captures dictionary + for capture_name, nodes in captures.items(): + # Handle both single node and list of nodes + if not isinstance(nodes, list): + nodes = [nodes] + + for node in nodes: + start_point = node.start_point + end_point = node.end_point + snippet = node.text.decode("utf-8", errors="ignore") if node.text else "" + + ast_match = ASTMatch( + node_type=node.type, + start_line=start_point[0] + 1, + end_line=end_point[0] + 1, + start_col=start_point[1], + snippet=snippet[:200], + metadata={"capture": capture_name, "pattern": pattern_index} + ) + matches.append(ast_match) + except Exception as e: + print(f"Query error: {e}") + + # For Python AST, fall back to pattern matching + elif tree.get("type") == "python_ast": + # Convert query to pattern and use existing method + pattern = self._query_to_pattern(query_string) + if pattern: + matches = self.find_ast_matches(tree, pattern) + + return matches + + def _query_to_pattern(self, query_string: str) -> Optional[Dict]: + """Convert a Tree-sitter query to a simple pattern dict. + + This is a fallback for Python's built-in AST. + """ + # Simple heuristic conversion for common patterns + if "any" in query_string.lower(): + return {"node_type": "type_annotation", "contains": ["any"]} + elif "function" in query_string.lower(): + return {"node_type": "function_def", "contains": []} + elif "class" in query_string.lower(): + return {"node_type": "class_def", "contains": []} + return None + + def find_ast_matches(self, tree: Any, ast_pattern: dict) -> List[ASTMatch]: + """Find matches in AST based on pattern. + + Args: + tree: AST tree object. + ast_pattern: Pattern dictionary with node_type and optional contains. + + Returns: + List of ASTMatch objects. 
+ """ + matches = [] + + if not tree: + return matches + + # Handle wrapped tree objects + if isinstance(tree, dict): + tree_type = tree.get("type") + actual_tree = tree.get("tree") + + if tree_type == "tree_sitter" and self.has_tree_sitter: + matches.extend(self._find_tree_sitter_matches(actual_tree.root_node, ast_pattern)) + elif tree_type == "python_ast": + matches.extend(self._find_python_ast_matches(actual_tree, ast_pattern)) + elif tree_type == "semantic_ast": + # Handle Semantic AST from TypeScript Compiler API + matches.extend(self._find_semantic_ast_matches(actual_tree, ast_pattern)) + elif tree_type == "eslint_ast": + # Handle ESLint AST (legacy, now replaced by semantic_ast) + # For now, we treat it similarly to regex_ast but with higher confidence + matches.extend(self._find_eslint_ast_matches(actual_tree, ast_pattern)) + + # Handle direct AST objects (legacy support) + elif isinstance(tree, ast.AST): + matches.extend(self._find_python_ast_matches(tree, ast_pattern)) + + return matches + + def _find_tree_sitter_matches(self, node: Any, pattern: dict) -> List[ASTMatch]: + """Find matches in Tree-sitter AST using structural patterns.""" + matches = [] + + if node is None: + return matches + + # Check if node type matches + node_type = pattern.get("node_type", "") + + # Special handling for type annotations + if node_type == "type_annotation" and "any" in pattern.get("contains", []): + # Look for TypeScript/JavaScript any type annotations + if node.type in ["type_annotation", "type_identifier", "any_type"]: + node_text = node.text.decode("utf-8", errors="ignore") if node.text else "" + if node_text == "any" or ": any" in node_text: + start_point = node.start_point + end_point = node.end_point + + match = ASTMatch( + node_type=node.type, + start_line=start_point[0] + 1, + end_line=end_point[0] + 1, + start_col=start_point[1], + snippet=node_text[:200] + ) + matches.append(match) + + # General pattern matching + elif node.type == node_type or node_type == "*": + contains = pattern.get("contains", []) + node_text = node.text.decode("utf-8", errors="ignore") if node.text else "" + + if all(keyword in node_text for keyword in contains): + start_point = node.start_point + end_point = node.end_point + + match = ASTMatch( + node_type=node.type, + start_line=start_point[0] + 1, + end_line=end_point[0] + 1, + start_col=start_point[1], + snippet=node_text[:200], + ) + matches.append(match) + + # Recursively search children + for child in node.children: + matches.extend(self._find_tree_sitter_matches(child, pattern)) + + return matches + + def _find_semantic_ast_matches(self, tree: Dict[str, Any], pattern: dict) -> List[ASTMatch]: + """Find matches in Semantic AST from TypeScript Compiler API. + + This provides the highest fidelity analysis with full type information. 
+ """ + matches = [] + + if not tree or not tree.get("ast"): + return matches + + # Handle type-related patterns + node_type = pattern.get("node_type", "") + + if node_type == "type_annotation" and "any" in pattern.get("contains", []): + # Search for 'any' types in symbols + for symbol in tree.get("symbols", []): + if symbol.get("type") == "any": + match = ASTMatch( + node_type="any_type", + start_line=symbol.get("line", 0), + end_line=symbol.get("line", 0), + start_col=0, + snippet=f"{symbol.get('name')}: any", + metadata={"symbol": symbol.get("name"), "type": "any"} + ) + matches.append(match) + + # Also recursively search the AST for AnyKeyword nodes + def search_ast_for_any(node, depth=0): + if depth > 100 or not isinstance(node, dict): + return + + if node.get("kind") == "AnyKeyword": + match = ASTMatch( + node_type="AnyKeyword", + start_line=node.get("line", 0), + end_line=node.get("line", 0), + start_col=node.get("column", 0), + snippet=node.get("text", "any")[:200], + metadata={"kind": "AnyKeyword"} + ) + matches.append(match) + + for child in node.get("children", []): + search_ast_for_any(child, depth + 1) + + search_ast_for_any(tree.get("ast", {})) + + return matches + + def _find_eslint_ast_matches(self, tree: Dict[str, Any], pattern: dict) -> List[ASTMatch]: + """Find matches in ESLint AST. + + ESLint provides a full JavaScript/TypeScript AST with high fidelity. + This provides accurate pattern matching for JS/TS code. + """ + matches = [] + + # ESLint AST follows the ESTree specification + # Future enhancement: properly traverse the ESTree AST structure + + if not tree: + return matches + + # Basic implementation - will be enhanced in future iterations + # to properly traverse the ESTree AST structure + return matches + + + def _find_python_ast_matches(self, node: ast.AST, pattern: dict) -> List[ASTMatch]: + """Find matches in Python built-in AST.""" + matches = [] + + # Map pattern node types to Python AST node types + node_type_map = { + "if_statement": ast.If, + "while_statement": ast.While, + "for_statement": ast.For, + "function_def": ast.FunctionDef, + "class_def": ast.ClassDef, + "try_statement": ast.Try, + "type_annotation": ast.AnnAssign, # For type hints + } + + pattern_node_type = pattern.get("node_type", "") + expected_type = node_type_map.get(pattern_node_type) + + # Special handling for 'any' type in Python + if pattern_node_type == "type_annotation" and "any" in pattern.get("contains", []): + # Check for typing.Any usage + if isinstance(node, ast.Name) and node.id == "Any": + match = ASTMatch( + node_type="Any", + start_line=getattr(node, "lineno", 0), + end_line=getattr(node, "end_lineno", getattr(node, "lineno", 0)), + start_col=getattr(node, "col_offset", 0), + snippet="Any" + ) + matches.append(match) + elif isinstance(node, ast.AnnAssign): + # Check annotation for Any + node_source = ast.unparse(node) if hasattr(ast, "unparse") else "" + if "Any" in node_source: + match = ASTMatch( + node_type="AnnAssign", + start_line=getattr(node, "lineno", 0), + end_line=getattr(node, "end_lineno", getattr(node, "lineno", 0)), + start_col=getattr(node, "col_offset", 0), + snippet=node_source[:200] + ) + matches.append(match) + + # General pattern matching + elif expected_type and isinstance(node, expected_type): + contains = pattern.get("contains", []) + node_source = ast.unparse(node) if hasattr(ast, "unparse") else "" + + if all(keyword in node_source for keyword in contains): + match = ASTMatch( + node_type=node.__class__.__name__, + start_line=getattr(node, "lineno", 
0), + end_line=getattr(node, "end_lineno", getattr(node, "lineno", 0)), + start_col=getattr(node, "col_offset", 0), + snippet=node_source[:200], + ) + matches.append(match) + + # Recursively search children + for child in ast.walk(node): + if child != node: + matches.extend(self._find_python_ast_matches(child, pattern)) + + return matches + + def get_tree_sitter_query_for_pattern(self, pattern: str, language: str) -> str: + """Convert a pattern identifier to a Tree-sitter query. + + Args: + pattern: Pattern identifier (e.g., "NO_ANY_IN_SCOPE") + language: Programming language + + Returns: + Tree-sitter query string in S-expression format + """ + queries = { + "typescript": { + "NO_ANY_IN_SCOPE": """ + (type_annotation + (type_identifier) @type + (#eq? @type "any")) + """, + "NO_UNSAFE_EVAL": """ + (call_expression + function: (identifier) @func + (#eq? @func "eval")) + """, + "NO_VAR_IN_STRICT": """ + (variable_declaration + kind: "var") @var_usage + """, + }, + "javascript": { + "NO_ANY_IN_SCOPE": """ + (type_annotation + (type_identifier) @type + (#eq? @type "any")) + """, + "NO_UNSAFE_EVAL": """ + (call_expression + function: (identifier) @func + (#eq? @func "eval")) + """, + "NO_VAR_IN_STRICT": """ + (variable_declaration + kind: "var") @var_usage + """, + }, + "python": { + "NO_EVAL_EXEC": """ + (call + function: (identifier) @func + (#match? @func "^(eval|exec)$")) + """, + "NO_BARE_EXCEPT": """ + (except_clause + !type) @bare_except + """, + "NO_MUTABLE_DEFAULT": """ + (default_parameter + value: [(list) (dictionary)]) @mutable_default + """, + } + } + + language_queries = queries.get(language, {}) + return language_queries.get(pattern, "") \ No newline at end of file diff --git a/theauditor/claude_setup.py b/theauditor/claude_setup.py new file mode 100644 index 0000000..17ab5bc --- /dev/null +++ b/theauditor/claude_setup.py @@ -0,0 +1,273 @@ +"""Claude Code integration setup - Zero-optional bulletproof installer.""" + +import hashlib +import json +import platform +import shutil +import stat +import sys +from pathlib import Path +from typing import Dict, List, Optional + +from .venv_install import setup_project_venv, find_theauditor_root + +# Detect if running on Windows for character encoding +IS_WINDOWS = platform.system() == "Windows" + + +def write_file_atomic(path: Path, content: str, executable: bool = False) -> str: + """ + Write file atomically with backup if content differs. 
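A sketch of wiring a named rule into a Tree-sitter query via the two mixin methods above; the source file is hypothetical, and the snippet assumes Tree-sitter parsing succeeded (the returned tree has type `tree_sitter`).

```python
from pathlib import Path
from theauditor.ast_parser import ASTParser

parser = ASTParser()
tree = parser.parse_file(Path("src/utils.js"))  # hypothetical file

query = parser.get_tree_sitter_query_for_pattern("NO_UNSAFE_EVAL", "javascript")
if query and tree and tree.get("type") == "tree_sitter":
    for m in parser.query_ast(tree, query):
        print(m.start_line, m.snippet)
```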
+ + Args: + path: File path to write + content: Content to write + executable: Make file executable (Unix only) + + Returns: + "created" if new file + "updated" if file changed (creates .bak) + "skipped" if identical content + """ + # Ensure parent directory exists + path.parent.mkdir(parents=True, exist_ok=True) + + if path.exists(): + existing = path.read_text(encoding='utf-8') + if existing == content: + return "skipped" + + # Create backup (only once per unique content) + bak_path = path.with_suffix(path.suffix + ".bak") + if not bak_path.exists(): + shutil.copy2(path, bak_path) + + path.write_text(content, encoding='utf-8') + status = "updated" + else: + path.write_text(content, encoding='utf-8') + status = "created" + + # Set executable if needed + if executable and platform.system() != "Windows": + st = path.stat() + path.chmod(st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH) + + return status + + +class WrapperTemplates: + """Cross-platform wrapper script templates.""" + + POSIX_WRAPPER = '''#!/usr/bin/env bash +# Auto-generated wrapper for project-local aud +PROJ_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" +VENV="$PROJ_ROOT/.auditor_venv/bin/aud" +if [ -x "$VENV" ]; then + exec "$VENV" "$@" +fi +# Fallback to module execution +exec "$PROJ_ROOT/.auditor_venv/bin/python" -m theauditor.cli "$@" +''' + + POWERSHELL_WRAPPER = r'''# Auto-generated wrapper for project-local aud +$proj = Split-Path -Path (Split-Path -Parent $MyInvocation.MyCommand.Path) -Parent +$aud = Join-Path $proj ".auditor_venv\Scripts\aud.exe" +if (Test-Path $aud) { + & $aud @args + exit $LASTEXITCODE +} +# Fallback to module execution +$python = Join-Path $proj ".auditor_venv\Scripts\python.exe" +& $python "-m" "theauditor.cli" @args +exit $LASTEXITCODE +''' + + CMD_WRAPPER = r'''@echo off +REM Auto-generated wrapper for project-local aud +set PROJ=%~dp0..\.. +if exist "%PROJ%\.auditor_venv\Scripts\aud.exe" ( + "%PROJ%\.auditor_venv\Scripts\aud.exe" %* + exit /b %ERRORLEVEL% +) +REM Fallback to module execution +"%PROJ%\.auditor_venv\Scripts\python.exe" -m theauditor.cli %* +exit /b %ERRORLEVEL% +''' + + +def create_wrappers(target_dir: Path) -> Dict[str, str]: + """ + Create cross-platform wrapper scripts. + + Args: + target_dir: Project root directory + + Returns: + Dict mapping wrapper paths to their status + """ + wrappers_dir = target_dir / ".claude" / "bin" + results = {} + + # POSIX wrapper (bash) + posix_wrapper = wrappers_dir / "aud" + status = write_file_atomic(posix_wrapper, WrapperTemplates.POSIX_WRAPPER, executable=True) + results[str(posix_wrapper)] = status + + # PowerShell wrapper + ps_wrapper = wrappers_dir / "aud.ps1" + status = write_file_atomic(ps_wrapper, WrapperTemplates.POWERSHELL_WRAPPER) + results[str(ps_wrapper)] = status + + # CMD wrapper + cmd_wrapper = wrappers_dir / "aud.cmd" + status = write_file_atomic(cmd_wrapper, WrapperTemplates.CMD_WRAPPER) + results[str(cmd_wrapper)] = status + + return results + + +def copy_agent_templates(source_dir: Path, target_dir: Path) -> Dict[str, str]: + """ + Copy all .md agent template files directly to target/.claude/agents/. 
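A hedged sketch of the `write_file_atomic()` contract: unchanged content is skipped, changed content is backed up once to a `.bak`, and new files report `created`. The target path is hypothetical.

```python
from pathlib import Path
from theauditor.claude_setup import write_file_atomic

target = Path(".claude/bin/example")  # hypothetical path
print(write_file_atomic(target, "#!/usr/bin/env bash\n", executable=True))  # created
print(write_file_atomic(target, "#!/usr/bin/env bash\n"))                   # skipped
print(write_file_atomic(target, "#!/usr/bin/env bash\necho updated\n"))     # updated (+ .bak)
```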
+ + Args: + source_dir: Directory containing agent template .md files + target_dir: Project root directory + + Returns: + Dict mapping agent paths to their status + """ + agents_dir = target_dir / ".claude" / "agents" + agents_dir.mkdir(parents=True, exist_ok=True) + + results = {} + + # Find all .md files in source directory + for md_file in source_dir.glob("*.md"): + if md_file.is_file(): + # Read content + content = md_file.read_text(encoding='utf-8') + + # Write to target + target_file = agents_dir / md_file.name + status = write_file_atomic(target_file, content) + results[str(target_file)] = status + + return results + + +def setup_claude_complete( + target: str, + source: str = "agent_templates", + sync: bool = False, + dry_run: bool = False +) -> Dict[str, List[str]]: + """ + Complete Claude setup: venv, wrappers, hooks, and agents. + + Args: + target: Target project root (absolute or relative path) + source: Path to TheAuditor agent templates directory + sync: Force update (still creates .bak on first change) + dry_run: Print plan without executing + + Returns: + Dict with created, updated, and skipped file lists + """ + # Resolve paths + target_dir = Path(target).resolve() + + if not target_dir.exists(): + raise ValueError(f"Target directory does not exist: {target_dir}") + + # Find source docs + if Path(source).is_absolute(): + source_dir = Path(source) + else: + theauditor_root = find_theauditor_root() + source_dir = theauditor_root / source + + if not source_dir.exists(): + raise ValueError(f"Source agent templates directory not found: {source_dir}") + + print(f"\n{'='*60}") + print(f"Claude Setup - Zero-Optional Installation") + print(f"{'='*60}") + print(f"Target: {target_dir}") + print(f"Source: {source_dir}") + print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}") + print(f"{'='*60}\n") + + if dry_run: + print("DRY RUN - Plan of operations:") + print(f"1. Create/verify venv at {target_dir}/.auditor_venv") + print(f"2. Install TheAuditor (editable) into venv") + print(f"3. Create wrappers at {target_dir}/.claude/bin/") + print(f"4. Copy agent templates from {source_dir}/*.md") + print(f"5. Write agents to {target_dir}/.claude/agents/") + print("\nNo files will be modified.") + return {"created": [], "updated": [], "skipped": []} + + results = { + "created": [], + "updated": [], + "skipped": [], + "failed": [] + } + + # Step 1: Setup venv + print("Step 1: Setting up Python virtual environment...", flush=True) + try: + venv_path, success = setup_project_venv(target_dir, force=sync) + if success: + results["created"].append(str(venv_path)) + else: + results["failed"].append(f"venv setup at {venv_path}") + print("ERROR: Failed to setup venv. 
Aborting.") + return results + except Exception as e: + print(f"ERROR setting up venv: {e}") + results["failed"].append("venv setup") + return results + + # Step 2: Create wrappers + print("\nStep 2: Creating cross-platform wrappers...", flush=True) + wrapper_results = create_wrappers(target_dir) + for path, status in wrapper_results.items(): + results[status].append(path) + + # Step 3: Copy agent templates + print("\nStep 3: Copying agent templates...", flush=True) + try: + agent_results = copy_agent_templates(source_dir, target_dir) + for path, status in agent_results.items(): + results[status].append(path) + + if not agent_results: + print("WARNING: No .md files found in agent_templates directory") + + except Exception as e: + print(f"ERROR copying agent templates: {e}") + results["failed"].append("agent template copy") + + # Summary + print(f"\n{'='*60}") + print("Setup Complete - Summary:") + print(f"{'='*60}") + print(f"Created: {len(results['created'])} files") + print(f"Updated: {len(results['updated'])} files") + print(f"Skipped: {len(results['skipped'])} files (unchanged)") + + if results['failed']: + print(f"FAILED: {len(results['failed'])} operations") + for item in results['failed']: + print(f" - {item}") + + check_mark = "[OK]" if IS_WINDOWS else "✓" + print(f"\n{check_mark} Project configured at: {target_dir}") + print(f"{check_mark} Wrapper available at: {target_dir}/.claude/bin/aud") + print(f"{check_mark} Agents installed to: {target_dir}/.claude/agents/") + print(f"{check_mark} Professional linters installed (ruff, mypy, black, ESLint, etc.)") + + return results \ No newline at end of file diff --git a/theauditor/cli.py b/theauditor/cli.py new file mode 100644 index 0000000..f71a75e --- /dev/null +++ b/theauditor/cli.py @@ -0,0 +1,239 @@ +"""TheAuditor CLI - Main entry point and command registration hub.""" + +import platform +import subprocess +import sys + +import click +from theauditor import __version__ + +# Configure UTF-8 console output for Windows +if platform.system() == "Windows": + try: + # Set console code page to UTF-8 + subprocess.run(["chcp", "65001"], shell=True, capture_output=True, timeout=1) + # Also configure Python's stdout/stderr + import codecs + sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict') + sys.stderr = codecs.getwriter('utf-8')(sys.stderr.buffer, 'strict') + except Exception: + # Silently continue if chcp fails (not critical) + pass + + +class VerboseGroup(click.Group): + """Custom group that shows all subcommands and their key options in help.""" + + def format_help(self, ctx, formatter): + """Format help to show all commands with their key options.""" + # Original help text + super().format_help(ctx, formatter) + + # Add detailed command listing + formatter.write_paragraph() + formatter.write_text("Detailed Command Overview:") + formatter.write_paragraph() + + # Core commands + formatter.write_text("CORE ANALYSIS:") + with formatter.indentation(): + formatter.write_text("aud full # Complete 13-phase security audit") + formatter.write_text(" --offline # Skip network operations (deps, docs)") + formatter.write_text(" --exclude-self # Exclude TheAuditor's own files") + formatter.write_text(" --quiet # Minimal output") + formatter.write_paragraph() + + formatter.write_text("aud index # Build file manifest and symbol database") + formatter.write_text(" --exclude-self # Exclude TheAuditor's own files") + formatter.write_paragraph() + + formatter.write_text("aud workset # Analyze only changed files") + formatter.write_text(" 
--diff HEAD~3..HEAD # Specify git commit range") + formatter.write_text(" --all # Include all files") + + formatter.write_paragraph() + formatter.write_text("SECURITY SCANNING:") + with formatter.indentation(): + formatter.write_text("aud detect-patterns # Run 100+ security pattern rules") + formatter.write_text(" --workset # Scan only workset files") + formatter.write_paragraph() + + formatter.write_text("aud taint-analyze # Track data flow from sources to sinks") + formatter.write_paragraph() + + formatter.write_text("aud docker-analyze # Analyze Docker security issues") + formatter.write_text(" --severity critical # Filter by severity") + + formatter.write_paragraph() + formatter.write_text("DEPENDENCIES:") + with formatter.indentation(): + formatter.write_text("aud deps # Analyze project dependencies") + formatter.write_text(" --vuln-scan # Run npm audit & pip-audit") + formatter.write_text(" --check-latest # Check for outdated packages") + formatter.write_text(" --upgrade-all # YOLO: upgrade everything to latest") + + formatter.write_paragraph() + formatter.write_text("CODE QUALITY:") + with formatter.indentation(): + formatter.write_text("aud lint # Run all configured linters") + formatter.write_text(" --fix # Auto-fix issues where possible") + formatter.write_text(" --workset # Lint only changed files") + + formatter.write_paragraph() + formatter.write_text("ANALYSIS & REPORTING:") + with formatter.indentation(): + formatter.write_text("aud graph build # Build dependency graph") + formatter.write_text("aud graph analyze # Find cycles and architectural issues") + formatter.write_paragraph() + + formatter.write_text("aud impact # Analyze change impact radius") + formatter.write_text(" --file src/auth.py # Specify file to analyze") + formatter.write_text(" --line 42 # Specific line number") + formatter.write_paragraph() + + formatter.write_text("aud refactor # Detect incomplete refactorings") + formatter.write_text(" --auto-detect # Auto-detect from migrations") + formatter.write_text(" --workset # Check current changes") + formatter.write_paragraph() + + formatter.write_text("aud fce # Run Factual Correlation Engine") + formatter.write_text("aud report # Generate final report") + formatter.write_text("aud structure # Generate project structure report") + + formatter.write_paragraph() + formatter.write_text("ADVANCED:") + with formatter.indentation(): + formatter.write_text("aud insights # Run optional insights analysis") + formatter.write_text(" --mode ml # ML risk predictions") + formatter.write_text(" --mode graph # Architecture health scoring") + formatter.write_text(" --mode taint # Security severity analysis") + formatter.write_paragraph() + + formatter.write_text("aud learn # Train ML models on codebase") + formatter.write_text("aud suggest # Get ML-powered suggestions") + + formatter.write_paragraph() + formatter.write_text("SETUP & CONFIG:") + with formatter.indentation(): + formatter.write_text("aud init # Initialize .pf/ directory") + formatter.write_text("aud setup-claude # Setup sandboxed JS/TS tools") + formatter.write_text(" --target . 
# Target directory") + formatter.write_paragraph() + + formatter.write_text("aud init-js # Create/merge package.json") + formatter.write_text("aud init-config # Initialize configuration") + + formatter.write_paragraph() + formatter.write_text("For detailed help on any command: aud --help") + + +@click.group(cls=VerboseGroup) +@click.version_option(version=__version__, prog_name="aud") +@click.help_option("-h", "--help") +def cli(): + """TheAuditor - Offline, air-gapped CLI for repo indexing and evidence checking. + + Quick Start: + aud init # Initialize project + aud full # Run complete audit + aud full --offline # Run without network operations + + View results in .pf/readthis/ directory.""" + pass + + +# Import and register commands +from theauditor.commands.init import init +from theauditor.commands.index import index +from theauditor.commands.workset import workset +from theauditor.commands.lint import lint +from theauditor.commands.deps import deps +from theauditor.commands.report import report +from theauditor.commands.summary import summary +from theauditor.commands.graph import graph +from theauditor.commands.full import full +from theauditor.commands.fce import fce +from theauditor.commands.impact import impact +from theauditor.commands.taint import taint_analyze +from theauditor.commands.setup import setup_claude + +# Import additional migrated commands +from theauditor.commands.detect_patterns import detect_patterns +from theauditor.commands.detect_frameworks import detect_frameworks +from theauditor.commands.docs import docs +from theauditor.commands.tool_versions import tool_versions +from theauditor.commands.init_js import init_js +from theauditor.commands.init_config import init_config +from theauditor.commands.validate_templates import validate_templates + +# Import ML commands +from theauditor.commands.ml import learn, suggest, learn_feedback + +# Import internal commands (prefixed with _) +from theauditor.commands._archive import _archive + +# Import rules command +from theauditor.commands.rules import rules_command + +# Import refactoring analysis commands +from theauditor.commands.refactor import refactor_command +from theauditor.commands.insights import insights_command + +# Import new commands +from theauditor.commands.docker_analyze import docker_analyze +from theauditor.commands.structure import structure + +# Register simple commands +cli.add_command(init) +cli.add_command(index) +cli.add_command(workset) +cli.add_command(lint) +cli.add_command(deps) +cli.add_command(report) +cli.add_command(summary) +cli.add_command(full) +cli.add_command(fce) +cli.add_command(impact) +cli.add_command(taint_analyze) +cli.add_command(setup_claude) + +# Register additional migrated commands +cli.add_command(detect_patterns) +cli.add_command(detect_frameworks) +cli.add_command(docs) +cli.add_command(tool_versions) +cli.add_command(init_js) +cli.add_command(init_config) +cli.add_command(validate_templates) + +# Register ML commands +cli.add_command(learn) +cli.add_command(suggest) +cli.add_command(learn_feedback) + +# Register internal commands (not for direct user use) +cli.add_command(_archive) + +# Register rules command +cli.add_command(rules_command) + +# Register refactoring analysis commands +cli.add_command(refactor_command, name="refactor") +cli.add_command(insights_command, name="insights") + +# Register new commands +cli.add_command(docker_analyze) +cli.add_command(structure) + +# Register command groups +cli.add_command(graph) + +# All commands have been migrated to 
separate modules + +def main(): + """Main entry point for console script.""" + cli() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/theauditor/commands/__init__.py b/theauditor/commands/__init__.py new file mode 100644 index 0000000..df0c596 --- /dev/null +++ b/theauditor/commands/__init__.py @@ -0,0 +1 @@ +"""Commands module for TheAuditor CLI.""" \ No newline at end of file diff --git a/theauditor/commands/_archive.py b/theauditor/commands/_archive.py new file mode 100644 index 0000000..6839d09 --- /dev/null +++ b/theauditor/commands/_archive.py @@ -0,0 +1,107 @@ +"""Internal archive command for segregating history by run type.""" + +import shutil +import sys +from datetime import datetime +from pathlib import Path + +import click + + +@click.command(name="_archive") +@click.option("--run-type", required=True, type=click.Choice(["full", "diff"]), help="Type of run being archived") +@click.option("--diff-spec", help="Git diff specification for diff runs (e.g., main..HEAD)") +def _archive(run_type: str, diff_spec: str = None): + """ + Internal command to archive previous run artifacts with segregation by type. + + This command is not intended for direct user execution. It's called by + the full and orchestrate workflows to maintain clean, segregated history. + """ + # Define base paths + pf_dir = Path(".pf") + history_dir = pf_dir / "history" + + # Check if there's a previous run to archive (by checking if .pf exists and has files) + if not pf_dir.exists() or not any(pf_dir.iterdir()): + # No previous run to archive + print("[ARCHIVE] No previous run artifacts found to archive", file=sys.stderr) + return + + # Determine destination base path based on run type + if run_type == "full": + dest_base = history_dir / "full" + else: # run_type == "diff" + dest_base = history_dir / "diff" + + # Create destination base directory if it doesn't exist + dest_base.mkdir(parents=True, exist_ok=True) + + # Generate timestamp for archive directory + timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S") + + # Create unique directory name + if run_type == "diff" and diff_spec: + # Sanitize diff spec for directory name + # Replace problematic characters with underscores + safe_spec = diff_spec.replace("..", "_") + safe_spec = safe_spec.replace("/", "_") + safe_spec = safe_spec.replace("\\", "_") + safe_spec = safe_spec.replace(":", "_") + safe_spec = safe_spec.replace(" ", "_") + safe_spec = safe_spec.replace("~", "_") + safe_spec = safe_spec.replace("^", "_") + + # Create descriptive name like "main_HEAD_20250819_090015" + dir_name = f"{safe_spec}_{timestamp_str}" + else: + # Simple timestamp for full runs + dir_name = timestamp_str + + # Create the archive destination directory + archive_dest = dest_base / dir_name + archive_dest.mkdir(exist_ok=True) + + # Move all top-level items from pf_dir to archive_dest + archived_count = 0 + skipped_count = 0 + + for item in pf_dir.iterdir(): + # CRITICAL: Skip the history directory itself to prevent recursive archiving + if item.name == "history": + continue + + # Safely move the item to archive destination + try: + shutil.move(str(item), str(archive_dest)) + archived_count += 1 + except Exception as e: + # Log error but don't stop the archiving process + print(f"[WARNING] Could not archive {item.name}: {e}", file=sys.stderr) + skipped_count += 1 + + # Log summary + if archived_count > 0: + click.echo(f"[ARCHIVE] Archived {archived_count} items to {archive_dest}") + if skipped_count > 0: + click.echo(f"[ARCHIVE] Skipped 
{skipped_count} items due to errors") + else: + click.echo("[ARCHIVE] No artifacts archived (directory was empty)") + + # Create a metadata file in the archive to track run type and context + metadata = { + "run_type": run_type, + "diff_spec": diff_spec, + "timestamp": timestamp_str, + "archived_at": datetime.now().isoformat(), + "files_archived": archived_count, + "files_skipped": skipped_count, + } + + try: + import json + metadata_path = archive_dest / "_metadata.json" + with open(metadata_path, 'w') as f: + json.dump(metadata, f, indent=2) + except Exception as e: + print(f"[WARNING] Could not write metadata file: {e}", file=sys.stderr) \ No newline at end of file diff --git a/theauditor/commands/deps.py b/theauditor/commands/deps.py new file mode 100644 index 0000000..cd6b79a --- /dev/null +++ b/theauditor/commands/deps.py @@ -0,0 +1,191 @@ +"""Parse and analyze project dependencies.""" + +import platform +from pathlib import Path +import click +from theauditor.utils.error_handler import handle_exceptions +from theauditor.utils.exit_codes import ExitCodes + +# Detect if running on Windows for character encoding +IS_WINDOWS = platform.system() == "Windows" + + +@click.command() +@handle_exceptions +@click.option("--root", default=".", help="Root directory") +@click.option("--check-latest", is_flag=True, help="Check for latest versions from registries") +@click.option("--upgrade-all", is_flag=True, help="YOLO mode: Update ALL packages to latest versions") +@click.option("--offline", is_flag=True, help="Force offline mode (no network)") +@click.option("--out", default="./.pf/raw/deps.json", help="Output dependencies file") +@click.option("--print-stats", is_flag=True, help="Print dependency statistics") +@click.option("--vuln-scan", is_flag=True, help="Scan dependencies for known vulnerabilities") +def deps(root, check_latest, upgrade_all, offline, out, print_stats, vuln_scan): + """Parse and analyze project dependencies.""" + from theauditor.deps import parse_dependencies, write_deps_json, check_latest_versions, write_deps_latest_json, upgrade_all_deps + from theauditor.vulnerability_scanner import scan_dependencies, write_vulnerabilities_json, format_vulnerability_report + import sys + + # Parse dependencies + deps_list = parse_dependencies(root_path=root) + + if not deps_list: + click.echo("No dependency files found (package.json, pyproject.toml, requirements.txt)") + click.echo(" Searched in: " + str(Path(root).resolve())) + return + + write_deps_json(deps_list, output_path=out) + + # Vulnerability scanning + if vuln_scan: + click.echo(f"\n[SCAN] Running native vulnerability scanners...") + click.echo(f" Using: npm audit, pip-audit (if available)") + + vulnerabilities = scan_dependencies(deps_list, offline=offline) + + if vulnerabilities: + # Write JSON report + vuln_output = out.replace("deps.json", "vulnerabilities.json") + write_vulnerabilities_json(vulnerabilities, output_path=vuln_output) + + # Display human-readable report + report = format_vulnerability_report(vulnerabilities) + click.echo("\n" + report) + click.echo(f"\nDetailed report written to {vuln_output}") + + # Exit with error code if critical vulnerabilities found + critical_count = sum(1 for v in vulnerabilities if v["severity"] == "critical") + if critical_count > 0: + click.echo(f"\n[FAIL] Found {critical_count} CRITICAL vulnerabilities - failing build") + sys.exit(ExitCodes.CRITICAL_SEVERITY) + else: + click.echo(f" [OK] No known vulnerabilities found in dependencies") + + # Don't continue with other operations 
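A condensed sketch of the vulnerability gate implemented in the deps command above, reusing the helpers it imports; the literal exit code stands in for ExitCodes.CRITICAL_SEVERITY, whose numeric value is not shown in this patch:

    from theauditor.deps import parse_dependencies
    from theauditor.vulnerability_scanner import scan_dependencies

    deps_list = parse_dependencies(root_path=".")
    vulns = scan_dependencies(deps_list, offline=False)

    # Fail the build only when at least one finding is rated critical.
    critical = sum(1 for v in vulns if v["severity"] == "critical")
    if critical:
        raise SystemExit(2)  # stand-in for ExitCodes.CRITICAL_SEVERITY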
after vuln scan + return + + # YOLO MODE: Upgrade all to latest + if upgrade_all and not offline: + click.echo("[YOLO MODE] Upgrading ALL packages to latest versions...") + click.echo(" [WARN] This may break things. That's the point!") + + # Get latest versions + latest_info = check_latest_versions(deps_list, allow_net=True, offline=offline) + if not latest_info: + click.echo(" [FAIL] Failed to fetch latest versions") + return + + # Check if all packages were successfully checked + failed_checks = sum(1 for info in latest_info.values() if info.get("error") is not None) + successful_checks = sum(1 for info in latest_info.values() if info.get("latest") is not None) + + if failed_checks > 0: + click.echo(f"\n [WARN] Only {successful_checks}/{len(latest_info)} packages checked successfully") + click.echo(f" [FAIL] Cannot upgrade with {failed_checks} failed checks") + click.echo(" Fix network issues and try again") + return + + # Upgrade all dependency files + upgraded = upgrade_all_deps(root_path=root, latest_info=latest_info, deps_list=deps_list) + + # Count unique packages that were upgraded + unique_upgraded = len([1 for k, v in latest_info.items() if v.get("is_outdated", False)]) + total_updated = sum(upgraded.values()) + + click.echo(f"\n[UPGRADED] Dependency files:") + for file_type, count in upgraded.items(): + if count > 0: + click.echo(f" [OK] {file_type}: {count} dependency entries updated") + + # Show summary that matches the "Outdated: 10/29" format + if total_updated > unique_upgraded: + click.echo(f"\n Summary: {unique_upgraded} unique packages updated across {total_updated} occurrences") + + click.echo("\n[NEXT STEPS]:") + click.echo(" 1. Run: pip install -r requirements.txt") + click.echo(" 2. Or: npm install") + click.echo(" 3. Pray it still works") + return + + # Check latest versions if requested + latest_info = {} + if check_latest and not offline: + # Count unique packages first + unique_packages = {} + for dep in deps_list: + key = f"{dep['manager']}:{dep['name']}" + if key not in unique_packages: + unique_packages[key] = 0 + unique_packages[key] += 1 + + click.echo(f"Checking {len(deps_list)} dependencies for updates...") + click.echo(f" Unique packages to check: {len(unique_packages)}") + click.echo(" Connecting to: npm registry and PyPI") + latest_info = check_latest_versions(deps_list, allow_net=True, offline=offline) + if latest_info: + write_deps_latest_json(latest_info, output_path=out.replace("deps.json", "deps_latest.json")) + + # Count successful vs failed checks + successful_checks = sum(1 for info in latest_info.values() if info.get("latest") is not None) + failed_checks = sum(1 for info in latest_info.values() if info.get("error") is not None) + + click.echo(f" [OK] Checked {successful_checks}/{len(unique_packages)} unique packages") + if failed_checks > 0: + click.echo(f" [WARN] {failed_checks} packages failed to check") + # Show first few errors + errors = [(k.split(":")[1], v["error"]) for k, v in latest_info.items() if v.get("error")][:3] + for pkg, err in errors: + click.echo(f" - {pkg}: {err}") + else: + click.echo(" [FAIL] Failed to check versions (network issue or offline mode)") + + # Always show output + click.echo(f"Dependencies written to {out}") + + # Count by manager + npm_count = sum(1 for d in deps_list if d["manager"] == "npm") + py_count = sum(1 for d in deps_list if d["manager"] == "py") + + click.echo(f" Total: {len(deps_list)} dependencies") + if npm_count > 0: + click.echo(f" Node/npm: {npm_count}") + if py_count > 0: + click.echo(f" 
Python: {py_count}") + + if latest_info: + # Count how many of the TOTAL deps are outdated (only if successfully checked) + outdated_deps = 0 + checked_deps = 0 + for dep in deps_list: + key = f"{dep['manager']}:{dep['name']}" + if key in latest_info and latest_info[key].get("latest") is not None: + checked_deps += 1 + if latest_info[key]["is_outdated"]: + outdated_deps += 1 + + # Also count unique outdated packages + outdated_unique = sum(1 for info in latest_info.values() if info.get("is_outdated", False)) + + # Show outdated/checked rather than outdated/total + if checked_deps == len(deps_list): + # All were checked successfully + click.echo(f" Outdated: {outdated_deps}/{len(deps_list)}") + else: + # Some failed, show both numbers + click.echo(f" Outdated: {outdated_deps}/{checked_deps} checked ({len(deps_list)} total)") + + # Show major updates + major_updates = [ + (k.split(":")[1], v["locked"], v["latest"]) + for k, v in latest_info.items() + if v.get("delta") == "major" + ] + if major_updates: + click.echo("\n Major version updates available:") + for name, locked, latest in major_updates[:5]: + click.echo(f" - {name}: {locked} -> {latest}") + if len(major_updates) > 5: + click.echo(f" ... and {len(major_updates) - 5} more") + + # Add a helpful hint if no network operation was performed + if not check_latest and not upgrade_all: + click.echo("\nTIP: Run with --check-latest to check for outdated packages.") \ No newline at end of file diff --git a/theauditor/commands/detect_frameworks.py b/theauditor/commands/detect_frameworks.py new file mode 100644 index 0000000..975979c --- /dev/null +++ b/theauditor/commands/detect_frameworks.py @@ -0,0 +1,46 @@ +"""Detect frameworks and libraries used in the project.""" + +import json +import click +from pathlib import Path + + +@click.command("detect-frameworks") +@click.option("--project-path", default=".", help="Root directory to analyze") +@click.option("--output-json", help="Path to output JSON file (default: .pf/raw/frameworks.json)") +def detect_frameworks(project_path, output_json): + """Detect frameworks and libraries used in the project.""" + from theauditor.framework_detector import FrameworkDetector + + try: + # Initialize detector + project_path = Path(project_path).resolve() + + detector = FrameworkDetector(project_path, exclude_patterns=[]) + + # Detect frameworks + frameworks = detector.detect_all() + + # Determine output path - always save to .pf/frameworks.json by default + if output_json: + # User specified custom path + save_path = Path(output_json) + else: + # Default path + save_path = Path(project_path) / ".pf" / "raw" / "frameworks.json" + + # Always save the JSON output + detector.save_to_file(save_path) + click.echo(f"Frameworks written to {save_path}") + + # Display table + table = detector.format_table() + click.echo(table) + + # Return success + if frameworks: + click.echo(f"\nDetected {len(frameworks)} framework(s)") + + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e \ No newline at end of file diff --git a/theauditor/commands/detect_patterns.py b/theauditor/commands/detect_patterns.py new file mode 100644 index 0000000..b0d3c62 --- /dev/null +++ b/theauditor/commands/detect_patterns.py @@ -0,0 +1,81 @@ +"""Detect universal runtime, DB, and logic patterns in code.""" + +import click +from pathlib import Path +from theauditor.utils.helpers import get_self_exclusion_patterns + + +@click.command("detect-patterns") +@click.option("--project-path", default=".", 
help="Root directory to analyze") +@click.option("--patterns", multiple=True, help="Pattern categories to use (e.g., runtime_issues, db_issues)") +@click.option("--output-json", help="Path to output JSON file") +@click.option("--file-filter", help="Glob pattern to filter files") +@click.option("--max-rows", default=50, type=int, help="Maximum rows to display in table") +@click.option("--print-stats", is_flag=True, help="Print summary statistics") +@click.option("--with-ast/--no-ast", default=True, help="Enable AST-based pattern matching") +@click.option("--with-frameworks/--no-frameworks", default=True, help="Enable framework detection and framework-specific patterns") +@click.option("--exclude-self", is_flag=True, help="Exclude TheAuditor's own files (for self-testing)") +def detect_patterns(project_path, patterns, output_json, file_filter, max_rows, print_stats, with_ast, with_frameworks, exclude_self): + """Detect universal runtime, DB, and logic patterns in code.""" + from theauditor.pattern_loader import PatternLoader + from theauditor.universal_detector import UniversalPatternDetector + + try: + # Initialize detector + project_path = Path(project_path).resolve() + pattern_loader = PatternLoader() + + # Get exclusion patterns using centralized function + exclude_patterns = get_self_exclusion_patterns(exclude_self) + + detector = UniversalPatternDetector( + project_path, + pattern_loader, + with_ast=with_ast, + with_frameworks=with_frameworks, + exclude_patterns=exclude_patterns + ) + + # Run detection + categories = list(patterns) if patterns else None + findings = detector.detect_patterns(categories=categories, file_filter=file_filter) + + # Always save results to default location + patterns_output = project_path / ".pf" / "raw" / "patterns.json" + patterns_output.parent.mkdir(parents=True, exist_ok=True) + + # Save to user-specified location if provided + if output_json: + detector.to_json(Path(output_json)) + click.echo(f"\n[OK] Full results saved to: {output_json}") + + # Save to default location + detector.to_json(patterns_output) + click.echo(f"[OK] Full results saved to: {patterns_output}") + + # Display table + table = detector.format_table(max_rows=max_rows) + click.echo(table) + + # Print statistics if requested + if print_stats: + stats = detector.get_summary_stats() + click.echo("\n--- Summary Statistics ---") + click.echo(f"Total findings: {stats['total_findings']}") + click.echo(f"Files affected: {stats['files_affected']}") + + if stats['by_severity']: + click.echo("\nBy severity:") + for severity, count in sorted(stats['by_severity'].items()): + click.echo(f" {severity}: {count}") + + if stats['by_category']: + click.echo("\nBy category:") + for category, count in sorted(stats['by_category'].items()): + click.echo(f" {category}: {count}") + + # Successfully completed - found and reported all issues + + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e \ No newline at end of file diff --git a/theauditor/commands/docker_analyze.py b/theauditor/commands/docker_analyze.py new file mode 100644 index 0000000..bf0b18c --- /dev/null +++ b/theauditor/commands/docker_analyze.py @@ -0,0 +1,94 @@ +"""Docker security analysis command.""" + +import click +import json +from pathlib import Path +from theauditor.utils.error_handler import handle_exceptions +from theauditor.utils.exit_codes import ExitCodes + + +@click.command("docker-analyze") +@handle_exceptions +@click.option("--db-path", default="./.pf/repo_index.db", help="Path 
to repo_index.db") +@click.option("--output", help="Output file for findings (JSON format)") +@click.option("--severity", type=click.Choice(["all", "critical", "high", "medium", "low"]), + default="all", help="Minimum severity to report") +@click.option("--check-vulns/--no-check-vulns", default=True, + help="Check base images for vulnerabilities (requires network)") +def docker_analyze(db_path, output, severity, check_vulns): + """Analyze Docker images for security issues. + + Detects: + - Containers running as root + - Exposed secrets in ENV/ARG instructions + - High entropy values (potential secrets) + - Base image vulnerabilities (if --check-vulns enabled) + """ + from theauditor.docker_analyzer import analyze_docker_images + + # Check if database exists + if not Path(db_path).exists(): + click.echo(f"Error: Database not found at {db_path}", err=True) + click.echo("Run 'aud index' first to create the database", err=True) + return ExitCodes.TASK_INCOMPLETE + + # Run analysis + click.echo("Analyzing Docker images for security issues...") + if check_vulns: + click.echo(" Including vulnerability scan of base images...") + findings = analyze_docker_images(db_path, check_vulnerabilities=check_vulns) + + # Filter by severity if requested + if severity != "all": + severity_order = {"critical": 4, "high": 3, "medium": 2, "low": 1} + min_severity = severity_order.get(severity.lower(), 0) + findings = [f for f in findings + if severity_order.get(f.get("severity", "").lower(), 0) >= min_severity] + + # Count by severity + severity_counts = {} + for finding in findings: + sev = finding.get("severity", "unknown").lower() + severity_counts[sev] = severity_counts.get(sev, 0) + 1 + + # Display results + if findings: + click.echo(f"\nFound {len(findings)} Docker security issues:") + + # Show severity breakdown + for sev in ["critical", "high", "medium", "low"]: + if sev in severity_counts: + click.echo(f" {sev.upper()}: {severity_counts[sev]}") + + # Show findings + click.echo("\nFindings:") + for finding in findings: + click.echo(f"\n[{finding['severity'].upper()}] {finding['type']}") + click.echo(f" File: {finding['file']}") + click.echo(f" {finding['message']}") + if finding.get('recommendation'): + click.echo(f" Fix: {finding['recommendation']}") + else: + click.echo("No Docker security issues found") + + # Save to file if requested + if output: + output_path = Path(output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, 'w') as f: + json.dump({ + "findings": findings, + "summary": severity_counts, + "total": len(findings) + }, f, indent=2) + + click.echo(f"\nResults saved to: {output}") + + # Exit with appropriate code + if severity_counts.get("critical", 0) > 0: + return ExitCodes.CRITICAL_SEVERITY + elif severity_counts.get("high", 0) > 0: + return ExitCodes.HIGH_SEVERITY + else: + return ExitCodes.SUCCESS \ No newline at end of file diff --git a/theauditor/commands/docs.py b/theauditor/commands/docs.py new file mode 100644 index 0000000..f5b7446 --- /dev/null +++ b/theauditor/commands/docs.py @@ -0,0 +1,201 @@ +"""Fetch or summarize documentation for dependencies.""" + +import json +import click +from pathlib import Path + + +@click.command("docs") +@click.argument("action", type=click.Choice(["fetch", "summarize", "view", "list"])) +@click.argument("package_name", required=False) +@click.option("--deps", default="./.pf/deps.json", help="Input dependencies file") +@click.option("--offline", is_flag=True, help="Force offline mode") 
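The severity filter in docker-analyze above reduces to a rank comparison; a minimal standalone sketch with the rank values copied from that command:

    SEVERITY_RANK = {"critical": 4, "high": 3, "medium": 2, "low": 1}

    def filter_by_severity(findings, minimum="high"):
        """Keep findings whose severity is at or above the requested floor."""
        floor = SEVERITY_RANK.get(minimum.lower(), 0)
        return [
            f for f in findings
            if SEVERITY_RANK.get(f.get("severity", "").lower(), 0) >= floor
        ]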
+@click.option("--allow-non-gh-readmes", is_flag=True, help="Allow non-GitHub README fetching") +@click.option("--docs-dir", default="./.pf/context/docs", help="Documentation cache directory") +@click.option("--capsules-dir", default="./.pf/context/doc_capsules", help="Output capsules directory") +@click.option("--workset", default="./.pf/workset.json", help="Workset file for filtering") +@click.option("--print-stats", is_flag=True, help="Print statistics") +@click.option("--raw", is_flag=True, help="View raw fetched doc instead of capsule") +def docs(action, package_name, deps, offline, allow_non_gh_readmes, docs_dir, capsules_dir, workset, print_stats, raw): + """Fetch or summarize documentation for dependencies.""" + from theauditor.deps import parse_dependencies + from theauditor.docs_fetch import fetch_docs, DEFAULT_ALLOWLIST + from theauditor.docs_summarize import summarize_docs + + try: + if action == "fetch": + # Load dependencies + if Path(deps).exists(): + with open(deps, encoding="utf-8") as f: + deps_list = json.load(f) + else: + # Parse if not cached + deps_list = parse_dependencies() + + # Set up allowlist + allowlist = DEFAULT_ALLOWLIST.copy() + if not allow_non_gh_readmes: + # Already restricted to GitHub by default + pass + + # Check for policy file + policy_file = Path(".pf/policy.yml") + allow_net = True + if policy_file.exists(): + try: + # Simple YAML parsing without external deps + with open(policy_file, encoding="utf-8") as f: + for line in f: + if "allow_net:" in line: + allow_net = "true" in line.lower() + break + except Exception: + pass # Default to True + + # Fetch docs + result = fetch_docs( + deps_list, + allow_net=allow_net, + allowlist=allowlist, + offline=offline, + output_dir=docs_dir + ) + + if not print_stats: + if result["mode"] == "offline": + click.echo("Running in offline mode - no documentation fetched") + else: + click.echo(f"Documentation fetch complete:") + click.echo(f" Fetched: {result['fetched']}") + click.echo(f" Cached: {result['cached']}") + click.echo(f" Skipped: {result['skipped']}") + if result["errors"]: + click.echo(f" Errors: {len(result['errors'])}") + + elif action == "summarize": + # Summarize docs + result = summarize_docs( + docs_dir=docs_dir, + output_dir=capsules_dir, + workset_path=workset if Path(workset).exists() else None + ) + + if not print_stats: + click.echo(f"Documentation capsules created:") + click.echo(f" Capsules: {result['capsules_created']}") + click.echo(f" Skipped: {result['skipped']}") + if result["errors"]: + click.echo(f" Errors: {len(result['errors'])}") + + index_file = Path(capsules_dir).parent / "doc_index.json" + click.echo(f" Index: {index_file}") + + elif action == "list": + # List available docs and capsules + docs_path = Path(docs_dir) + capsules_path = Path(capsules_dir) + + click.echo("\n[Docs] Available Documentation:\n") + + # List fetched docs + if docs_path.exists(): + click.echo("Fetched Docs (.pf/context/docs/):") + for ecosystem in ["npm", "py"]: + ecosystem_dir = docs_path / ecosystem + if ecosystem_dir.exists(): + packages = sorted([d.name for d in ecosystem_dir.iterdir() if d.is_dir()]) + if packages: + click.echo(f"\n {ecosystem.upper()}:") + for pkg in packages[:20]: # Show first 20 + click.echo(f" * {pkg}") + if len(packages) > 20: + click.echo(f" ... 
and {len(packages) - 20} more") + + # List capsules + if capsules_path.exists(): + click.echo("\nDoc Capsules (.pf/context/doc_capsules/):") + capsules = sorted([f.stem for f in capsules_path.glob("*.md")]) + if capsules: + for capsule in capsules[:20]: # Show first 20 + click.echo(f" * {capsule}") + if len(capsules) > 20: + click.echo(f" ... and {len(capsules) - 20} more") + + click.echo("\n[TIP] Use 'aud docs view ' to view a specific doc") + click.echo(" Add --raw to see the full fetched doc instead of capsule") + + elif action == "view": + if not package_name: + click.echo("Error: Package name required for view action") + click.echo("Usage: aud docs view ") + click.echo(" aud docs view geopandas") + click.echo(" aud docs view numpy --raw") + raise click.ClickException("Package name required") + + docs_path = Path(docs_dir) + capsules_path = Path(capsules_dir) + found = False + + if raw: + # View raw fetched doc + for ecosystem in ["npm", "py"]: + # Try exact match first + for pkg_dir in (docs_path / ecosystem).glob(f"{package_name}@*"): + if pkg_dir.is_dir(): + doc_file = pkg_dir / "doc.md" + if doc_file.exists(): + click.echo(f"\n[RAW DOC] Raw Doc: {pkg_dir.name}\n") + click.echo("=" * 80) + with open(doc_file, encoding="utf-8") as f: + content = f.read() + # Limit output for readability + lines = content.split("\n") + if len(lines) > 200: + click.echo("\n".join(lines[:200])) + click.echo(f"\n... (truncated, {len(lines) - 200} more lines)") + else: + click.echo(content) + found = True + break + if found: + break + else: + # View capsule (default) + # Try exact match first + for capsule_file in capsules_path.glob(f"*{package_name}*.md"): + if capsule_file.exists(): + click.echo(f"\n[CAPSULE] Capsule: {capsule_file.stem}\n") + click.echo("=" * 80) + with open(capsule_file, encoding="utf-8") as f: + click.echo(f.read()) + click.echo("\n" + "=" * 80) + + # Try to find the corresponding full doc + package_parts = capsule_file.stem.replace("__", "@").split("@") + if len(package_parts) >= 2: + ecosystem_prefix = package_parts[0] + pkg_name = "@".join(package_parts[:-1]).replace(ecosystem_prefix + "@", "") + version = package_parts[-1] + ecosystem = "py" if ecosystem_prefix == "py" else "npm" + full_doc_path = f"./.pf/context/docs/{ecosystem}/{pkg_name}@{version}/doc.md" + click.echo(f"\n[SOURCE] Full Documentation: `{full_doc_path}`") + + click.echo("[TIP] Use --raw to see the full fetched documentation") + found = True + break + + if not found: + click.echo(f"No documentation found for '{package_name}'") + click.echo("\nAvailable packages:") + # Show some available packages + for ecosystem in ["npm", "py"]: + ecosystem_dir = docs_path / ecosystem + if ecosystem_dir.exists(): + packages = [d.name for d in ecosystem_dir.iterdir() if d.is_dir()][:5] + if packages: + click.echo(f" {ecosystem.upper()}: {', '.join(packages)}") + click.echo("\nUse 'aud docs list' to see all available docs") + + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e \ No newline at end of file diff --git a/theauditor/commands/fce.py b/theauditor/commands/fce.py new file mode 100644 index 0000000..0511e69 --- /dev/null +++ b/theauditor/commands/fce.py @@ -0,0 +1,43 @@ +"""Run Factual Correlation Engine to aggregate and correlate findings.""" + +import click +from theauditor.utils.error_handler import handle_exceptions + + +@click.command(name="fce") +@handle_exceptions +@click.option("--root", default=".", help="Root directory") +@click.option("--capsules", 
default="./.pf/capsules", help="Capsules directory") +@click.option("--manifest", default="manifest.json", help="Manifest file path") +@click.option("--workset", default="./.pf/workset.json", help="Workset file path") +@click.option("--timeout", default=600, type=int, help="Timeout in seconds") +@click.option("--print-plan", is_flag=True, help="Print detected tools without running") +def fce(root, capsules, manifest, workset, timeout, print_plan): + """Run Factual Correlation Engine to aggregate and correlate findings.""" + from theauditor.fce import run_fce + + result = run_fce( + root_path=root, + capsules_dir=capsules, + manifest_path=manifest, + workset_path=workset, + timeout=timeout, + print_plan=print_plan, + ) + + if result.get("printed_plan"): + return + + if result["success"]: + if result["failures_found"] == 0: + click.echo("[OK] All tools passed - no failures detected") + else: + click.echo(f"Found {result['failures_found']} failures") + # Check if output_files exists and has at least 2 elements + if result.get('output_files') and len(result.get('output_files', [])) > 1: + click.echo(f"FCE report written to: {result['output_files'][1]}") + elif result.get('output_files') and len(result.get('output_files', [])) > 0: + click.echo(f"FCE report written to: {result['output_files'][0]}") + else: + click.echo(f"Error: {result.get('error', 'Unknown error')}", err=True) + raise click.ClickException(result.get("error", "FCE failed")) \ No newline at end of file diff --git a/theauditor/commands/full.py b/theauditor/commands/full.py new file mode 100644 index 0000000..6d938e7 --- /dev/null +++ b/theauditor/commands/full.py @@ -0,0 +1,90 @@ +"""Run complete audit pipeline.""" + +import sys +import click +from theauditor.utils.error_handler import handle_exceptions +from theauditor.utils.exit_codes import ExitCodes + + +@click.command() +@handle_exceptions +@click.option("--root", default=".", help="Root directory to analyze") +@click.option("--quiet", is_flag=True, help="Minimal output") +@click.option("--exclude-self", is_flag=True, help="Exclude TheAuditor's own files (for self-testing)") +@click.option("--offline", is_flag=True, help="Skip network operations (deps, docs)") +def full(root, quiet, exclude_self, offline): + """Run complete audit pipeline in exact order specified in teamsop.md.""" + from theauditor.pipelines import run_full_pipeline + + # Define log callback for console output + def log_callback(message, is_error=False): + if is_error: + click.echo(message, err=True) + else: + click.echo(message) + + # Run the pipeline + result = run_full_pipeline( + root=root, + quiet=quiet, + exclude_self=exclude_self, + offline=offline, + log_callback=log_callback if not quiet else None + ) + + # Display clear status message based on results + findings = result.get("findings", {}) + critical = findings.get("critical", 0) + high = findings.get("high", 0) + medium = findings.get("medium", 0) + low = findings.get("low", 0) + + click.echo("\n" + "=" * 60) + click.echo("AUDIT FINAL STATUS") + click.echo("=" * 60) + + # Determine overall status and exit code + exit_code = ExitCodes.SUCCESS + + # Check for pipeline failures first + if result["failed_phases"] > 0: + click.echo(f"[WARNING] Pipeline completed with {result['failed_phases']} phase failures") + click.echo("Some analysis phases could not complete successfully.") + exit_code = ExitCodes.TASK_INCOMPLETE # Exit code for pipeline failures + + # Then check for security findings + if critical > 0: + click.echo(f"\nSTATUS: [CRITICAL] - Audit 
complete. Found {critical} critical vulnerabilities.") + click.echo("Immediate action required - deployment should be blocked.") + exit_code = ExitCodes.CRITICAL_SEVERITY # Exit code for critical findings + elif high > 0: + click.echo(f"\nSTATUS: [HIGH] - Audit complete. Found {high} high-severity issues.") + click.echo("Priority remediation needed before next release.") + if exit_code == ExitCodes.SUCCESS: + exit_code = ExitCodes.HIGH_SEVERITY # Exit code for high findings (unless already set for failures) + elif medium > 0 or low > 0: + click.echo(f"\nSTATUS: [MODERATE] - Audit complete. Found {medium} medium and {low} low issues.") + click.echo("Schedule fixes for upcoming sprints.") + else: + click.echo("\nSTATUS: [CLEAN] - No critical or high-severity issues found.") + click.echo("Codebase meets security and quality standards.") + + # Show findings breakdown if any exist + if critical + high + medium + low > 0: + click.echo("\nFindings breakdown:") + if critical > 0: + click.echo(f" - Critical: {critical}") + if high > 0: + click.echo(f" - High: {high}") + if medium > 0: + click.echo(f" - Medium: {medium}") + if low > 0: + click.echo(f" - Low: {low}") + + click.echo("\nReview the chunked data in .pf/readthis/ for complete findings.") + click.echo("=" * 60) + + # Exit with appropriate code for CI/CD automation + # Using standardized exit codes from ExitCodes class + if exit_code != ExitCodes.SUCCESS: + sys.exit(exit_code) \ No newline at end of file diff --git a/theauditor/commands/graph.py b/theauditor/commands/graph.py new file mode 100644 index 0000000..6e5da1a --- /dev/null +++ b/theauditor/commands/graph.py @@ -0,0 +1,639 @@ +"""Cross-project dependency and call graph analysis.""" + +import json +from pathlib import Path +import click +from theauditor.config_runtime import load_runtime_config + + +@click.group() +@click.help_option("-h", "--help") +def graph(): + """Cross-project dependency and call graph analysis.""" + pass + + +@graph.command("build") +@click.option("--root", default=".", help="Root directory to analyze") +@click.option("--langs", multiple=True, help="Languages to process (e.g., python, javascript)") +@click.option("--workset", help="Path to workset.json to limit scope") +@click.option("--batch-size", default=200, type=int, help="Files per batch") +@click.option("--resume", is_flag=True, help="Resume from checkpoint") +@click.option("--db", default="./.pf/graphs.db", help="SQLite database path") +@click.option("--out-json", default="./.pf/raw/", help="JSON output directory") +def graph_build(root, langs, workset, batch_size, resume, db, out_json): + """Build import and call graphs for project.""" + from theauditor.graph.builder import XGraphBuilder + from theauditor.graph.store import XGraphStore + + try: + # Initialize builder and store + builder = XGraphBuilder(batch_size=batch_size, exclude_patterns=[], project_root=root) + store = XGraphStore(db_path=db) + + # Load workset if provided + file_filter = None + workset_files = set() + if workset: + workset_path = Path(workset) + if workset_path.exists(): + with open(workset_path) as f: + workset_data = json.load(f) + # Extract file paths from workset + workset_files = {p["path"] for p in workset_data.get("paths", [])} + click.echo(f"Loaded workset with {len(workset_files)} files") + + # Clear checkpoint if not resuming + if not resume and builder.checkpoint_file.exists(): + builder.checkpoint_file.unlink() + + # Load manifest.json if it exists to use as file list + file_list = None + config = 
load_runtime_config(root) + manifest_path = Path(config["paths"]["manifest"]) + if manifest_path.exists(): + click.echo("Loading file manifest...") + with open(manifest_path, 'r') as f: + manifest_data = json.load(f) + + # Apply workset filtering if active + if workset_files: + file_list = [f for f in manifest_data if f.get("path") in workset_files] + click.echo(f" Filtered to {len(file_list)} files from workset") + else: + file_list = manifest_data + click.echo(f" Found {len(file_list)} files in manifest") + else: + click.echo("No manifest found, using filesystem walk") + + # Build import graph + click.echo("Building import graph...") + import_graph = builder.build_import_graph( + root=root, + langs=list(langs) if langs else None, + file_list=file_list, + ) + + # Save to database (SINGLE SOURCE OF TRUTH) + store.save_import_graph(import_graph) + + # REMOVED: JSON dual persistence - using SQLite as single source + + click.echo(f" Nodes: {len(import_graph['nodes'])}") + click.echo(f" Edges: {len(import_graph['edges'])}") + + # Build call graph + click.echo("Building call graph...") + call_graph = builder.build_call_graph( + root=root, + langs=list(langs) if langs else None, + file_list=file_list, + ) + + # Save to database (SINGLE SOURCE OF TRUTH) + store.save_call_graph(call_graph) + + # REMOVED: JSON dual persistence - using SQLite as single source + + # Call graph uses 'nodes' for functions and 'edges' for calls + click.echo(f" Functions: {len(call_graph.get('nodes', []))}") + click.echo(f" Calls: {len(call_graph.get('edges', []))}") + + click.echo(f"\nGraphs saved to database: {db}") + + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e + + +@graph.command("analyze") +@click.option("--db", default="./.pf/graphs.db", help="SQLite database path") +@click.option("--out", default="./.pf/raw/graph_analysis.json", help="Output JSON path") +@click.option("--max-depth", default=3, type=int, help="Max traversal depth for impact analysis") +@click.option("--workset", help="Path to workset.json for change impact") +@click.option("--no-insights", is_flag=True, help="Skip interpretive insights (health scores, recommendations)") +def graph_analyze(db, out, max_depth, workset, no_insights): + """Analyze graphs for cycles, hotspots, and impact.""" + from theauditor.graph.analyzer import XGraphAnalyzer + from theauditor.graph.store import XGraphStore + + # Try to import insights module (optional) + insights = None + if not no_insights: + try: + from theauditor.graph.insights import GraphInsights + insights = GraphInsights() + except ImportError: + click.echo("Note: Insights module not available. Running basic analysis only.") + insights = None + + try: + # Load graphs from database + store = XGraphStore(db_path=db) + import_graph = store.load_import_graph() + call_graph = store.load_call_graph() + + if not import_graph["nodes"]: + click.echo("No graphs found. 
Run 'aud graph build' first.") + return + + # Initialize analyzer + analyzer = XGraphAnalyzer() + + # Detect cycles + click.echo("Detecting cycles...") + cycles = analyzer.detect_cycles(import_graph) + click.echo(f" Found {len(cycles)} cycles") + if cycles and len(cycles) > 0: + click.echo(f" Largest cycle: {cycles[0]['size']} nodes") + + # Rank hotspots (if insights available) + hotspots = [] + if insights: + click.echo("Ranking hotspots...") + hotspots = insights.rank_hotspots(import_graph, call_graph) + click.echo(f" Top 10 hotspots:") + for i, hotspot in enumerate(hotspots[:10], 1): + click.echo(f" {i}. {hotspot['id'][:50]} (score: {hotspot['score']})") + else: + # Basic hotspot detection without scoring + click.echo("Finding most connected nodes...") + degrees = analyzer.calculate_node_degrees(import_graph) + connected = sorted( + [(k, v["in_degree"] + v["out_degree"]) for k, v in degrees.items()], + key=lambda x: x[1], + reverse=True + )[:10] + click.echo(f" Top 10 most connected nodes:") + for i, (node, connections) in enumerate(connected, 1): + click.echo(f" {i}. {node[:50]} ({connections} connections)") + + # Calculate change impact if workset provided + impact = None + if workset: + workset_path = Path(workset) + if workset_path.exists(): + with open(workset_path) as f: + workset_data = json.load(f) + targets = workset_data.get("seed_files", []) + + if targets: + click.echo(f"\nCalculating impact for {len(targets)} targets...") + impact = analyzer.impact_of_change( + targets=targets, + import_graph=import_graph, + call_graph=call_graph, + max_depth=max_depth, + ) + click.echo(f" Upstream impact: {len(impact['upstream'])} files") + click.echo(f" Downstream impact: {len(impact['downstream'])} files") + click.echo(f" Total impacted: {impact['total_impacted']}") + + # Generate summary + summary = {} + if insights: + click.echo("\nGenerating interpreted summary...") + summary = insights.summarize( + import_graph=import_graph, + call_graph=call_graph, + cycles=cycles, + hotspots=hotspots, + ) + + click.echo(f" Graph density: {summary['import_graph'].get('density', 0):.4f}") + click.echo(f" Health grade: {summary['health_metrics'].get('health_grade', 'N/A')}") + click.echo(f" Fragility score: {summary['health_metrics'].get('fragility_score', 0):.2f}") + else: + # Basic summary without interpretation + click.echo("\nGenerating basic summary...") + nodes_count = len(import_graph.get("nodes", [])) + edges_count = len(import_graph.get("edges", [])) + density = edges_count / (nodes_count * (nodes_count - 1)) if nodes_count > 1 else 0 + + summary = { + "import_graph": { + "nodes": nodes_count, + "edges": edges_count, + "density": density, + }, + "cycles": { + "total": len(cycles), + "largest": cycles[0]["size"] if cycles else 0, + }, + } + + if call_graph: + summary["call_graph"] = { + "nodes": len(call_graph.get("nodes", [])), + "edges": len(call_graph.get("edges", [])), + } + + click.echo(f" Nodes: {nodes_count}") + click.echo(f" Edges: {edges_count}") + click.echo(f" Density: {density:.4f}") + click.echo(f" Cycles: {len(cycles)}") + + # Save analysis results + analysis = { + "cycles": cycles, + "hotspots": hotspots[:50], # Top 50 + "impact": impact, + "summary": summary, + } + + out_path = Path(out) + out_path.parent.mkdir(parents=True, exist_ok=True) + with open(out_path, "w") as f: + json.dump(analysis, f, indent=2, sort_keys=True) + + click.echo(f"\nAnalysis saved to {out}") + + # Save metrics for ML consumption (if insights available) + if insights and hotspots: + metrics = {} + for 
hotspot in hotspots: + metrics[hotspot['id']] = hotspot.get('centrality', 0) + metrics_path = Path("./.pf/raw/graph_metrics.json") + metrics_path.parent.mkdir(parents=True, exist_ok=True) + with open(metrics_path, "w") as f: + json.dump(metrics, f, indent=2) + click.echo(f" Saved graph metrics to {metrics_path}") + + # Create AI-readable summary + graph_summary = analyzer.get_graph_summary(import_graph) + summary_path = Path("./.pf/raw/graph_summary.json") + with open(summary_path, "w") as f: + json.dump(graph_summary, f, indent=2) + click.echo(f" Saved graph summary to {summary_path}") + + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e + + +@graph.command("query") +@click.option("--db", default="./.pf/graphs.db", help="SQLite database path") +@click.option("--uses", help="Find who uses/imports this module or calls this function") +@click.option("--calls", help="Find what this module/function calls or depends on") +@click.option("--nearest-path", nargs=2, help="Find shortest path between two nodes") +@click.option("--format", type=click.Choice(["table", "json"]), default="table", help="Output format") +def graph_query(db, uses, calls, nearest_path, format): + """Query graph relationships.""" + from theauditor.graph.analyzer import XGraphAnalyzer + from theauditor.graph.store import XGraphStore + + # Check if any query options were provided + if not any([uses, calls, nearest_path]): + click.echo("Please specify a query option:") + click.echo(" --uses MODULE Find who uses a module") + click.echo(" --calls FUNC Find what a function calls") + click.echo(" --nearest-path SOURCE TARGET Find path between nodes") + click.echo("\nExample: aud graph query --uses theauditor.cli") + return + + try: + # Load graphs + store = XGraphStore(db_path=db) + + results = {} + + if uses: + # Find who uses this node + deps = store.query_dependencies(uses, direction="upstream") + call_deps = store.query_calls(uses, direction="callers") + + all_users = sorted(set(deps.get("upstream", []) + call_deps.get("callers", []))) + results["uses"] = { + "node": uses, + "used_by": all_users, + "count": len(all_users), + } + + if format == "table": + click.echo(f"\n{uses} is used by {len(all_users)} nodes:") + for user in all_users[:20]: # Show first 20 + click.echo(f" - {user}") + if len(all_users) > 20: + click.echo(f" ... and {len(all_users) - 20} more") + + if calls: + # Find what this node calls/depends on + deps = store.query_dependencies(calls, direction="downstream") + call_deps = store.query_calls(calls, direction="callees") + + all_deps = sorted(set(deps.get("downstream", []) + call_deps.get("callees", []))) + results["calls"] = { + "node": calls, + "depends_on": all_deps, + "count": len(all_deps), + } + + if format == "table": + click.echo(f"\n{calls} depends on {len(all_deps)} nodes:") + for dep in all_deps[:20]: # Show first 20 + click.echo(f" - {dep}") + if len(all_deps) > 20: + click.echo(f" ... 
and {len(all_deps) - 20} more") + + if nearest_path: + # Find shortest path + source, target = nearest_path + import_graph = store.load_import_graph() + + analyzer = XGraphAnalyzer() + path = analyzer.find_shortest_path(source, target, import_graph) + + results["path"] = { + "source": source, + "target": target, + "path": path, + "length": len(path) if path else None, + } + + if format == "table": + if path: + click.echo(f"\nPath from {source} to {target} ({len(path)} steps):") + for i, node in enumerate(path): + prefix = " " + ("-> " if i > 0 else "") + click.echo(f"{prefix}{node}") + else: + click.echo(f"\nNo path found from {source} to {target}") + + if format == "json": + click.echo(json.dumps(results, indent=2)) + + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e + + +@graph.command("viz") +@click.option("--db", default="./.pf/graphs.db", help="SQLite database path") +@click.option("--graph-type", type=click.Choice(["import", "call"]), default="import", help="Graph type to visualize") +@click.option("--out-dir", default="./.pf/raw/", help="Output directory for visualizations") +@click.option("--limit-nodes", default=500, type=int, help="Maximum nodes to display") +@click.option("--format", type=click.Choice(["dot", "svg", "png", "json"]), default="dot", help="Output format") +@click.option("--view", type=click.Choice(["full", "cycles", "hotspots", "layers", "impact"]), default="full", + help="Visualization view type") +@click.option("--include-analysis", is_flag=True, help="Include analysis results (cycles, hotspots) in visualization") +@click.option("--title", help="Graph title") +@click.option("--top-hotspots", default=10, type=int, help="Number of top hotspots to show (for hotspots view)") +@click.option("--impact-target", help="Target node for impact analysis (for impact view)") +@click.option("--show-self-loops", is_flag=True, help="Include self-referential edges") +def graph_viz(db, graph_type, out_dir, limit_nodes, format, view, include_analysis, title, + top_hotspots, impact_target, show_self_loops): + """Visualize graphs with rich visual encoding (Graphviz). 
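The query subcommand above is a thin wrapper over the graph store and analyzer; a short sketch of the same lookups driven directly from Python, with the node names as placeholders:

    from theauditor.graph.store import XGraphStore
    from theauditor.graph.analyzer import XGraphAnalyzer

    store = XGraphStore(db_path="./.pf/graphs.db")

    # Who imports or calls a node (mirrors `aud graph query --uses ...`).
    deps = store.query_dependencies("theauditor.cli", direction="upstream")
    calls = store.query_calls("theauditor.cli", direction="callers")
    users = sorted(set(deps.get("upstream", []) + calls.get("callers", [])))

    # Shortest path between two nodes (mirrors `--nearest-path SOURCE TARGET`).
    analyzer = XGraphAnalyzer()
    path = analyzer.find_shortest_path("src/a.py", "src/b.py", store.load_import_graph())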
+ + Creates visually intelligent graphs with multiple view modes: + + VIEW MODES: + - full: Complete graph with all nodes and edges + - cycles: Only nodes/edges involved in dependency cycles + - hotspots: Top N most connected nodes with neighbors + - layers: Architectural layers as subgraphs + - impact: Highlight impact radius of changes + + VISUAL ENCODING: + - Node Color: Programming language (Python=blue, JS=yellow, TS=blue) + - Node Size: Importance/connectivity (larger = more dependencies) + - Edge Color: Red for cycles, gray for normal + - Border Width: Code churn (thicker = more changes) + - Node Shape: box=module, ellipse=function, diamond=class + + Examples: + # Basic visualization + aud graph viz + + # Show only dependency cycles + aud graph viz --view cycles --include-analysis + + # Top 5 hotspots with connections + aud graph viz --view hotspots --top-hotspots 5 + + # Architectural layers + aud graph viz --view layers --include-analysis + + # Impact analysis for a specific file + aud graph viz --view impact --impact-target "src/auth.py" + + # Generate SVG for AI analysis + aud graph viz --format svg --view full --include-analysis + """ + from theauditor.graph.store import XGraphStore + from theauditor.graph.visualizer import GraphVisualizer + + try: + # Load the appropriate graph + store = XGraphStore(db_path=db) + + if graph_type == "import": + graph = store.load_import_graph() + output_name = "import_graph" + default_title = "Import Dependencies" + else: + graph = store.load_call_graph() + output_name = "call_graph" + default_title = "Function Call Graph" + + if not graph or not graph.get("nodes"): + click.echo(f"No {graph_type} graph found. Run 'aud graph build' first.") + return + + # Load analysis if requested + analysis = {} + if include_analysis: + # Try to load analysis from file + analysis_path = Path("./.pf/raw/graph_analysis.json") + if analysis_path.exists(): + with open(analysis_path) as f: + analysis_data = json.load(f) + analysis = { + 'cycles': analysis_data.get('cycles', []), + 'hotspots': analysis_data.get('hotspots', []), + 'impact': analysis_data.get('impact', {}) + } + click.echo(f"Loaded analysis: {len(analysis['cycles'])} cycles, {len(analysis['hotspots'])} hotspots") + else: + click.echo("No analysis found. Run 'aud graph analyze' first for richer visualization.") + + # Create output directory + out_path = Path(out_dir) + out_path.mkdir(parents=True, exist_ok=True) + + if format == "json": + # Simple JSON output (original behavior) + json_file = out_path / f"{output_name}.json" + with open(json_file, "w") as f: + json.dump({"nodes": graph["nodes"], "edges": graph["edges"]}, f, indent=2) + + click.echo(f"[OK] JSON saved to: {json_file}") + click.echo(f" Nodes: {len(graph['nodes'])}, Edges: {len(graph['edges'])}") + else: + # Use new visualizer for DOT/SVG/PNG + visualizer = GraphVisualizer() + + # Set visualization options + options = { + 'max_nodes': limit_nodes, + 'title': title or default_title, + 'show_self_loops': show_self_loops + } + + # Generate DOT with visual intelligence based on view mode + click.echo(f"Generating {format.upper()} visualization (view: {view})...") + + if view == "cycles": + # Cycles-only view + cycles = analysis.get('cycles', []) + if not cycles: + # Check if analysis was run but found no cycles + if 'cycles' in analysis: + click.echo("[INFO] No dependency cycles detected in the codebase (good architecture!).") + click.echo(" Showing full graph instead...") + else: + click.echo("[WARN] No cycles data found. 
Run 'aud graph analyze' first.") + click.echo(" Falling back to full view...") + dot_content = visualizer.generate_dot(graph, analysis, options) + else: + click.echo(f" Showing {len(cycles)} cycles") + dot_content = visualizer.generate_cycles_only_view(graph, cycles, options) + + elif view == "hotspots": + # Hotspots-only view + if not analysis.get('hotspots'): + # Try to calculate hotspots on the fly + from theauditor.graph.analyzer import XGraphAnalyzer + analyzer = XGraphAnalyzer() + hotspots = analyzer.identify_hotspots(graph, top_n=top_hotspots) + click.echo(f" Calculated {len(hotspots)} hotspots") + else: + hotspots = analysis['hotspots'] + + click.echo(f" Showing top {top_hotspots} hotspots") + dot_content = visualizer.generate_hotspots_only_view( + graph, hotspots, options, top_n=top_hotspots + ) + + elif view == "layers": + # Architectural layers view + from theauditor.graph.analyzer import XGraphAnalyzer + analyzer = XGraphAnalyzer() + layers = analyzer.identify_layers(graph) + click.echo(f" Found {len(layers)} architectural layers") + # Filter out None keys before iterating + for layer_num, nodes in layers.items(): + if layer_num is not None: + click.echo(f" Layer {layer_num}: {len(nodes)} nodes") + dot_content = visualizer.generate_dot_with_layers(graph, layers, analysis, options) + + elif view == "impact": + # Impact analysis view + if not impact_target: + click.echo("[ERROR] --impact-target required for impact view") + raise click.ClickException("Missing --impact-target for impact view") + + from theauditor.graph.analyzer import XGraphAnalyzer + analyzer = XGraphAnalyzer() + impact = analyzer.analyze_impact(graph, [impact_target]) + + if not impact['targets']: + click.echo(f"[WARN] Target '{impact_target}' not found in graph") + click.echo(" Showing full graph instead...") + dot_content = visualizer.generate_dot(graph, analysis, options) + else: + click.echo(f" Target: {impact_target}") + click.echo(f" Upstream: {len(impact['upstream'])} nodes") + click.echo(f" Downstream: {len(impact['downstream'])} nodes") + click.echo(f" Total impact: {len(impact['all_impacted'])} nodes") + dot_content = visualizer.generate_impact_visualization(graph, impact, options) + + else: # view == "full" or default + # Full graph view + click.echo(f" Nodes: {len(graph['nodes'])} (limit: {limit_nodes})") + click.echo(f" Edges: {len(graph['edges'])}") + dot_content = visualizer.generate_dot(graph, analysis, options) + + # Save DOT file with view suffix + if view != "full": + output_filename = f"{output_name}_{view}" + else: + output_filename = output_name + + dot_file = out_path / f"{output_filename}.dot" + with open(dot_file, "w") as f: + f.write(dot_content) + click.echo(f"[OK] DOT file saved to: {dot_file}") + + # Generate image if requested + if format in ["svg", "png"]: + try: + import subprocess + + # Check if Graphviz is installed + result = subprocess.run( + ["dot", "-V"], + capture_output=True, + text=True + ) + + if result.returncode == 0: + # Generate image + output_file = out_path / f"{output_filename}.{format}" + subprocess.run( + ["dot", f"-T{format}", str(dot_file), "-o", str(output_file)], + check=True + ) + click.echo(f"[OK] {format.upper()} image saved to: {output_file}") + + # For SVG, also mention AI readability + if format == "svg": + click.echo(" ✓ SVG is AI-readable and can be analyzed for patterns") + else: + click.echo(f"[WARN] Graphviz not found. 
Install it to generate {format.upper()} images:") + click.echo(" Ubuntu/Debian: apt install graphviz") + click.echo(" macOS: brew install graphviz") + click.echo(" Windows: choco install graphviz") + click.echo(f"\n Manual generation: dot -T{format} {dot_file} -o {output_filename}.{format}") + + except FileNotFoundError: + click.echo(f"[WARN] Graphviz not installed. Cannot generate {format.upper()}.") + click.echo(f" Install graphviz and run: dot -T{format} {dot_file} -o {output_filename}.{format}") + except subprocess.CalledProcessError as e: + click.echo(f"[ERROR] Failed to generate {format.upper()}: {e}") + + # Provide visual encoding legend based on view + click.echo("\nVisual Encoding:") + + if view == "cycles": + click.echo(" • Red Nodes: Part of dependency cycles") + click.echo(" • Red Edges: Cycle connections") + click.echo(" • Subgraphs: Individual cycles grouped") + + elif view == "hotspots": + click.echo(" • Node Color: Red gradient (darker = higher rank)") + click.echo(" • Node Size: Total connections") + click.echo(" • Gray Nodes: Connected but not hotspots") + click.echo(" • Labels: Show in/out degree counts") + + elif view == "layers": + click.echo(" • Subgraphs: Architectural layers") + click.echo(" • Node Color: Programming language") + click.echo(" • Border Width: Code churn (thicker = more changes)") + click.echo(" • Node Size: Importance (in-degree)") + + elif view == "impact": + click.echo(" • Red Nodes: Impact targets") + click.echo(" • Orange Nodes: Upstream dependencies") + click.echo(" • Blue Nodes: Downstream dependencies") + click.echo(" • Purple Nodes: Both upstream and downstream") + click.echo(" • Gray Nodes: Unaffected") + + else: # full view + click.echo(" • Node Color: Programming language") + click.echo(" • Node Size: Importance (larger = more dependencies)") + click.echo(" • Red Edges: Part of dependency cycles") + click.echo(" • Node Shape: box=module, ellipse=function") + + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e \ No newline at end of file diff --git a/theauditor/commands/impact.py b/theauditor/commands/impact.py new file mode 100644 index 0000000..6e05338 --- /dev/null +++ b/theauditor/commands/impact.py @@ -0,0 +1,118 @@ +"""Analyze the impact radius of code changes using the AST symbol graph.""" + +import platform +import click +from pathlib import Path + +# Detect if running on Windows for character encoding +IS_WINDOWS = platform.system() == "Windows" + + +@click.command() +@click.option("--file", required=True, help="Path to the file containing the code to analyze") +@click.option("--line", required=True, type=int, help="Line number of the code to analyze") +@click.option("--db", default=None, help="Path to the SQLite database (default: repo_index.db)") +@click.option("--json", is_flag=True, help="Output results as JSON") +@click.option("--max-depth", default=2, type=int, help="Maximum depth for transitive dependencies") +@click.option("--verbose", is_flag=True, help="Show detailed dependency information") +@click.option("--trace-to-backend", is_flag=True, help="Trace frontend API calls to backend endpoints (cross-stack analysis)") +def impact(file, line, db, json, max_depth, verbose, trace_to_backend): + """ + Analyze the impact radius of changing code at a specific location. + + This command traces both upstream dependencies (who calls this code) + and downstream dependencies (what this code calls) to help understand + the blast radius of potential changes. 
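Beyond the CLI entry point, the same analysis can be driven from Python through the functions this command imports; a minimal sketch, with the target file and line number as placeholders:

    from theauditor.impact_analyzer import analyze_impact, format_impact_report

    result = analyze_impact(
        db_path="./.pf/repo_index.db",  # default index database path
        target_file="src/auth.py",      # placeholder target
        target_line=42,
        trace_to_backend=False,
    )
    print(format_impact_report(result))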
+ + Example: + aud impact --file src/auth.py --line 42 + aud impact --file theauditor/indexer.py --line 100 --verbose + """ + from theauditor.impact_analyzer import analyze_impact, format_impact_report + from theauditor.config_runtime import load_runtime_config + import json as json_lib + + # Load configuration for default paths + config = load_runtime_config(".") + + # Use default database path if not provided + if db is None: + db = config["paths"]["db"] + + # Verify database exists + db_path = Path(db) + if not db_path.exists(): + click.echo(f"Error: Database not found at {db}", err=True) + click.echo("Run 'aud index' first to build the repository index", err=True) + raise click.ClickException(f"Database not found: {db}") + + # Verify file exists (helpful for user) + file = Path(file) + if not file.exists(): + click.echo(f"Warning: File {file} not found in filesystem", err=True) + click.echo("Proceeding with analysis using indexed data...", err=True) + + # Perform impact analysis + try: + result = analyze_impact( + db_path=str(db_path), + target_file=str(file), + target_line=line, + trace_to_backend=trace_to_backend + ) + + # Output results + if json: + # JSON output for programmatic use + click.echo(json_lib.dumps(result, indent=2, sort_keys=True)) + else: + # Human-readable report + report = format_impact_report(result) + click.echo(report) + + # Additional verbose output + if verbose and not result.get("error"): + click.echo("\n" + "=" * 60) + click.echo("DETAILED DEPENDENCY INFORMATION") + click.echo("=" * 60) + + # Show transitive upstream + if result.get("upstream_transitive"): + click.echo(f"\nTransitive Upstream Dependencies ({len(result['upstream_transitive'])} total):") + for dep in result["upstream_transitive"][:20]: + depth_indicator = " " * (3 - dep.get("depth", 1)) + tree_char = "+-" if IS_WINDOWS else "└─" + click.echo(f"{depth_indicator}{tree_char} {dep['symbol']} in {dep['file']}:{dep['line']}") + if len(result["upstream_transitive"]) > 20: + click.echo(f" ... and {len(result['upstream_transitive']) - 20} more") + + # Show transitive downstream + if result.get("downstream_transitive"): + click.echo(f"\nTransitive Downstream Dependencies ({len(result['downstream_transitive'])} total):") + for dep in result["downstream_transitive"][:20]: + depth_indicator = " " * (3 - dep.get("depth", 1)) + if dep["file"] != "external": + tree_char = "+-" if IS_WINDOWS else "└─" + click.echo(f"{depth_indicator}{tree_char} {dep['symbol']} in {dep['file']}:{dep['line']}") + else: + tree_char = "+-" if IS_WINDOWS else "└─" + click.echo(f"{depth_indicator}{tree_char} {dep['symbol']} (external)") + if len(result["downstream_transitive"]) > 20: + click.echo(f" ... 
and {len(result['downstream_transitive']) - 20} more") + + # Exit with appropriate code + if result.get("error"): + # Error already displayed in the report, just exit with code + exit(3) # Exit code 3 for analysis errors + + # Warn if high impact + summary = result.get("impact_summary", {}) + if summary.get("total_impact", 0) > 20: + click.echo("\n⚠ WARNING: High impact change detected!", err=True) + exit(1) # Non-zero exit for CI/CD integration + + except Exception as e: + # Only show this for unexpected exceptions, not for already-handled errors + if "No function or class found at" not in str(e): + click.echo(f"Error during impact analysis: {e}", err=True) + raise click.ClickException(str(e)) \ No newline at end of file diff --git a/theauditor/commands/index.py b/theauditor/commands/index.py new file mode 100644 index 0000000..8da6c48 --- /dev/null +++ b/theauditor/commands/index.py @@ -0,0 +1,50 @@ +"""Build language-agnostic manifest and SQLite index of repository.""" + +import click +from theauditor.utils.error_handler import handle_exceptions +from theauditor.utils.helpers import get_self_exclusion_patterns + + +@click.command() +@handle_exceptions +@click.option("--root", default=".", help="Root directory to index") +@click.option("--manifest", default=None, help="Output manifest file path") +@click.option("--db", default=None, help="Output SQLite database path") +@click.option("--print-stats", is_flag=True, help="Print summary statistics") +@click.option("--dry-run", is_flag=True, help="Scan but don't write files") +@click.option("--follow-symlinks", is_flag=True, help="Follow symbolic links (default: skip)") +@click.option("--exclude-self", is_flag=True, help="Exclude TheAuditor's own files (for self-testing)") +def index(root, manifest, db, print_stats, dry_run, follow_symlinks, exclude_self): + """Build language-agnostic manifest and SQLite index of repository.""" + from theauditor.indexer import build_index + from theauditor.config_runtime import load_runtime_config + + # Load configuration + config = load_runtime_config(root) + + # Use config defaults if not provided + if manifest is None: + manifest = config["paths"]["manifest"] + if db is None: + db = config["paths"]["db"] + + # Build exclude patterns using centralized function + exclude_patterns = get_self_exclusion_patterns(exclude_self) + + if exclude_self and print_stats: + click.echo(f"[EXCLUDE-SELF] Excluding TheAuditor's own files from indexing") + click.echo(f"[EXCLUDE-SELF] {len(exclude_patterns)} patterns will be excluded") + + result = build_index( + root_path=root, + manifest_path=manifest, + db_path=db, + print_stats=print_stats, + dry_run=dry_run, + follow_symlinks=follow_symlinks, + exclude_patterns=exclude_patterns, + ) + + if result.get("error"): + click.echo(f"Error: {result['error']}", err=True) + raise click.ClickException(result["error"]) \ No newline at end of file diff --git a/theauditor/commands/init.py b/theauditor/commands/init.py new file mode 100644 index 0000000..02f41e3 --- /dev/null +++ b/theauditor/commands/init.py @@ -0,0 +1,143 @@ +"""Initialize TheAuditor for first-time use.""" + +from pathlib import Path +import click + + +@click.command() +@click.option("--offline", is_flag=True, help="Skip network operations (deps check, docs fetch)") +@click.option("--skip-docs", is_flag=True, help="Skip documentation fetching") +@click.option("--skip-deps", is_flag=True, help="Skip dependency checking") +def init(offline, skip_docs, skip_deps): + """Initialize TheAuditor for first-time use (runs all 
setup steps).""" + from theauditor.init import initialize_project + + click.echo("[INIT] Initializing TheAuditor...\n") + click.echo("This will run all setup steps:") + click.echo(" 1. Index repository") + click.echo(" 2. Create workset") + click.echo(" 3. Check dependencies") + click.echo(" 4. Fetch documentation") + click.echo("\n" + "="*60 + "\n") + + # Call the refactored initialization logic + result = initialize_project( + offline=offline, + skip_docs=skip_docs, + skip_deps=skip_deps + ) + + stats = result["stats"] + has_failures = result["has_failures"] + next_steps = result["next_steps"] + + # Display step-by-step results + click.echo("[INDEX] Step 1/5: Indexing repository...") + if stats.get("index", {}).get("success"): + click.echo(f" [OK] Indexed {stats['index']['text_files']} text files") + else: + click.echo(f" [FAIL] Failed: {stats['index'].get('error', 'Unknown error')}", err=True) + + click.echo("\n[TARGET] Step 2/5: Creating workset...") + if stats.get("workset", {}).get("success"): + click.echo(f" [OK] Workset created with {stats['workset']['files']} files") + elif stats.get("workset", {}).get("files") == 0: + click.echo(" [WARN] No files found to create workset") + else: + click.echo(f" [FAIL] Failed: {stats['workset'].get('error', 'Unknown error')}", err=True) + + if not skip_deps and not offline: + click.echo("\n[PACKAGE] Step 3/4: Checking dependencies...") + if stats.get("deps", {}).get("success"): + if stats["deps"]["total"] > 0: + click.echo(f" [OK] Found {stats['deps']['total']} dependencies ({stats['deps']['outdated']} outdated)") + else: + click.echo(" [OK] No dependency files found") + else: + click.echo(f" [FAIL] Failed: {stats['deps'].get('error', 'Unknown error')}", err=True) + else: + click.echo("\n[PACKAGE] Step 3/4: Skipping dependency check (offline/skipped)") + + if not skip_docs and not offline: + click.echo("\n[DOCS] Step 4/4: Fetching documentation...") + if stats.get("docs", {}).get("success"): + fetched = stats['docs'].get('fetched', 0) + cached = stats['docs'].get('cached', 0) + if fetched > 0 and cached > 0: + click.echo(f" [OK] Fetched {fetched} new docs, using {cached} cached docs") + elif fetched > 0: + click.echo(f" [OK] Fetched {fetched} docs") + elif cached > 0: + click.echo(f" [OK] Using {cached} cached docs (already up-to-date)") + else: + click.echo(" [WARN] No docs fetched or cached") + + # Report any errors from the stats + if stats['docs'].get('errors'): + errors = stats['docs']['errors'] + rate_limited = [e for e in errors if "rate limited" in e.lower()] + other_errors = [e for e in errors if "rate limited" not in e.lower()] + + if rate_limited: + click.echo(f" [WARN] {len(rate_limited)} packages rate-limited (will retry with delay)") + if other_errors and len(other_errors) <= 3: + for err in other_errors[:3]: + click.echo(f" [WARN] {err}") + elif other_errors: + click.echo(f" [WARN] {len(other_errors)} packages failed to fetch") + + click.echo(f" [OK] Created {stats['docs']['capsules']} doc capsules") + elif stats["docs"].get("error") == "Interrupted by user": + click.echo("\n [WARN] Documentation fetch interrupted (Ctrl+C)") + else: + click.echo(f" [FAIL] Failed: {stats['docs'].get('error', 'Unknown error')}", err=True) + else: + click.echo("\n[DOCS] Step 4/4: Skipping documentation (offline/skipped)") + + # Summary + click.echo("\n" + "="*60) + + if has_failures: + click.echo("\n[WARN] Initialization Partially Complete\n") + else: + click.echo("\n[SUCCESS] Initialization Complete!\n") + + # Show summary + click.echo("[STATS] 
Summary:") + if stats.get("index", {}).get("success"): + click.echo(f" * Indexed: {stats['index']['text_files']} files") + else: + click.echo(" * Indexing: [FAILED] Failed") + + if stats.get("workset", {}).get("success"): + click.echo(f" * Workset: {stats['workset']['files']} files") + elif stats.get("workset", {}).get("files") == 0: + click.echo(" * Workset: [WARN] No files found") + else: + click.echo(" * Workset: [FAILED] Failed") + + if stats.get("deps", {}).get("success"): + click.echo(f" * Dependencies: {stats['deps'].get('total', 0)} total, {stats['deps'].get('outdated', 0)} outdated") + elif stats.get("deps", {}).get("skipped"): + click.echo(" * Dependencies: [SKIPPED] Skipped") + + if stats.get("docs", {}).get("success"): + fetched = stats['docs'].get('fetched', 0) + cached = stats['docs'].get('cached', 0) + capsules = stats['docs'].get('capsules', 0) + if cached > 0: + click.echo(f" * Documentation: {fetched} fetched, {cached} cached, {capsules} capsules") + else: + click.echo(f" * Documentation: {fetched} fetched, {capsules} capsules") + elif stats.get("docs", {}).get("skipped"): + click.echo(" * Documentation: [SKIPPED] Skipped") + + # Next steps - only show if we have files to work with + if next_steps: + click.echo("\n[TARGET] Next steps:") + for i, step in enumerate(next_steps, 1): + click.echo(f" {i}. Run: {step}") + click.echo("\nOr run all at once:") + click.echo(f" {' && '.join(next_steps)}") + else: + click.echo("\n[WARN] No files found to audit. Check that you're in the right directory.") \ No newline at end of file diff --git a/theauditor/commands/init_config.py b/theauditor/commands/init_config.py new file mode 100644 index 0000000..8da0c78 --- /dev/null +++ b/theauditor/commands/init_config.py @@ -0,0 +1,21 @@ +"""Ensure minimal mypy config exists (idempotent).""" + +import click + + +@click.command("init-config") +@click.option("--pyproject", default="pyproject.toml", help="Path to pyproject.toml") +def init_config(pyproject): + """Ensure minimal mypy config exists (idempotent).""" + from theauditor.config import ensure_mypy_config + + try: + res = ensure_mypy_config(pyproject) + msg = ( + "mypy config created" + if res.get("status") == "created" + else "mypy config already present" + ) + click.echo(msg) + except Exception as e: + raise click.ClickException(f"Failed to init config: {e}") from e \ No newline at end of file diff --git a/theauditor/commands/init_js.py b/theauditor/commands/init_js.py new file mode 100644 index 0000000..370d6a1 --- /dev/null +++ b/theauditor/commands/init_js.py @@ -0,0 +1,41 @@ +"""Create or merge minimal package.json for lint/typecheck.""" + +import click + + +@click.command("init-js") +@click.option("--path", default="package.json", help="Path to package.json") +@click.option("--add-hooks", is_flag=True, help="Add TheAuditor hooks to npm scripts") +def init_js(path, add_hooks): + """Create or merge minimal package.json for lint/typecheck.""" + from theauditor.js_init import ensure_package_json, add_auditor_hooks + + try: + res = ensure_package_json(path) + + if res["status"] == "created": + click.echo(f"[OK] Created {path} with PIN_ME placeholders") + click.echo(" Edit devDependencies to set exact versions") + elif res["status"] == "merged": + click.echo(f"[OK] Merged lint/typecheck config into {path}") + click.echo(" Check devDependencies for PIN_ME placeholders") + else: + click.echo(f"No changes needed - {path} already configured") + + # Add hooks if requested + if add_hooks: + click.echo("\nAdding TheAuditor hooks to npm 
scripts...") + hook_res = add_auditor_hooks(path) + + if hook_res["status"] == "hooks_added": + click.echo("[OK] Added TheAuditor hooks to package.json:") + for change in hook_res["details"]: + click.echo(f" - {change}") + elif hook_res["status"] == "unchanged": + click.echo("No changes needed - all hooks already present") + elif hook_res["status"] == "error": + click.echo(f"Error adding hooks: {hook_res['message']}", err=True) + + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e \ No newline at end of file diff --git a/theauditor/commands/insights.py b/theauditor/commands/insights.py new file mode 100644 index 0000000..25455aa --- /dev/null +++ b/theauditor/commands/insights.py @@ -0,0 +1,443 @@ +"""Run optional insights analysis on existing audit data. + +This command runs interpretive analysis modules (ML, graph health, taint severity) +on top of existing raw audit data, generating insights and predictions. +""" + +import json +import sys +from pathlib import Path +from typing import Dict, Any, List + +import click + + +@click.command() +@click.option("--mode", "-m", + type=click.Choice(["ml", "graph", "taint", "impact", "all"]), + default="all", + help="Which insights modules to run") +@click.option("--ml-train", is_flag=True, + help="Train ML models before generating suggestions") +@click.option("--topk", default=10, type=int, + help="Top K files for ML suggestions") +@click.option("--output-dir", "-o", type=click.Path(), + default="./.pf/insights", + help="Directory for insights output") +@click.option("--print-summary", is_flag=True, + help="Print summary to console") +def insights(mode: str, ml_train: bool, topk: int, output_dir: str, print_summary: bool) -> None: + """Run optional insights analysis on existing audit data. + + This command generates interpretive analysis and predictions based on + the raw facts collected by the audit pipeline. All insights are optional + and separate from the core truth data. + + Available insights modules: + - ml: Machine learning risk predictions and root cause analysis + - graph: Graph health metrics and architectural scoring + - taint: Severity scoring for taint analysis paths + - impact: Impact radius and blast zone analysis + - all: Run all available insights + + Examples: + # Run all insights + aud insights + + # Only ML predictions + aud insights --mode ml + + # Train ML first, then predict + aud insights --mode ml --ml-train + + # Graph health only with summary + aud insights --mode graph --print-summary + """ + + # Ensure we have raw data to analyze + pf_dir = Path(".pf") + raw_dir = pf_dir / "raw" + + if not raw_dir.exists(): + click.echo("[ERROR] No raw audit data found. 
Run 'aud full' first.", err=True) + sys.exit(1) + + # Create insights directory + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + click.echo(f"\n{'='*60}") + click.echo(f"INSIGHTS ANALYSIS - {mode.upper()} Mode") + click.echo(f"{'='*60}") + click.echo(f"Output directory: {output_path}") + + results = {} + errors = [] + + # ML Insights + if mode in ["ml", "all"]: + click.echo("\n[ML] Running machine learning insights...") + ml_result = run_ml_insights(ml_train, topk, output_path) + results["ml"] = ml_result + if ml_result.get("error"): + errors.append(f"ML: {ml_result['error']}") + else: + click.echo(f" ✓ ML predictions saved to {output_path}/ml_suggestions.json") + + # Graph Health Insights + if mode in ["graph", "all"]: + click.echo("\n[GRAPH] Running graph health analysis...") + graph_result = run_graph_insights(output_path) + results["graph"] = graph_result + if graph_result.get("error"): + errors.append(f"Graph: {graph_result['error']}") + else: + click.echo(f" ✓ Graph health saved to {output_path}/graph_health.json") + + # Taint Severity Insights + if mode in ["taint", "all"]: + click.echo("\n[TAINT] Running taint severity scoring...") + taint_result = run_taint_insights(output_path) + results["taint"] = taint_result + if taint_result.get("error"): + errors.append(f"Taint: {taint_result['error']}") + else: + click.echo(f" ✓ Taint severity saved to {output_path}/taint_severity.json") + + # Impact Analysis Insights + if mode in ["impact", "all"]: + click.echo("\n[IMPACT] Running impact analysis...") + impact_result = run_impact_insights(output_path) + results["impact"] = impact_result + if impact_result.get("error"): + errors.append(f"Impact: {impact_result['error']}") + else: + click.echo(f" ✓ Impact analysis saved to {output_path}/impact_analysis.json") + + # Aggregate all insights into unified summary + click.echo("\n[AGGREGATE] Creating unified insights summary...") + summary = aggregate_insights(results, output_path) + + # Save unified summary + summary_path = output_path / "unified_insights.json" + with open(summary_path, 'w') as f: + json.dump(summary, f, indent=2, default=str) + click.echo(f" ✓ Unified summary saved to {summary_path}") + + # Print summary if requested + if print_summary: + print_insights_summary(summary) + + # Final status + click.echo(f"\n{'='*60}") + if errors: + click.echo(f"[WARN] Insights completed with {len(errors)} errors:", err=True) + for error in errors: + click.echo(f" • {error}", err=True) + else: + click.echo("[OK] All insights generated successfully") + + click.echo(f"\n[TIP] Insights are interpretive and optional.") + click.echo(f" Raw facts remain in .pf/raw/ unchanged.") + + sys.exit(1 if errors else 0) + + +def run_ml_insights(train: bool, topk: int, output_dir: Path) -> Dict[str, Any]: + """Run ML insights generation.""" + try: + from theauditor.ml import check_ml_available, learn, suggest + + if not check_ml_available(): + return {"error": "ML module not installed. 
Run: pip install -e .[ml]"} + + # Train if requested + if train: + learn_result = learn( + db_path="./.pf/repo_index.db", + manifest_path="./.pf/manifest.json", + print_stats=False + ) + if not learn_result.get("success"): + return {"error": f"ML training failed: {learn_result.get('error')}"} + + # Generate suggestions + suggest_result = suggest( + db_path="./.pf/repo_index.db", + manifest_path="./.pf/manifest.json", + workset_path="./.pf/workset.json", + topk=topk, + out_path=str(output_dir / "ml_suggestions.json") + ) + + return suggest_result + + except ImportError: + return {"error": "ML module not available"} + except Exception as e: + return {"error": str(e)} + + +def run_graph_insights(output_dir: Path) -> Dict[str, Any]: + """Run graph health insights.""" + try: + from theauditor.graph.insights import GraphInsights + from theauditor.graph.analyzer import XGraphAnalyzer + from theauditor.graph.store import XGraphStore + + # Load graph from SQLite database (SINGLE SOURCE OF TRUTH) + store = XGraphStore(db_path="./.pf/graphs.db") + import_graph = store.load_import_graph() + + if not import_graph or not import_graph.get("nodes"): + return {"error": "No import graph found. Run 'aud graph build' first."} + + # Load analysis data if it exists + analysis_path = Path(".pf/raw/graph_analysis.json") + analysis_data = {} + if analysis_path.exists(): + with open(analysis_path) as f: + analysis_data = json.load(f) + + # Run insights analysis + insights = GraphInsights() + analyzer = XGraphAnalyzer() + + # Use pre-calculated cycles and hotspots if available, otherwise calculate + if 'cycles' in analysis_data: + cycles = analysis_data['cycles'] + else: + cycles = analyzer.detect_cycles(import_graph) + + # Use pre-calculated hotspots if available, otherwise calculate + if 'hotspots' in analysis_data: + hotspots = analysis_data['hotspots'] + else: + hotspots = insights.rank_hotspots(import_graph) + + # Calculate health metrics + health = insights.calculate_health_metrics( + import_graph, + cycles=cycles, + hotspots=hotspots + ) + + # Generate recommendations + recommendations = insights.generate_recommendations( + import_graph, + cycles=cycles, + hotspots=hotspots + ) + + # Save results + output = { + "health_metrics": health, + "top_hotspots": hotspots[:10], + "recommendations": recommendations, + "cycles_found": len(cycles), + "total_nodes": len(import_graph.get("nodes", [])), + "total_edges": len(import_graph.get("edges", [])) + } + + output_path = output_dir / "graph_health.json" + with open(output_path, 'w') as f: + json.dump(output, f, indent=2) + + return {"success": True, "health_score": health.get("health_score")} + + except ImportError: + return {"error": "Graph insights module not available"} + except Exception as e: + return {"error": str(e)} + + +def run_taint_insights(output_dir: Path) -> Dict[str, Any]: + """Run taint severity insights.""" + try: + from datetime import datetime, UTC + from theauditor.taint.insights import calculate_severity, classify_vulnerability, generate_summary + from theauditor.taint_analyzer import SECURITY_SINKS + + # Load raw taint data + taint_path = Path(".pf/raw/taint_analysis.json") + if not taint_path.exists(): + return {"error": "No taint data found. 
Run 'aud taint-analyze' first."} + + with open(taint_path) as f: + taint_data = json.load(f) + + if not taint_data.get("success"): + return {"error": "Taint analysis was not successful"} + + # Calculate severity for each path and create enriched versions + severity_analysis = [] + enriched_paths = [] + for path in taint_data.get("taint_paths", []): + severity = calculate_severity(path) + vuln_type = classify_vulnerability(path.get("sink", {}), SECURITY_SINKS) + + severity_analysis.append({ + "file": path.get("sink", {}).get("file"), + "line": path.get("sink", {}).get("line"), + "severity": severity, + "vulnerability_type": vuln_type, + "path_length": len(path.get("path", [])), + "risk_score": 1.0 if severity == "critical" else 0.7 if severity == "high" else 0.4 + }) + + # Create enriched path with severity for summary generation + enriched_path = dict(path) + enriched_path["severity"] = severity + enriched_path["vulnerability_type"] = vuln_type + enriched_paths.append(enriched_path) + + # Generate summary using enriched paths with severity + summary = generate_summary(enriched_paths) + + # Save results + output = { + "generated_at": datetime.now(UTC).isoformat(), + "severity_analysis": severity_analysis, + "summary": summary, + "total_vulnerabilities": len(severity_analysis), + "sources_analyzed": taint_data.get("sources_found", 0), + "sinks_analyzed": taint_data.get("sinks_found", 0) + } + + output_path = output_dir / "taint_severity.json" + with open(output_path, 'w') as f: + json.dump(output, f, indent=2) + + return {"success": True, "risk_level": summary.get("risk_level")} + + except ImportError: + return {"error": "Taint insights module not available"} + except Exception as e: + return {"error": str(e)} + + +def run_impact_insights(output_dir: Path) -> Dict[str, Any]: + """Run impact analysis insights.""" + try: + # Check if workset exists + workset_path = Path(".pf/workset.json") + if not workset_path.exists(): + return {"error": "No workset found. 
Run 'aud workset' first."} + + with open(workset_path) as f: + workset_data = json.load(f) + + # For now, create a simple impact summary + # In future, this could run actual impact analysis on changed files + output = { + "files_changed": len(workset_data.get("files", [])), + "potential_impact": "Analysis pending", + "recommendation": "Run 'aud impact --file --line ' for detailed analysis" + } + + output_path = output_dir / "impact_analysis.json" + with open(output_path, 'w') as f: + json.dump(output, f, indent=2) + + return {"success": True, "files_analyzed": len(workset_data.get("files", []))} + + except Exception as e: + return {"error": str(e)} + + +def aggregate_insights(results: Dict[str, Any], output_dir: Path) -> Dict[str, Any]: + """Aggregate all insights into unified summary.""" + summary = { + "insights_generated": list(results.keys()), + "timestamp": __import__('datetime').datetime.now().isoformat(), + "output_directory": str(output_dir) + } + + # ML insights + if "ml" in results and results["ml"].get("success"): + summary["ml"] = { + "status": "success", + "workset_size": results["ml"].get("workset_size", 0), + "predictions_generated": True + } + elif "ml" in results: + summary["ml"] = {"status": "error", "error": results["ml"].get("error")} + + # Graph insights + if "graph" in results and results["graph"].get("success"): + summary["graph"] = { + "status": "success", + "health_score": results["graph"].get("health_score", 0) + } + elif "graph" in results: + summary["graph"] = {"status": "error", "error": results["graph"].get("error")} + + # Taint insights + if "taint" in results and results["taint"].get("success"): + summary["taint"] = { + "status": "success", + "risk_level": results["taint"].get("risk_level", "unknown") + } + elif "taint" in results: + summary["taint"] = {"status": "error", "error": results["taint"].get("error")} + + # Impact insights + if "impact" in results and results["impact"].get("success"): + summary["impact"] = { + "status": "success", + "files_analyzed": results["impact"].get("files_analyzed", 0) + } + elif "impact" in results: + summary["impact"] = {"status": "error", "error": results["impact"].get("error")} + + return summary + + +def print_insights_summary(summary: Dict[str, Any]) -> None: + """Print insights summary to console.""" + click.echo(f"\n{'='*60}") + click.echo("INSIGHTS SUMMARY") + click.echo(f"{'='*60}") + + # ML Summary + if "ml" in summary: + if summary["ml"]["status"] == "success": + click.echo(f"\n[ML] Machine Learning Insights:") + click.echo(f" • Workset size: {summary['ml'].get('workset_size', 0)} files") + click.echo(f" • Predictions: Generated successfully") + else: + click.echo(f"\n[ML] Machine Learning Insights: {summary['ml'].get('error')}") + + # Graph Summary + if "graph" in summary: + if summary["graph"]["status"] == "success": + health = summary["graph"].get("health_score", 0) + grade = "A" if health >= 90 else "B" if health >= 80 else "C" if health >= 70 else "D" if health >= 60 else "F" + click.echo(f"\n[GRAPH] Architecture Health:") + click.echo(f" • Health score: {health}/100 (Grade: {grade})") + else: + click.echo(f"\n[GRAPH] Architecture Health: {summary['graph'].get('error')}") + + # Taint Summary + if "taint" in summary: + if summary["taint"]["status"] == "success": + risk = summary["taint"].get("risk_level", "unknown") + color = "red" if risk == "critical" else "yellow" if risk == "high" else "green" + click.echo(f"\n[TAINT] Security Risk:") + click.echo(f" • Risk level: {risk.upper()}") + else: + 
click.echo(f"\n[TAINT] Security Risk: {summary['taint'].get('error')}") + + # Impact Summary + if "impact" in summary: + if summary["impact"]["status"] == "success": + click.echo(f"\n[IMPACT] Change Impact:") + click.echo(f" • Files analyzed: {summary['impact'].get('files_analyzed', 0)}") + else: + click.echo(f"\n[IMPACT] Change Impact: {summary['impact'].get('error')}") + + click.echo(f"\n{'='*60}") + + +# Register command +insights_command = insights \ No newline at end of file diff --git a/theauditor/commands/lint.py b/theauditor/commands/lint.py new file mode 100644 index 0000000..701cef4 --- /dev/null +++ b/theauditor/commands/lint.py @@ -0,0 +1,267 @@ +"""Run linters and normalize output to evidence format.""" + +import hashlib +import json +from collections import defaultdict +from pathlib import Path +from typing import Any + +import click + +from theauditor.linters import ( + detect_linters, + run_linter, +) +from theauditor.utils import load_json_file +from theauditor.utils.error_handler import handle_exceptions + + +def write_lint_json(findings: list[dict[str, Any]], output_path: str): + """Write findings to JSON file.""" + # Sort findings for determinism + sorted_findings = sorted(findings, key=lambda f: (f["file"], f["line"], f["rule"])) + + with open(output_path, "w", encoding="utf-8") as f: + json.dump(sorted_findings, f, indent=2, sort_keys=True) + + +def lint_command( + root_path: str = ".", + workset_path: str = "./.pf/workset.json", + manifest_path: str = "manifest.json", + timeout: int = 300, + print_plan: bool = False, + auto_fix: bool = False, +) -> dict[str, Any]: + """ + Run linters and normalize output. + + Returns: + Dictionary with success status and statistics + """ + # AUTO-FIX DEPRECATED: Force disabled to prevent version mismatch issues + auto_fix = False + # Load workset or manifest files + if workset_path is not None: + # Use workset mode + try: + workset = load_json_file(workset_path) + workset_files = {p["path"] for p in workset.get("paths", [])} + except (FileNotFoundError, json.JSONDecodeError) as e: + return {"success": False, "error": f"Failed to load workset: {e}"} + else: + # Use all files from manifest when --workset is not used + try: + manifest = load_json_file(manifest_path) + # Use all text files from the manifest + workset_files = {f["path"] for f in manifest if isinstance(f, dict) and "path" in f} + except (FileNotFoundError, json.JSONDecodeError) as e: + return {"success": False, "error": f"Failed to load manifest: {e}"} + + if not workset_files: + return {"success": False, "error": "Empty workset"} + + # Detect available linters + linters = detect_linters(root_path, auto_fix=auto_fix) + + if print_plan: + print("Lint Plan:") + # AUTO-FIX DEPRECATED: Always in check-only mode + # print(f" Mode: {'AUTO-FIX' if auto_fix else 'CHECK-ONLY'}") + print(f" Mode: CHECK-ONLY") + print(f" Workset: {len(workset_files)} files") + if linters: + print(" External linters detected:") + for tool in linters: + # AUTO-FIX DEPRECATED: No fix indicators + # fix_capable = tool in ["eslint", "prettier", "ruff", "black"] + # fix_indicator = " (will fix)" if auto_fix and fix_capable else "" + print(f" - {tool}") + else: + print(" No external linters detected") + print(" Will run built-in checks:") + print(" - NO_TODO_LAND (excessive TODOs)") + print(" - NO_LONG_FILES (>1500 lines)") + print(" - NO_CYCLES (import cycles)") + print(" - NO_DEBUG_CALLS (console.log/print)") + print(" - NO_SECRET_LIKE (potential secrets)") + return {"success": True, "printed_plan": True} 
+ + all_findings = [] + fixed_count = 0 + all_ast_data = {} # Collect AST data from ESLint + + if linters: + # Run external linters + # AUTO-FIX DEPRECATED: Always run in check-only mode + # mode_str = "Fixing" if auto_fix else "Checking" + print(f"Checking with {len(linters)} external linters...") + for tool, command in linters.items(): + # AUTO-FIX DEPRECATED: This entire block is disabled + # if auto_fix and tool in ["eslint", "prettier", "ruff", "black"]: + # print(f" Fixing with {tool}...") + # # In fix mode, we run the tool but may get fewer findings (as they're fixed) + # findings, ast_data = run_linter(tool, command, root_path, workset_files, timeout) + # # Collect AST data from ESLint + # if tool == "eslint" and ast_data: + # all_ast_data.update(ast_data) + # # Add remaining findings (unfixable issues) + # all_findings.extend(findings) + # # Estimate fixes based on the tool (most issues are fixable) + # if tool in ["prettier", "black"]: + # # Formatters fix all issues + # if len(findings) == 0: + # print(f" Fixed all formatting issues") + # else: + # print(f" Fixed most issues, {len(findings)} remaining") + # else: + # # ESLint and Ruff fix most but not all issues + # remaining = len(findings) + # if remaining > 0: + # print(f" Fixed issues, {remaining} remaining (unfixable)") + # else: + # print(f" Fixed all issues") + # else: + print(f" Checking with {tool}...") + findings, ast_data = run_linter(tool, command, root_path, workset_files, timeout) + # Collect AST data from ESLint + if tool == "eslint" and ast_data: + all_ast_data.update(ast_data) + all_findings.extend(findings) + print(f" Found {len(findings)} issues") + else: + # No linters found - this indicates broken environment + print("[WARNING] No external linters found!") + print("[ERROR] Environment is not properly configured - industry tools are required") + print(" Install at least one linter:") + print(" JavaScript/TypeScript: npm install --save-dev eslint") + print(" Python: pip install ruff") + print(" Go: go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest") + # Continue with empty findings rather than failing completely + print("[INFO] Continuing with no lint findings...") + + # Check TypeScript configuration to determine which TS tool to use + # This is DETECTION logic, not a linter itself + # tsconfig_findings = check_tsconfig(root_path) + # NOTE: check_tsconfig was deleted with builtin.py - need to restore detection logic + + # Write outputs directly to raw directory + output_dir = Path(".pf/raw") + output_dir.mkdir(parents=True, exist_ok=True) + + json_path = output_dir / "lint.json" + + write_lint_json(all_findings, str(json_path)) + + # Save ESLint ASTs to cache + if all_ast_data: + # Load manifest to get file hashes + try: + manifest = load_json_file(manifest_path) + file_hashes = {f["path"]: f.get("sha256") for f in manifest if isinstance(f, dict) and "sha256" in f} + + # Create AST cache directory + ast_cache_dir = output_dir / "ast_cache" / "eslint" + ast_cache_dir.mkdir(parents=True, exist_ok=True) + + # Save each AST with the file's SHA256 hash as the filename + for file_path, ast in all_ast_data.items(): + if file_path in file_hashes and file_hashes[file_path]: + file_hash = file_hashes[file_path] + else: + # If hash not in manifest, compute it from file content + full_path = Path(root_path) / file_path + if full_path.exists(): + with open(full_path, "rb") as f: + file_hash = hashlib.sha256(f.read()).hexdigest() + else: + continue + + # Save AST to cache file + ast_file = ast_cache_dir 
/ f"{file_hash}.json" + with open(ast_file, "w", encoding="utf-8") as f: + json.dump(ast, f, indent=2) + + print(f" Cached {len(all_ast_data)} ASTs from ESLint") + except Exception as e: + print(f"Warning: Failed to cache ESLint ASTs: {e}") + + # Statistics + stats = { + "total_findings": len(all_findings), + "tools_run": len(linters) if linters else 1, # 1 for built-in + "workset_size": len(workset_files), + "errors": sum(1 for f in all_findings if f["severity"] == "error"), + "warnings": sum(1 for f in all_findings if f["severity"] == "warning"), + } + + # AUTO-FIX DEPRECATED: This block is disabled + # if auto_fix: + # print("\n[OK] Auto-fix complete:") + # print(f" Files processed: {len(workset_files)}") + # print(f" Remaining issues: {stats['total_findings']}") + # print(f" Errors: {stats['errors']}") + # print(f" Warnings: {stats['warnings']}") + # if stats['total_findings'] > 0: + # print(f" Note: Some issues cannot be auto-fixed and require manual attention") + # print(f" Report: {json_path}") + # else: + print("\nLint complete:") + print(f" Total findings: {stats['total_findings']}") + print(f" Errors: {stats['errors']}") + print(f" Warnings: {stats['warnings']}") + print(f" Output: {json_path}") + if stats['total_findings'] > 0: + print(" Note: Many linters (ESLint, Prettier, Ruff, Black) have their own automatic code style fix capabilities") + + return { + "success": True, + "stats": stats, + "output_files": [str(json_path)], + "auto_fix_applied": auto_fix, + } + + +@click.command() +@handle_exceptions +@click.option("--root", default=".", help="Root directory") +@click.option("--workset", is_flag=True, help="Use workset mode (lint only files in .pf/workset.json)") +@click.option("--workset-path", default=None, help="Custom workset path (rarely needed)") +@click.option("--manifest", default=None, help="Manifest file path") +@click.option("--timeout", default=None, type=int, help="Timeout in seconds for each linter") +@click.option("--print-plan", is_flag=True, help="Print lint plan without executing") +# AUTO-FIX DEPRECATED: Hidden flag kept for backward compatibility +@click.option("--fix", is_flag=True, hidden=True, help="[DEPRECATED] No longer functional") +def lint(root, workset, workset_path, manifest, timeout, print_plan, fix): + """Run linters and normalize output to evidence format.""" + from theauditor.config_runtime import load_runtime_config + + # Load configuration + config = load_runtime_config(root) + + # Use config defaults if not provided + if manifest is None: + manifest = config["paths"]["manifest"] + if timeout is None: + timeout = config["timeouts"]["lint_timeout"] + if workset_path is None and workset: + workset_path = config["paths"]["workset"] + + # Use workset path only if --workset flag is set + actual_workset_path = workset_path if workset else None + + result = lint_command( + root_path=root, + workset_path=actual_workset_path, + manifest_path=manifest, + timeout=timeout, + print_plan=print_plan, + auto_fix=fix, + ) + + if result.get("printed_plan"): + return + + if not result["success"]: + click.echo(f"Error: {result.get('error', 'Lint failed')}", err=True) + raise click.ClickException(result.get("error", "Lint failed")) \ No newline at end of file diff --git a/theauditor/commands/ml.py b/theauditor/commands/ml.py new file mode 100644 index 0000000..76d0c11 --- /dev/null +++ b/theauditor/commands/ml.py @@ -0,0 +1,165 @@ +"""Machine learning commands for TheAuditor.""" + +import click +from pathlib import Path + + +@click.command(name="learn") 
+@click.option("--db-path", default="./.pf/repo_index.db", help="Database path") +@click.option("--manifest", default="./.pf/manifest.json", help="Manifest file path") +@click.option("--journal", default="./.pf/journal.ndjson", help="Journal file path") +@click.option("--fce", default="./.pf/fce.json", help="FCE file path") +@click.option("--ast", default="./.pf/ast_proofs.json", help="AST proofs file path") +@click.option("--enable-git", is_flag=True, help="Enable git churn features") +@click.option("--model-dir", default="./.pf/ml", help="Model output directory") +@click.option("--window", default=50, type=int, help="Journal window size") +@click.option("--seed", default=13, type=int, help="Random seed") +@click.option("--feedback", help="Path to human feedback JSON file") +@click.option("--train-on", type=click.Choice(["full", "diff", "all"]), default="full", help="Type of historical runs to train on") +@click.option("--print-stats", is_flag=True, help="Print training statistics") +def learn(db_path, manifest, journal, fce, ast, enable_git, model_dir, window, seed, feedback, train_on, print_stats): + """Train ML models from audit artifacts to predict risk and root causes.""" + from theauditor.ml import learn as ml_learn + + click.echo(f"[ML] Training models from audit artifacts (using {train_on} runs)...") + + result = ml_learn( + db_path=db_path, + manifest_path=manifest, + journal_path=journal, + fce_path=fce, + ast_path=ast, + enable_git=enable_git, + model_dir=model_dir, + window=window, + seed=seed, + print_stats=print_stats, + feedback_path=feedback, + train_on=train_on, + ) + + if result.get("success"): + stats = result.get("stats", {}) + click.echo(f"[OK] Models trained successfully") + click.echo(f" * Training data: {train_on} runs from history") + click.echo(f" * Files analyzed: {result.get('source_files', 0)}") + click.echo(f" * Features: {stats.get('n_features', 0)} dimensions") + click.echo(f" * Root cause ratio: {stats.get('root_cause_positive_ratio', 0):.2%}") + click.echo(f" * Risk mean: {stats.get('mean_risk', 0):.3f}") + if stats.get('cold_start'): + click.echo(f" [WARN] Cold-start mode (<500 samples)") + click.echo(f" * Models saved to: {result.get('model_dir')}") + else: + click.echo(f"[FAIL] Training failed: {result.get('error')}", err=True) + raise click.ClickException(result.get("error")) + + +@click.command(name="suggest") +@click.option("--db-path", default="./.pf/repo_index.db", help="Database path") +@click.option("--manifest", default="./.pf/manifest.json", help="Manifest file path") +@click.option("--workset", default="./.pf/workset.json", help="Workset file path") +@click.option("--fce", default="./.pf/fce.json", help="FCE file path") +@click.option("--ast", default="./.pf/ast_proofs.json", help="AST proofs file path") +@click.option("--model-dir", default="./.pf/ml", help="Model directory") +@click.option("--topk", default=10, type=int, help="Top K files to suggest") +@click.option("--out", default="./.pf/insights/ml_suggestions.json", help="Output file path") +@click.option("--print-plan", is_flag=True, help="Print suggestions to console") +def suggest(db_path, manifest, workset, fce, ast, model_dir, topk, out, print_plan): + """Generate ML-based suggestions for risky files and likely root causes.""" + from theauditor.ml import suggest as ml_suggest + + click.echo("[ML] Generating suggestions from trained models...") + + result = ml_suggest( + db_path=db_path, + manifest_path=manifest, + workset_path=workset, + fce_path=fce, + ast_path=ast, + 
model_dir=model_dir, + topk=topk, + out_path=out, + print_plan=print_plan, + ) + + if result.get("success"): + click.echo(f"[OK] Suggestions generated") + click.echo(f" * Workset size: {result.get('workset_size', 0)} files") + click.echo(f" * Source files analyzed: {result.get('workset_size', 0)}") + click.echo(f" * Non-source excluded: {result.get('excluded_count', 0)}") + click.echo(f" * Top {result.get('topk', 10)} suggestions saved to: {result.get('out_path')}") + else: + click.echo(f"[FAIL] Suggestion generation failed: {result.get('error')}", err=True) + raise click.ClickException(result.get("error")) + + +@click.command(name="learn-feedback") +@click.option("--feedback-file", required=True, help="Path to feedback JSON file") +@click.option("--db-path", default="./.pf/repo_index.db", help="Database path") +@click.option("--manifest", default="./.pf/manifest.json", help="Manifest file path") +@click.option("--model-dir", default="./.pf/ml", help="Model output directory") +@click.option("--train-on", type=click.Choice(["full", "diff", "all"]), default="full", help="Type of historical runs to train on") +@click.option("--print-stats", is_flag=True, help="Print training statistics") +def learn_feedback(feedback_file, db_path, manifest, model_dir, train_on, print_stats): + """ + Re-train models with human feedback for improved accuracy. + + The feedback file should be a JSON file with the format: + { + "path/to/file.py": { + "is_risky": true, + "is_root_cause": false, + "will_need_edit": true + }, + ... + } + """ + from theauditor.ml import learn as ml_learn + + # Validate feedback file exists + if not Path(feedback_file).exists(): + click.echo(f"[FAIL] Feedback file not found: {feedback_file}", err=True) + raise click.ClickException(f"Feedback file not found: {feedback_file}") + + # Validate feedback file format + try: + import json + with open(feedback_file) as f: + feedback_data = json.load(f) + + if not isinstance(feedback_data, dict): + raise ValueError("Feedback file must contain a JSON object") + + # Count feedback entries + feedback_count = len(feedback_data) + click.echo(f"[ML] Loading human feedback for {feedback_count} files...") + + except Exception as e: + click.echo(f"[FAIL] Invalid feedback file format: {e}", err=True) + raise click.ClickException(f"Invalid feedback file: {e}") + + click.echo(f"[ML] Re-training models with human feedback (using {train_on} runs)...") + + result = ml_learn( + db_path=db_path, + manifest_path=manifest, + model_dir=model_dir, + print_stats=print_stats, + feedback_path=feedback_file, + train_on=train_on, + # Use default paths for historical data from .pf/history + enable_git=False, # Disable git for speed in feedback mode + ) + + if result.get("success"): + stats = result.get("stats", {}) + click.echo(f"[OK] Models re-trained with human feedback") + click.echo(f" * Training data: {train_on} runs from history") + click.echo(f" * Files analyzed: {result.get('source_files', 0)}") + click.echo(f" * Human feedback incorporated: {feedback_count} files") + click.echo(f" * Features: {stats.get('n_features', 0)} dimensions") + click.echo(f" * Models saved to: {result.get('model_dir')}") + click.echo(f"\n[TIP] The models have learned from your feedback and will provide more accurate predictions.") + else: + click.echo(f"[FAIL] Re-training failed: {result.get('error')}", err=True) + raise click.ClickException(result.get("error")) \ No newline at end of file diff --git a/theauditor/commands/refactor.py b/theauditor/commands/refactor.py new file mode 100644 
index 0000000..e3089fc --- /dev/null +++ b/theauditor/commands/refactor.py @@ -0,0 +1,600 @@ +"""Refactoring impact analysis command. + +This command analyzes the impact of refactoring changes and detects +inconsistencies between frontend and backend, API contract mismatches, +and data model evolution issues. +""" + +import json +import os +import sqlite3 +from pathlib import Path +from typing import Dict, List, Set, Any, Optional + +import click + + +@click.command() +@click.option("--file", "-f", help="File to analyze refactoring impact from") +@click.option("--line", "-l", type=int, help="Line number in the file") +@click.option("--migration-dir", "-m", default="backend/migrations", + help="Directory containing database migrations") +@click.option("--migration-limit", "-ml", type=int, default=0, + help="Number of recent migrations to analyze (0=all, default=all)") +@click.option("--expansion-mode", "-e", + type=click.Choice(["none", "direct", "full"]), + default="none", + help="Dependency expansion mode: none (affected only), direct (1 level), full (transitive)") +@click.option("--auto-detect", "-a", is_flag=True, + help="Auto-detect refactoring from recent migrations") +@click.option("--workset", "-w", is_flag=True, + help="Use current workset for analysis") +@click.option("--output", "-o", type=click.Path(), + help="Output file for detailed report") +def refactor(file: Optional[str], line: Optional[int], migration_dir: str, + migration_limit: int, expansion_mode: str, + auto_detect: bool, workset: bool, output: Optional[str]) -> None: + """Analyze refactoring impact and find inconsistencies. + + This command helps detect issues introduced by refactoring such as: + - Data model changes (fields moved between tables) + - API contract mismatches (frontend expects old structure) + - Missing updates in dependent code + - Cross-stack inconsistencies + + Examples: + # Analyze impact from a specific model change + aud refactor --file models/Product.ts --line 42 + + # Auto-detect refactoring from migrations + aud refactor --auto-detect + + # Analyze current workset + aud refactor --workset + """ + + # Find repository root + repo_root = Path.cwd() + while repo_root != repo_root.parent: + if (repo_root / ".git").exists(): + break + repo_root = repo_root.parent + + pf_dir = repo_root / ".pf" + db_path = pf_dir / "repo_index.db" + + if not db_path.exists(): + click.echo("Error: No index found. 
Run 'aud index' first.", err=True) + raise click.Abort() + + # Import components here to avoid import errors + try: + from theauditor.impact_analyzer import analyze_impact + from theauditor.universal_detector import UniversalPatternDetector + from theauditor.pattern_loader import PatternLoader + from theauditor.fce import run_fce + from theauditor.correlations.loader import CorrelationLoader + except ImportError as e: + click.echo(f"Error importing components: {e}", err=True) + raise click.Abort() + # Initialize components + pattern_loader = PatternLoader() + pattern_detector = UniversalPatternDetector( + repo_root, + pattern_loader, + exclude_patterns=[] + ) + + click.echo("\nRefactoring Impact Analysis") + click.echo("-" * 60) + + # Step 1: Determine what to analyze + affected_files = set() + + if auto_detect: + click.echo("Auto-detecting refactoring from migrations...") + affected_files.update(_analyze_migrations(repo_root, migration_dir, migration_limit)) + + if not affected_files: + click.echo("No affected files found from migrations.") + click.echo("Tip: Check if your migrations contain schema change operations") + return + + elif workset: + click.echo("Analyzing workset files...") + workset_file = pf_dir / "workset.json" + if workset_file.exists(): + with open(workset_file, 'r') as f: + workset_data = json.load(f) + affected_files.update(workset_data.get("files", [])) + else: + click.echo("Error: No workset found. Create one with 'aud workset'", err=True) + raise click.Abort() + + elif file and line: + click.echo(f"Analyzing impact from {file}:{line}...") + + # Run impact analysis + impact_result = analyze_impact( + db_path=str(db_path), + target_file=file, + target_line=line, + trace_to_backend=True + ) + + if not impact_result.get("error"): + # Extract affected files from impact analysis + upstream_files = [dep["file"] for dep in impact_result.get("upstream", [])] + downstream_files = [dep["file"] for dep in impact_result.get("downstream", [])] + upstream_trans_files = [dep["file"] for dep in impact_result.get("upstream_transitive", [])] + downstream_trans_files = [dep["file"] for dep in impact_result.get("downstream_transitive", [])] + + all_impact_files = set(upstream_files + downstream_files + upstream_trans_files + downstream_trans_files) + affected_files.update(all_impact_files) + + # Show immediate impact + summary = impact_result.get("impact_summary", {}) + click.echo(f"\nDirect impact: {summary.get('direct_upstream', 0)} upstream, " + f"{summary.get('direct_downstream', 0)} downstream") + click.echo(f"Total files affected: {summary.get('affected_files', len(affected_files))}") + + # Check for cross-stack impact + if impact_result.get("cross_stack_impact"): + click.echo("\n⚠️ Cross-stack impact detected!") + for impact in impact_result["cross_stack_impact"]: + click.echo(f" • {impact['file']}:{impact['line']} - {impact['type']}") + else: + click.echo("Error: Specify --file and --line, --auto-detect, or --workset", err=True) + raise click.Abort() + + if not affected_files: + click.echo("No files to analyze.") + return + + # Step 2b: Expand affected files based on mode + if affected_files: + expanded_files = _expand_affected_files( + affected_files, + str(db_path), + expansion_mode, + repo_root + ) + else: + expanded_files = set() + + # Update workset with expanded files + click.echo(f"\nCreating workset from {len(expanded_files)} files...") + temp_workset_file = pf_dir / "temp_workset.json" + with open(temp_workset_file, 'w') as f: + json.dump({"files": 
list(expanded_files)}, f) + + # Step 3: Run pattern detection with targeted file list + if expanded_files: + click.echo(f"Running pattern detection on {len(expanded_files)} files...") + + # Check if batch method is available + if hasattr(pattern_detector, 'detect_patterns_for_files'): + # Use optimized batch method if available + findings = pattern_detector.detect_patterns_for_files( + list(expanded_files), + categories=None + ) + else: + # Fallback to individual file processing + findings = [] + for i, file_path in enumerate(expanded_files, 1): + if i % 10 == 0: + click.echo(f" Scanning file {i}/{len(expanded_files)}...", nl=False) + click.echo("\r", nl=False) + + # Convert to relative path for pattern detector + try: + rel_path = Path(file_path).relative_to(repo_root).as_posix() + except ValueError: + rel_path = file_path + + file_findings = pattern_detector.detect_patterns( + categories=None, + file_filter=rel_path + ) + findings.extend(file_findings) + + click.echo(f"\n Found {len(findings)} patterns") + else: + findings = [] + click.echo("No files to analyze after expansion") + + patterns = findings + + # Step 4: Run FCE correlation with refactoring rules + click.echo("Running correlation analysis...") + + # Run the FCE to get correlations + fce_results = run_fce( + root_path=str(repo_root), + capsules_dir=str(pf_dir / "capsules"), + manifest_path="manifest.json", + workset_path=str(temp_workset_file), + db_path="repo_index.db", + timeout=600, + print_plan=False + ) + + # Extract correlations from FCE results + correlations = [] + if fce_results.get("success") and fce_results.get("results"): + fce_data = fce_results["results"] + if "correlations" in fce_data and "factual_clusters" in fce_data["correlations"]: + correlations = fce_data["correlations"]["factual_clusters"] + + # Step 5: Identify mismatches + mismatches = _find_mismatches(patterns, correlations, affected_files) + + # Generate report + report = _generate_report(affected_files, patterns, correlations, mismatches) + + # Display summary + click.echo("\n" + "=" * 60) + click.echo("Refactoring Analysis Summary") + click.echo("=" * 60) + + click.echo(f"\nFiles analyzed: {len(affected_files)}") + click.echo(f"Patterns detected: {len(patterns)}") + click.echo(f"Correlations found: {len(correlations)}") + + if mismatches["api"]: + click.echo(f"\nAPI Mismatches: {len(mismatches['api'])}") + for mismatch in mismatches["api"][:5]: # Show top 5 + click.echo(f" • {mismatch['description']}") + + if mismatches["model"]: + click.echo(f"\nData Model Mismatches: {len(mismatches['model'])}") + for mismatch in mismatches["model"][:5]: # Show top 5 + click.echo(f" • {mismatch['description']}") + + if mismatches["contract"]: + click.echo(f"\nContract Mismatches: {len(mismatches['contract'])}") + for mismatch in mismatches["contract"][:5]: # Show top 5 + click.echo(f" • {mismatch['description']}") + + # Risk assessment + risk_level = _assess_risk(mismatches, len(affected_files)) + click.echo(f"\nRisk Level: {risk_level}") + + # Recommendations + recommendations = _generate_recommendations(mismatches) + if recommendations: + click.echo("\nRecommendations:") + for rec in recommendations: + click.echo(f" ✓ {rec}") + + # Save detailed report if requested + if output: + with open(output, 'w') as f: + json.dump(report, f, indent=2, default=str) + click.echo(f"\nDetailed report saved to: {output}") + + # Suggest next steps + click.echo("\nNext Steps:") + click.echo(" 1. Review the mismatches identified above") + click.echo(" 2. 
Run 'aud impact --file --line ' for detailed impact") + click.echo(" 3. Use 'aud detect-patterns --workset' for pattern-specific issues") + click.echo(" 4. Run 'aud full' for comprehensive analysis") + + +def _expand_affected_files( + affected_files: Set[str], + db_path: str, + expansion_mode: str, + repo_root: Path +) -> Set[str]: + """Expand affected files with their dependencies based on mode.""" + if expansion_mode == "none": + return affected_files + + expanded = set(affected_files) + total_files = len(affected_files) + + click.echo(f"\nExpanding {total_files} affected files with {expansion_mode} mode...") + + if expansion_mode in ["direct", "full"]: + from theauditor.impact_analyzer import analyze_impact + import sqlite3 + import os + + for i, file_path in enumerate(affected_files, 1): + if i % 5 == 0 or i == total_files: + click.echo(f" Analyzing dependencies {i}/{total_files}...", nl=False) + click.echo("\r", nl=False) + + # Find a representative line (first function/class) + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + cursor.execute(""" + SELECT line FROM symbols + WHERE path = ? AND type IN ('function', 'class') + ORDER BY line LIMIT 1 + """, (file_path,)) + result = cursor.fetchone() + conn.close() + + if result: + line = result[0] + try: + impact = analyze_impact( + db_path=db_path, + target_file=file_path, + target_line=line, + trace_to_backend=(expansion_mode == "full") + ) + + # Add direct dependencies + for dep in impact.get("upstream", []): + expanded.add(dep["file"]) + for dep in impact.get("downstream", []): + if dep["file"] != "external": + expanded.add(dep["file"]) + + # Add transitive if full mode + if expansion_mode == "full": + for dep in impact.get("upstream_transitive", []): + expanded.add(dep["file"]) + for dep in impact.get("downstream_transitive", []): + if dep["file"] != "external": + expanded.add(dep["file"]) + except Exception as e: + # Don't fail entire analysis for one file + if os.environ.get("THEAUDITOR_DEBUG"): + click.echo(f"\n Warning: Could not analyze {file_path}: {e}") + + click.echo(f"\n Expanded from {total_files} to {len(expanded)} files") + + return expanded + + +def _analyze_migrations(repo_root: Path, migration_dir: str, migration_limit: int = 0) -> List[str]: + """Analyze migration files to detect schema changes. + + Args: + repo_root: Repository root path + migration_dir: Migration directory path + migration_limit: Number of recent migrations to analyze (0=all) + """ + migration_path = repo_root / migration_dir + affected_files = [] + + if not migration_path.exists(): + # Try common locations (most common first!) 
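+ # A candidate directory is only accepted if it actually contains migration files (.js, .ts, or .sql); otherwise the search continues.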
+ found_migrations = False + for common_path in ["backend/migrations", "migrations", "db/migrations", + "database/migrations", "frontend/migrations"]: + test_path = repo_root / common_path + if test_path.exists(): + # Check if it actually contains migration files + import glob + test_migrations = (glob.glob(str(test_path / "*.js")) + + glob.glob(str(test_path / "*.ts")) + + glob.glob(str(test_path / "*.sql"))) + if test_migrations: + migration_path = test_path + found_migrations = True + click.echo(f"Found migrations in: {common_path}") + break + + if not found_migrations: + click.echo("\n⚠️ WARNING: No migration files found in standard locations:", err=True) + click.echo(" • backend/migrations/", err=True) + click.echo(" • migrations/", err=True) + click.echo(" • db/migrations/", err=True) + click.echo(" • database/migrations/", err=True) + click.echo(" • frontend/migrations/ (yes, we check here too)", err=True) + click.echo(f"\n Current directory searched: {migration_dir}", err=True) + click.echo(f" Use --migration-dir to specify your migration folder\n", err=True) + return affected_files + + if migration_path.exists(): + # Look for migration files + import glob + import re + + migrations = sorted(glob.glob(str(migration_path / "*.js")) + + glob.glob(str(migration_path / "*.ts")) + + glob.glob(str(migration_path / "*.sql"))) + + if not migrations: + click.echo(f"\n⚠️ WARNING: Directory '{migration_path}' exists but contains no migration files", err=True) + click.echo(f" Expected: .js, .ts, or .sql files", err=True) + return affected_files + + # Determine which migrations to analyze + total_migrations = len(migrations) + if migration_limit > 0: + migrations_to_analyze = migrations[-migration_limit:] + click.echo(f"Analyzing {len(migrations_to_analyze)} most recent migrations (out of {total_migrations} total)") + else: + migrations_to_analyze = migrations + click.echo(f"Analyzing ALL {total_migrations} migration files") + if total_migrations > 20: + click.echo("⚠️ Large migration set detected. 
Consider using --migration-limit for faster analysis") + + # Enhanced pattern matching + schema_patterns = { + 'column_ops': r'(?:removeColumn|dropColumn|renameColumn|addColumn|alterColumn|modifyColumn)', + 'table_ops': r'(?:createTable|dropTable|renameTable|alterTable)', + 'index_ops': r'(?:addIndex|dropIndex|createIndex|removeIndex)', + 'fk_ops': r'(?:addForeignKey|dropForeignKey|addConstraint|dropConstraint)', + 'type_changes': r'(?:changeColumn|changeDataType|alterType)' + } + + tables_affected = set() + operations_found = set() + + # Process migrations with progress indicator + for i, migration_file in enumerate(migrations_to_analyze, 1): + if i % 10 == 0 or i == len(migrations_to_analyze): + click.echo(f" Processing migration {i}/{len(migrations_to_analyze)}...", nl=False) + click.echo("\r", nl=False) + + try: + with open(migration_file, 'r') as f: + content = f.read() + + # Check all pattern categories + for pattern_name, pattern_regex in schema_patterns.items(): + if re.search(pattern_regex, content, re.IGNORECASE): + operations_found.add(pattern_name) + + # Extract table/model names (improved regex) + # Handles: "table", 'table', `table`, tableName + tables = re.findall(r"['\"`](\w+)['\"`]|(?:table|Table)Name:\s*['\"`]?(\w+)", content) + for match in tables: + # match is a tuple from multiple capture groups + table = match[0] if match[0] else match[1] if len(match) > 1 else None + if table and table not in ['table', 'Table', 'column', 'Column']: + tables_affected.add(table) + except Exception as e: + click.echo(f"\nWarning: Could not read migration {migration_file}: {e}") + continue + + click.echo(f"\nFound {len(operations_found)} types of operations affecting {len(tables_affected)} tables") + + # Map tables to model files + for table in tables_affected: + model_file = _find_model_file(repo_root, table) + if model_file: + affected_files.append(str(model_file)) + + # Deduplicate + affected_files = list(set(affected_files)) + click.echo(f"Mapped to {len(affected_files)} model files") + + return affected_files + + +def _find_model_file(repo_root: Path, table_name: str) -> Optional[Path]: + """Find model file corresponding to a database table.""" + # Convert table name to likely model name + model_names = [ + table_name, # exact match + table_name.rstrip('s'), # singular + ''.join(word.capitalize() for word in table_name.split('_')), # PascalCase + ] + + for model_name in model_names: + # Check common model locations + for pattern in [f"**/models/{model_name}.*", f"**/{model_name}.model.*", + f"**/entities/{model_name}.*"]: + import glob + matches = glob.glob(str(repo_root / pattern), recursive=True) + if matches: + return Path(matches[0]) + + return None + + +def _find_mismatches(patterns: List[Dict], correlations: List[Dict], + affected_files: Set[str]) -> Dict[str, List[Dict]]: + """Identify mismatches from patterns and correlations.""" + mismatches = { + "api": [], + "model": [], + "contract": [] + } + + # Analyze patterns for known refactoring issues + for pattern in patterns: + if pattern.get("rule_id") in ["PRODUCT_PRICE_FIELD_REMOVED", + "PRODUCT_SKU_MOVED_TO_VARIANT"]: + mismatches["model"].append({ + "type": "field_moved", + "description": pattern.get("message", "Field moved between models"), + "file": pattern.get("file"), + "line": pattern.get("line") + }) + elif pattern.get("rule_id") in ["API_ENDPOINT_PRODUCT_PRICE"]: + mismatches["api"].append({ + "type": "endpoint_deprecated", + "description": pattern.get("message", "API endpoint no longer exists"), + "file": 
pattern.get("file"), + "line": pattern.get("line") + }) + elif pattern.get("rule_id") in ["FRONTEND_BACKEND_CONTRACT_MISMATCH"]: + mismatches["contract"].append({ + "type": "contract_mismatch", + "description": pattern.get("message", "Frontend/backend contract mismatch"), + "file": pattern.get("file"), + "line": pattern.get("line") + }) + + # Analyze correlations for co-occurring issues + for correlation in correlations: + if correlation.get("confidence", 0) > 0.8: + category = "contract" if "contract" in correlation.get("name", "").lower() else \ + "api" if "api" in correlation.get("name", "").lower() else "model" + + mismatches[category].append({ + "type": "correlation", + "description": correlation.get("description", "Correlated issue detected"), + "confidence": correlation.get("confidence"), + "facts": correlation.get("matched_facts", []) + }) + + return mismatches + + +def _assess_risk(mismatches: Dict[str, List], file_count: int) -> str: + """Assess the risk level of the refactoring.""" + total_issues = sum(len(issues) for issues in mismatches.values()) + + if total_issues > 20 or file_count > 50: + return "HIGH" + elif total_issues > 10 or file_count > 20: + return "MEDIUM" + else: + return "LOW" + + +def _generate_recommendations(mismatches: Dict[str, List]) -> List[str]: + """Generate actionable recommendations based on mismatches.""" + recommendations = [] + + if mismatches["model"]: + recommendations.append("Update frontend interfaces to match new model structure") + recommendations.append("Run database migrations in all environments") + + if mismatches["api"]: + recommendations.append("Update API client to use new endpoints") + recommendations.append("Add deprecation notices for old endpoints") + + if mismatches["contract"]: + recommendations.append("Synchronize TypeScript interfaces with backend models") + recommendations.append("Add API versioning to prevent breaking changes") + + if sum(len(issues) for issues in mismatches.values()) > 10: + recommendations.append("Consider breaking this refactoring into smaller steps") + recommendations.append("Add integration tests before proceeding") + + return recommendations + + +def _generate_report(affected_files: Set[str], patterns: List[Dict], + correlations: List[Dict], mismatches: Dict) -> Dict: + """Generate detailed report of the refactoring analysis.""" + return { + "summary": { + "files_analyzed": len(affected_files), + "patterns_detected": len(patterns), + "correlations_found": len(correlations), + "total_mismatches": sum(len(issues) for issues in mismatches.values()) + }, + "affected_files": list(affected_files), + "patterns": patterns, + "correlations": correlations, + "mismatches": mismatches, + "risk_assessment": _assess_risk(mismatches, len(affected_files)), + "recommendations": _generate_recommendations(mismatches) + } + + +# Register command +refactor_command = refactor \ No newline at end of file diff --git a/theauditor/commands/report.py b/theauditor/commands/report.py new file mode 100644 index 0000000..087a77f --- /dev/null +++ b/theauditor/commands/report.py @@ -0,0 +1,66 @@ +"""Generate unified audit report from all artifacts.""" + +from pathlib import Path +import click +from theauditor.utils.error_handler import handle_exceptions + + +@click.command() +@handle_exceptions +@click.option("--manifest", default="./.pf/manifest.json", help="Manifest file path") +@click.option("--db", default="./.pf/repo_index.db", help="Database path") +@click.option("--workset", default="./.pf/workset.json", help="Workset file 
path") +@click.option("--capsules", default="./.pf/capsules", help="Capsules directory") +@click.option("--run-report", default="./.pf/run_report.json", help="Run report file path") +@click.option("--journal", default="./.pf/journal.ndjson", help="Journal file path") +@click.option("--fce", default="./.pf/fce.json", help="FCE file path") +@click.option("--ast", default="./.pf/ast_proofs.json", help="AST proofs file path") +@click.option("--ml", default="./.pf/ml_suggestions.json", help="ML suggestions file path") +@click.option("--patch", help="Patch diff file path") +@click.option("--out-dir", default="./.pf/audit", help="Output directory for audit reports") +@click.option("--max-snippet-lines", default=3, type=int, help="Maximum lines per snippet") +@click.option("--max-snippet-chars", default=220, type=int, help="Maximum characters per line") +@click.option("--print-stats", is_flag=True, help="Print summary statistics") +def report( + manifest, + db, + workset, + capsules, + run_report, + journal, + fce, + ast, + ml, + patch, + out_dir, + max_snippet_lines, + max_snippet_chars, + print_stats, +): + """Generate unified audit report from all artifacts.""" + # Report generation has been simplified + # Data is already chunked in .pf/readthis/ by extraction phase + + readthis_dir = Path("./.pf/readthis") + + if readthis_dir.exists(): + json_files = list(readthis_dir.glob("*.json")) + click.echo(f"[OK] Audit report generated - Data chunks ready for AI consumption") + click.echo(f"[INFO] Report contains {len(json_files)} JSON chunks in .pf/readthis/") + + if print_stats: + total_size = sum(f.stat().st_size for f in json_files) + click.echo(f"\n[STATS] Summary:") + click.echo(f" - Total chunks: {len(json_files)}") + click.echo(f" - Total size: {total_size:,} bytes") + click.echo(f" - Average chunk: {total_size // len(json_files):,} bytes" if json_files else " - No chunks") + + click.echo(f"\n[FILES] Available chunks:") + for f in sorted(json_files)[:10]: # Show first 10 + size = f.stat().st_size + click.echo(f" - {f.name} ({size:,} bytes)") + if len(json_files) > 10: + click.echo(f" ... and {len(json_files) - 10} more") + else: + click.echo("[WARNING] No readthis directory found at .pf/readthis/") + click.echo("[INFO] Run 'aud full' to generate analysis data") \ No newline at end of file diff --git a/theauditor/commands/rules.py b/theauditor/commands/rules.py new file mode 100644 index 0000000..2a89c09 --- /dev/null +++ b/theauditor/commands/rules.py @@ -0,0 +1,226 @@ +"""Rules command - inspect and summarize detection capabilities.""" + +import os +import yaml +import importlib +import inspect +from pathlib import Path +from typing import Dict, List, Any + +import click + +from theauditor.utils import handle_exceptions +from theauditor.utils.exit_codes import ExitCodes + + +@click.command(name="rules") +@click.option( + "--summary", + is_flag=True, + default=False, + help="Generate a summary of all detection capabilities", +) +@handle_exceptions +def rules_command(summary: bool) -> None: + """Inspect and summarize TheAuditor's detection rules and patterns. 
+ + Args: + summary: If True, generate a comprehensive capability report + """ + if not summary: + click.echo(click.style("[ERROR] Please specify --summary to generate a capability report", fg="red"), err=True) + raise SystemExit(ExitCodes.TASK_INCOMPLETE) + + # Get the base path for patterns and rules + base_path = Path(__file__).parent.parent + patterns_path = base_path / "patterns" + rules_path = base_path / "rules" + + # Create output directory + output_dir = Path(".pf") + output_dir.mkdir(parents=True, exist_ok=True) + output_file = output_dir / "auditor_capabilities.md" + + # Collect output in a list + output_lines = [] + output_lines.append("# TheAuditor Detection Capabilities\n") + + # Also print to console + print("# TheAuditor Detection Capabilities\n") + + # Scan YAML patterns + print("## YAML Patterns\n") + output_lines.append("## YAML Patterns\n") + yaml_patterns = scan_yaml_patterns(patterns_path) + total_patterns = 0 + + for category, files in yaml_patterns.items(): + if files: + category_display = "patterns/" if category == "." else f"patterns/{category}/" + print(f"### {category_display}\n") + output_lines.append(f"### {category_display}\n") + for file_name, patterns in files.items(): + if patterns: + print(f"**{file_name}** ({len(patterns)} patterns)") + output_lines.append(f"**{file_name}** ({len(patterns)} patterns)") + for pattern in patterns: + print(f"- `{pattern}`") + output_lines.append(f"- `{pattern}`") + print() + output_lines.append("") + total_patterns += len(patterns) + + # Scan Python rules + print("## Python AST Rules\n") + output_lines.append("## Python AST Rules\n") + python_rules = scan_python_rules(rules_path) + total_rules = 0 + + for module_path, functions in python_rules.items(): + if functions: + # Make path relative to rules/ for readability + display_path = module_path.replace(str(rules_path) + os.sep, "") + print(f"### {display_path}") + output_lines.append(f"### {display_path}") + for func in functions: + print(f"- `{func}()`") + output_lines.append(f"- `{func}()`") + print() + output_lines.append("") + total_rules += len(functions) + + # Print summary statistics + print("## Summary Statistics\n") + output_lines.append("## Summary Statistics\n") + print(f"- **Total YAML Patterns**: {total_patterns}") + output_lines.append(f"- **Total YAML Patterns**: {total_patterns}") + print(f"- **Total Python Rules**: {total_rules}") + output_lines.append(f"- **Total Python Rules**: {total_rules}") + print(f"- **Combined Detection Capabilities**: {total_patterns + total_rules}") + output_lines.append(f"- **Combined Detection Capabilities**: {total_patterns + total_rules}") + + # Write to file + with open(output_file, 'w', encoding='utf-8') as f: + f.write('\n'.join(output_lines)) + + click.echo(click.style(f"\n[SUCCESS] Capability report generated successfully", fg="green")) + click.echo(f"[INFO] Report saved to: {output_file}") + raise SystemExit(ExitCodes.SUCCESS) + + +def scan_yaml_patterns(patterns_path: Path) -> Dict[str, Dict[str, List[str]]]: + """Scan YAML pattern files and extract pattern names. 
+ + Args: + patterns_path: Path to the patterns directory + + Returns: + Dictionary mapping category -> file -> list of pattern names + """ + results = {} + + if not patterns_path.exists(): + return results + + # Walk through all subdirectories + for root, dirs, files in os.walk(patterns_path): + # Skip __pycache__ directories + dirs[:] = [d for d in dirs if d != "__pycache__"] + + for file in files: + if file.endswith(".yml") or file.endswith(".yaml"): + file_path = Path(root) / file + + # Determine category from directory structure + rel_path = file_path.relative_to(patterns_path) + # If file is in root of patterns/, use "." as category + # If in subdirectory like frameworks/, use that as category + if rel_path.parent == Path("."): + category = "." + else: + category = str(rel_path.parent) + + if category not in results: + results[category] = {} + + # Parse YAML and extract pattern names + try: + with open(file_path, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + if data and isinstance(data, list): + pattern_names = [] + for pattern in data: + if isinstance(pattern, dict) and 'name' in pattern: + pattern_names.append(pattern['name']) + + if pattern_names: + results[category][file] = pattern_names + + except (yaml.YAMLError, OSError) as e: + # Skip files that can't be parsed + continue + + return results + + +def scan_python_rules(rules_path: Path) -> Dict[str, List[str]]: + """Scan Python rule files and find all find_* functions. + + Args: + rules_path: Path to the rules directory + + Returns: + Dictionary mapping module path -> list of find_* function names + """ + results = {} + + if not rules_path.exists(): + return results + + # First, check what's exposed in the main __init__.py + init_file = rules_path / "__init__.py" + if init_file.exists(): + try: + module = importlib.import_module("theauditor.rules") + exposed_functions = [] + for name, obj in inspect.getmembers(module, inspect.isfunction): + if name.startswith("find_"): + exposed_functions.append(name) + if exposed_functions: + results["rules/__init__.py (exposed)"] = exposed_functions + except ImportError: + pass + + # Walk through all Python files + for root, dirs, files in os.walk(rules_path): + # Skip __pycache__ directories + dirs[:] = [d for d in dirs if d != "__pycache__"] + + for file in files: + if file.endswith(".py"): + file_path = Path(root) / file + + # Skip __init__.py files for now (we handle them separately) + if file == "__init__.py": + continue + + # Try basic text scanning (more reliable than import) + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Simple regex to find function definitions + import re + pattern = r'^def\s+(find_\w+)\s*\(' + matches = re.findall(pattern, content, re.MULTILINE) + + if matches: + # Make path relative for display + display_path = str(file_path.relative_to(rules_path.parent)) + results[display_path] = matches + + except OSError: + continue + + return results \ No newline at end of file diff --git a/theauditor/commands/setup.py b/theauditor/commands/setup.py new file mode 100644 index 0000000..ee98ada --- /dev/null +++ b/theauditor/commands/setup.py @@ -0,0 +1,63 @@ +"""Setup commands for TheAuditor - Claude Code integration.""" + +import click + + +@click.command("setup-claude") +@click.option( + "--target", + required=True, + help="Target project root (absolute or relative path)" +) +@click.option( + "--source", + default="agent_templates", + help="Path to TheAuditor agent templates directory (default: agent_templates)" +) 
+@click.option( + "--sync", + is_flag=True, + help="Force update (still creates .bak on first change only)" +) +@click.option( + "--dry-run", + is_flag=True, + help="Print plan without executing" +) +def setup_claude(target, source, sync, dry_run): + """Install Claude Code agents, hooks, and per-project venv for TheAuditor. + + This command performs a complete zero-optional installation: + 1. Creates a Python venv at /.venv + 2. Installs TheAuditor into that venv (editable/offline) + 3. Creates cross-platform launcher wrappers at /.claude/bin/ + 4. Generates Claude agents from agent_templates/*.md + 5. Writes hooks to /.claude/hooks.json + + All commands in agents/hooks use ./.claude/bin/aud to ensure + they run with the project's own venv. + """ + from theauditor.claude_setup import setup_claude_complete + + try: + result = setup_claude_complete( + target=target, + source=source, + sync=sync, + dry_run=dry_run + ) + + # The setup_claude_complete function already prints detailed output + # Just handle any failures here + if result.get("failed"): + click.echo("\n[WARN] Some operations failed:", err=True) + for item in result["failed"]: + click.echo(f" - {item}", err=True) + raise click.ClickException("Setup incomplete due to failures") + + except ValueError as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e \ No newline at end of file diff --git a/theauditor/commands/structure.py b/theauditor/commands/structure.py new file mode 100644 index 0000000..d4df424 --- /dev/null +++ b/theauditor/commands/structure.py @@ -0,0 +1,96 @@ +"""Project structure and intelligence report command.""" + +import click +from pathlib import Path +from theauditor.utils.error_handler import handle_exceptions +from theauditor.utils.exit_codes import ExitCodes + + +@click.command("structure") +@handle_exceptions +@click.option("--root", default=".", help="Root directory to analyze") +@click.option("--manifest", default="./.pf/manifest.json", help="Path to manifest.json") +@click.option("--db-path", default="./.pf/repo_index.db", help="Path to repo_index.db") +@click.option("--output", default="./.pf/readthis/STRUCTURE.md", help="Output file path") +@click.option("--max-depth", default=4, type=int, help="Maximum directory tree depth") +def structure(root, manifest, db_path, output, max_depth): + """Generate project structure and intelligence report. 
+ + Creates a comprehensive markdown report including: + - Directory tree visualization + - Project statistics (files, LOC, tokens) + - Language distribution + - Top 10 largest files by tokens + - Top 15 critical files by convention + - AI context optimization recommendations + """ + from theauditor.project_summary import generate_project_summary, generate_directory_tree + + # Check if manifest exists (not required but enhances report) + manifest_exists = Path(manifest).exists() + db_exists = Path(db_path).exists() + + if not manifest_exists and not db_exists: + click.echo("Warning: Neither manifest.json nor repo_index.db found", err=True) + click.echo("Run 'aud index' first for complete statistics", err=True) + click.echo("Generating basic structure report...\n") + elif not manifest_exists: + click.echo("Warning: manifest.json not found, statistics will be limited", err=True) + elif not db_exists: + click.echo("Warning: repo_index.db not found, symbol counts will be missing", err=True) + + # Generate the report + click.echo(f"Analyzing project structure (max depth: {max_depth})...") + + try: + # Generate full report + report_content = generate_project_summary( + root_path=root, + manifest_path=manifest, + db_path=db_path, + max_depth=max_depth + ) + + # Ensure output directory exists + output_path = Path(output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Write report + with open(output_path, 'w', encoding='utf-8') as f: + f.write(report_content) + + click.echo(f"\n✓ Project structure report generated: {output}") + + # Show summary stats if available + if manifest_exists: + import json + with open(manifest, 'r') as f: + manifest_data = json.load(f) + + total_files = len(manifest_data) + total_loc = sum(f.get('loc', 0) for f in manifest_data) + total_bytes = sum(f.get('bytes', 0) for f in manifest_data) + total_tokens = total_bytes // 4 # Rough approximation + + click.echo(f"\nProject Summary:") + click.echo(f" Files: {total_files:,}") + click.echo(f" LOC: {total_loc:,}") + click.echo(f" Tokens: ~{total_tokens:,}") + + # Token percentage of Claude's context + # Claude has 200k context, but practical limit is ~160k for user content + # (leaving room for system prompts, conversation history, response) + claude_total_context = 200000 # Total context window + claude_usable_context = 160000 # Practical limit for user content + token_percent = (total_tokens / claude_usable_context * 100) if total_tokens > 0 else 0 + + if token_percent > 100: + click.echo(f" Context Usage: {token_percent:.1f}% (EXCEEDS Claude's practical limit)") + else: + click.echo(f" Context Usage: {token_percent:.1f}% of Claude's usable window") + + return ExitCodes.SUCCESS + + except Exception as e: + click.echo(f"Error generating report: {e}", err=True) + return ExitCodes.TASK_INCOMPLETE \ No newline at end of file diff --git a/theauditor/commands/summary.py b/theauditor/commands/summary.py new file mode 100644 index 0000000..a070940 --- /dev/null +++ b/theauditor/commands/summary.py @@ -0,0 +1,236 @@ +"""Generate comprehensive audit summary from all analysis phases.""" + +import json +import time +from pathlib import Path +from typing import Any, Dict +import click + + +@click.command() +@click.option("--root", default=".", help="Root directory") +@click.option("--raw-dir", default="./.pf/raw", help="Raw outputs directory") +@click.option("--out", default="./.pf/raw/audit_summary.json", help="Output path for summary") +def summary(root, raw_dir, out): + """Generate comprehensive audit summary from all 
phases.""" + start_time = time.time() + raw_path = Path(raw_dir) + + # Initialize summary structure + audit_summary = { + "generated_at": time.strftime('%Y-%m-%d %H:%M:%S'), + "overall_status": "UNKNOWN", + "total_runtime_seconds": 0, + "total_findings_by_severity": { + "critical": 0, + "high": 0, + "medium": 0, + "low": 0, + "info": 0 + }, + "metrics_by_phase": {}, + "key_statistics": {} + } + + # Helper function to safely load JSON + def load_json(file_path: Path) -> Dict[str, Any]: + if file_path.exists(): + try: + with open(file_path, 'r', encoding='utf-8') as f: + return json.load(f) + except (json.JSONDecodeError, IOError): + pass + return {} + + # Phase 1: Index metrics + manifest_path = Path(root) / "manifest.json" + if manifest_path.exists(): + manifest = load_json(manifest_path) + if isinstance(manifest, list): + audit_summary["metrics_by_phase"]["index"] = { + "files_indexed": len(manifest), + "total_size_bytes": sum(f.get("size", 0) for f in manifest) + } + + # Phase 2: Framework detection + frameworks = load_json(raw_path / "frameworks.json") + if frameworks: + if isinstance(frameworks, dict): + framework_list = frameworks.get("frameworks", []) + else: + framework_list = frameworks if isinstance(frameworks, list) else [] + + audit_summary["metrics_by_phase"]["detect_frameworks"] = { + "frameworks_detected": len(framework_list), + "languages": list(set(f.get("language", "") if isinstance(f, dict) else "" for f in framework_list)) + } + + # Phase 3: Dependencies + deps = load_json(raw_path / "deps.json") + deps_latest = load_json(raw_path / "deps_latest.json") + if deps or deps_latest: + outdated_count = 0 + vulnerability_count = 0 + total_deps = 0 + + # Handle deps being either dict or list + if isinstance(deps, dict): + total_deps = len(deps.get("dependencies", [])) + elif isinstance(deps, list): + total_deps = len(deps) + + # Handle deps_latest structure + if isinstance(deps_latest, dict) and "packages" in deps_latest: + for pkg in deps_latest["packages"]: + if isinstance(pkg, dict): + if pkg.get("outdated"): + outdated_count += 1 + if pkg.get("vulnerabilities"): + vulnerability_count += len(pkg["vulnerabilities"]) + + audit_summary["metrics_by_phase"]["dependencies"] = { + "total_dependencies": total_deps, + "outdated_packages": outdated_count, + "vulnerabilities": vulnerability_count + } + + # Phase 7: Linting + lint_data = load_json(raw_path / "lint.json") + if lint_data and "findings" in lint_data: + lint_by_severity = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0} + for finding in lint_data["findings"]: + severity = finding.get("severity", "info").lower() + if severity in lint_by_severity: + lint_by_severity[severity] += 1 + + audit_summary["metrics_by_phase"]["lint"] = { + "total_issues": len(lint_data["findings"]), + "by_severity": lint_by_severity + } + + # Add to total + for sev, count in lint_by_severity.items(): + audit_summary["total_findings_by_severity"][sev] += count + + # Phase 8: Pattern detection + patterns = load_json(raw_path / "patterns.json") + if not patterns: + patterns = load_json(raw_path / "findings.json") + + if patterns and "findings" in patterns: + pattern_by_severity = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0} + for finding in patterns["findings"]: + severity = finding.get("severity", "info").lower() + if severity in pattern_by_severity: + pattern_by_severity[severity] += 1 + + audit_summary["metrics_by_phase"]["patterns"] = { + "total_patterns_matched": len(patterns["findings"]), + "by_severity": 
pattern_by_severity + } + + # Add to total + for sev, count in pattern_by_severity.items(): + audit_summary["total_findings_by_severity"][sev] += count + + # Phase 9-10: Graph analysis + graph_analysis = load_json(raw_path / "graph_analysis.json") + graph_metrics = load_json(raw_path / "graph_metrics.json") + if graph_analysis: + summary_data = graph_analysis.get("summary", {}) + audit_summary["metrics_by_phase"]["graph"] = { + "import_nodes": summary_data.get("import_graph", {}).get("nodes", 0), + "import_edges": summary_data.get("import_graph", {}).get("edges", 0), + "cycles_detected": len(graph_analysis.get("cycles", [])), + "hotspots_identified": len(graph_analysis.get("hotspots", [])), + "graph_density": summary_data.get("import_graph", {}).get("density", 0) + } + + if "health_metrics" in summary_data: + audit_summary["metrics_by_phase"]["graph"]["health_grade"] = summary_data["health_metrics"].get("health_grade", "N/A") + audit_summary["metrics_by_phase"]["graph"]["fragility_score"] = summary_data["health_metrics"].get("fragility_score", 0) + + # Phase 11: Taint analysis + taint = load_json(raw_path / "taint_analysis.json") + if taint: + taint_by_severity = {"critical": 0, "high": 0, "medium": 0, "low": 0} + if "taint_paths" in taint: + for path in taint["taint_paths"]: + severity = path.get("severity", "medium").lower() + if severity in taint_by_severity: + taint_by_severity[severity] += 1 + + audit_summary["metrics_by_phase"]["taint_analysis"] = { + "taint_paths_found": len(taint.get("taint_paths", [])), + "total_vulnerabilities": taint.get("total_vulnerabilities", 0), + "by_severity": taint_by_severity + } + + # Add to total + for sev, count in taint_by_severity.items(): + if sev in audit_summary["total_findings_by_severity"]: + audit_summary["total_findings_by_severity"][sev] += count + + # Phase 12: FCE (Factual Correlation Engine) + fce = load_json(raw_path / "fce.json") + if fce: + correlations = fce.get("correlations", {}) + audit_summary["metrics_by_phase"]["fce"] = { + "total_findings": len(fce.get("all_findings", [])), + "test_failures": len(fce.get("test_results", {}).get("failures", [])), + "hotspots_correlated": correlations.get("total_hotspots", 0), + "factual_clusters": len(correlations.get("factual_clusters", [])) + } + + # Calculate overall status based on severity counts + severity_counts = audit_summary["total_findings_by_severity"] + if severity_counts["critical"] > 0: + audit_summary["overall_status"] = "CRITICAL" + elif severity_counts["high"] > 0: + audit_summary["overall_status"] = "HIGH" + elif severity_counts["medium"] > 0: + audit_summary["overall_status"] = "MEDIUM" + elif severity_counts["low"] > 0: + audit_summary["overall_status"] = "LOW" + else: + audit_summary["overall_status"] = "CLEAN" + + # Add key statistics + audit_summary["key_statistics"] = { + "total_findings": sum(severity_counts.values()), + "phases_with_findings": len([p for p in audit_summary["metrics_by_phase"] if audit_summary["metrics_by_phase"][p]]), + "total_phases_run": len(audit_summary["metrics_by_phase"]) + } + + # Calculate runtime + elapsed = time.time() - start_time + audit_summary["summary_generation_time"] = elapsed + + # Read pipeline.log for total runtime if available + pipeline_log = Path(root) / ".pf" / "pipeline.log" + if pipeline_log.exists(): + try: + with open(pipeline_log, 'r') as f: + for line in f: + if "[TIME] Total time:" in line: + # Extract seconds from line like "[TIME] Total time: 73.0s" + parts = line.split(":")[-1].strip().replace("s", "").split("(")[0] + 
audit_summary["total_runtime_seconds"] = float(parts) + break + except: + pass + + # Save the summary + out_path = Path(out) + out_path.parent.mkdir(parents=True, exist_ok=True) + with open(out_path, 'w', encoding='utf-8') as f: + json.dump(audit_summary, f, indent=2) + + # Output results + click.echo(f"[OK] Audit summary generated in {elapsed:.1f}s") + click.echo(f" Overall status: {audit_summary['overall_status']}") + click.echo(f" Total findings: {audit_summary['key_statistics']['total_findings']}") + click.echo(f" Critical: {severity_counts['critical']}, High: {severity_counts['high']}, Medium: {severity_counts['medium']}, Low: {severity_counts['low']}") + click.echo(f" Summary saved to: {out_path}") + + return audit_summary \ No newline at end of file diff --git a/theauditor/commands/taint.py b/theauditor/commands/taint.py new file mode 100644 index 0000000..a05bbff --- /dev/null +++ b/theauditor/commands/taint.py @@ -0,0 +1,272 @@ +"""Perform taint analysis to detect security vulnerabilities via data flow tracking.""" + +import sys +import platform +import click +from pathlib import Path +from datetime import datetime, UTC +from theauditor.utils.error_handler import handle_exceptions + +# Detect if running on Windows for character encoding +IS_WINDOWS = platform.system() == "Windows" + + + +@click.command("taint-analyze") +@handle_exceptions +@click.option("--db", default=None, help="Path to the SQLite database (default: repo_index.db)") +@click.option("--output", default="./.pf/raw/taint_analysis.json", help="Output path for analysis results") +@click.option("--max-depth", default=5, type=int, help="Maximum depth for taint propagation tracing") +@click.option("--json", is_flag=True, help="Output raw JSON instead of formatted report") +@click.option("--verbose", is_flag=True, help="Show detailed path information") +@click.option("--severity", type=click.Choice(["all", "critical", "high", "medium", "low"]), + default="all", help="Filter results by severity level") +@click.option("--rules/--no-rules", default=True, help="Enable/disable rule-based detection") +def taint_analyze(db, output, max_depth, json, verbose, severity, rules): + """ + Perform taint analysis to detect security vulnerabilities. + + This command traces the flow of untrusted data from taint sources + (user inputs) to security sinks (dangerous functions) to identify + potential injection vulnerabilities and data exposure risks. 
+ + The analysis detects: + - SQL Injection + - Command Injection + - Cross-Site Scripting (XSS) + - Path Traversal + - LDAP Injection + - NoSQL Injection + + Example: + aud taint-analyze + aud taint-analyze --severity critical --verbose + aud taint-analyze --json --output vulns.json + """ + from theauditor.taint_analyzer import trace_taint, save_taint_analysis, normalize_taint_path, SECURITY_SINKS + from theauditor.taint.insights import format_taint_report, calculate_severity, generate_summary, classify_vulnerability + from theauditor.config_runtime import load_runtime_config + from theauditor.rules.orchestrator import RulesOrchestrator, RuleContext + from theauditor.taint.registry import TaintRegistry + import json as json_lib + + # Load configuration for default paths + config = load_runtime_config(".") + + # Use default database path if not provided + if db is None: + db = config["paths"]["db"] + + # Verify database exists + db_path = Path(db) + if not db_path.exists(): + click.echo(f"Error: Database not found at {db}", err=True) + click.echo("Run 'aud index' first to build the repository index", err=True) + raise click.ClickException(f"Database not found: {db}") + + # Check if rules are enabled + if rules: + # STAGE 1: Initialize infrastructure + click.echo("Initializing security analysis infrastructure...") + registry = TaintRegistry() + orchestrator = RulesOrchestrator(project_path=Path("."), db_path=db_path) + + # Track all findings + all_findings = [] + + # STAGE 2: Run standalone infrastructure rules + click.echo("Running infrastructure and configuration analysis...") + infra_findings = orchestrator.run_standalone_rules() + all_findings.extend(infra_findings) + click.echo(f" Found {len(infra_findings)} infrastructure issues") + + # STAGE 3: Run discovery rules to populate registry + click.echo("Discovering framework-specific patterns...") + discovery_findings = orchestrator.run_discovery_rules(registry) + all_findings.extend(discovery_findings) + + stats = registry.get_stats() + click.echo(f" Registry now has {stats['total_sinks']} sinks, {stats['total_sources']} sources") + + # STAGE 4: Run enriched taint analysis with registry + click.echo("Performing data-flow taint analysis...") + result = trace_taint( + db_path=str(db_path), + max_depth=max_depth, + registry=registry + ) + + # Extract taint paths + taint_paths = result.get("taint_paths", result.get("paths", [])) + click.echo(f" Found {len(taint_paths)} taint flow vulnerabilities") + + # STAGE 5: Run taint-dependent rules + click.echo("Running advanced security analysis...") + + # Create taint checker from results + def taint_checker(var_name, line_num=None): + """Check if variable is in any taint path.""" + for path in taint_paths: + # Check source + if path.get("source", {}).get("name") == var_name: + return True + # Check sink + if path.get("sink", {}).get("name") == var_name: + return True + # Check intermediate steps + for step in path.get("path", []): + if isinstance(step, dict) and step.get("name") == var_name: + return True + return False + + advanced_findings = orchestrator.run_taint_dependent_rules(taint_checker) + all_findings.extend(advanced_findings) + click.echo(f" Found {len(advanced_findings)} advanced security issues") + + # STAGE 6: Consolidate all findings + click.echo(f"\nTotal vulnerabilities found: {len(all_findings) + len(taint_paths)}") + + # Add all non-taint findings to result + result["infrastructure_issues"] = infra_findings + result["discovery_findings"] = discovery_findings + 
result["advanced_findings"] = advanced_findings + result["all_rule_findings"] = all_findings + + # Update total count + result["total_vulnerabilities"] = len(taint_paths) + len(all_findings) + else: + # Original taint analysis without orchestrator + click.echo("Performing taint analysis (rules disabled)...") + result = trace_taint( + db_path=str(db_path), + max_depth=max_depth + ) + + # Enrich raw paths with interpretive insights + if result.get("success"): + # Add severity and classification to each path + enriched_paths = [] + for path in result.get("taint_paths", result.get("paths", [])): + # Normalize the path first + path = normalize_taint_path(path) + # Add severity + path["severity"] = calculate_severity(path) + # Enrich sink information with vulnerability classification + path["vulnerability_type"] = classify_vulnerability( + path.get("sink", {}), + SECURITY_SINKS + ) + enriched_paths.append(path) + + # Update result with enriched paths + result["taint_paths"] = enriched_paths + result["paths"] = enriched_paths + + # Generate summary + result["summary"] = generate_summary(enriched_paths) + + # Filter by severity if requested + if severity != "all" and result.get("success"): + filtered_paths = [] + for path in result.get("taint_paths", result.get("paths", [])): + # Normalize the path to ensure all keys exist + path = normalize_taint_path(path) + if path["severity"].lower() == severity or ( + severity == "critical" and path["severity"].lower() == "critical" + ) or ( + severity == "high" and path["severity"].lower() in ["critical", "high"] + ): + filtered_paths.append(path) + + # Update counts + result["taint_paths"] = filtered_paths + result["paths"] = filtered_paths # Keep both keys synchronized + result["total_vulnerabilities"] = len(filtered_paths) + + # Recalculate vulnerability types + from collections import defaultdict + vuln_counts = defaultdict(int) + for path in filtered_paths: + # Path is already normalized from filtering above + vuln_counts[path.get("vulnerability_type", "Unknown")] += 1 + result["vulnerabilities_by_type"] = dict(vuln_counts) + + # CRITICAL FIX: Recalculate summary with filtered paths + from theauditor.taint.insights import generate_summary + result["summary"] = generate_summary(filtered_paths) + + # Save COMPLETE taint analysis results to raw (including all data) + save_taint_analysis(result, output) + click.echo(f"Raw analysis results saved to: {output}") + + # Output results + if json: + # JSON output for programmatic use + click.echo(json_lib.dumps(result, indent=2, sort_keys=True)) + else: + # Human-readable report + report = format_taint_report(result) + click.echo(report) + + # Additional verbose output + if verbose and result.get("success"): + paths = result.get("taint_paths", result.get("paths", [])) + if paths and len(paths) > 10: + click.echo("\n" + "=" * 60) + click.echo("ADDITIONAL VULNERABILITY DETAILS") + click.echo("=" * 60) + + for i, path in enumerate(paths[10:20], 11): + # Normalize path to ensure all keys exist + path = normalize_taint_path(path) + click.echo(f"\n{i}. {path['vulnerability_type']} ({path['severity']})") + click.echo(f" Source: {path['source']['file']}:{path['source']['line']}") + click.echo(f" Sink: {path['sink']['file']}:{path['sink']['line']}") + arrow = "->" if IS_WINDOWS else "→" + click.echo(f" Pattern: {path['source'].get('pattern', '')} {arrow} {path['sink'].get('pattern', '')}") # Empty not unknown + + if len(paths) > 20: + click.echo(f"\n... 
and {len(paths) - 20} additional vulnerabilities not shown") + + # Provide actionable recommendations based on findings + if not json and result.get("success"): + summary = result.get("summary", {}) + risk_level = summary.get("risk_level", "UNKNOWN") + + click.echo("\n" + "=" * 60) + click.echo("RECOMMENDED ACTIONS") + click.echo("=" * 60) + + if risk_level == "CRITICAL": + click.echo("[CRITICAL] CRITICAL SECURITY ISSUES DETECTED") + click.echo("1. Review and fix all CRITICAL vulnerabilities immediately") + click.echo("2. Add input validation and sanitization at all entry points") + click.echo("3. Use parameterized queries for all database operations") + click.echo("4. Implement output encoding for all user-controlled data") + click.echo("5. Consider a security audit before deployment") + elif risk_level == "HIGH": + click.echo("[HIGH] HIGH RISK VULNERABILITIES FOUND") + click.echo("1. Prioritize fixing HIGH severity issues this sprint") + click.echo("2. Review all user input handling code") + click.echo("3. Implement security middleware/filters") + click.echo("4. Add security tests for vulnerable paths") + elif risk_level == "MEDIUM": + click.echo("[MEDIUM] MODERATE SECURITY CONCERNS") + click.echo("1. Schedule vulnerability fixes for next sprint") + click.echo("2. Review and update security best practices") + click.echo("3. Add input validation where missing") + else: + click.echo("[LOW] LOW RISK PROFILE") + click.echo("1. Continue following secure coding practices") + click.echo("2. Regular security scanning recommended") + click.echo("3. Keep dependencies updated") + + # Exit with appropriate code + if result.get("success"): + summary = result.get("summary", {}) + if summary.get("critical_count", 0) > 0: + exit(2) # Critical vulnerabilities found + elif summary.get("high_count", 0) > 0: + exit(1) # High severity vulnerabilities found + else: + raise click.ClickException(result.get("error", "Analysis failed")) \ No newline at end of file diff --git a/theauditor/commands/tool_versions.py b/theauditor/commands/tool_versions.py new file mode 100644 index 0000000..bb985b9 --- /dev/null +++ b/theauditor/commands/tool_versions.py @@ -0,0 +1,25 @@ +"""Detect and record tool versions.""" + +import click + + +@click.command("tool-versions") +@click.option("--out-dir", default="./.pf/audit", help="Output directory") +def tool_versions(out_dir): + """Detect and record tool versions.""" + from theauditor.tools import write_tools_report + + try: + res = write_tools_report(out_dir) + click.echo(f"[OK] Tool versions written to {out_dir}/") + click.echo(" - TOOLS.md (human-readable)") + click.echo(" - tools.json (machine-readable)") + + # Show summary + python_found = sum(1 for v in res["python"].values() if v != "missing") + node_found = sum(1 for v in res["node"].values() if v != "missing") + click.echo(f" - Python tools: {python_found}/4 found") + click.echo(f" - Node tools: {node_found}/3 found") + except Exception as e: + click.echo(f"Error: {e}", err=True) + raise click.ClickException(str(e)) from e \ No newline at end of file diff --git a/theauditor/commands/validate_templates.py b/theauditor/commands/validate_templates.py new file mode 100644 index 0000000..cd31f30 --- /dev/null +++ b/theauditor/commands/validate_templates.py @@ -0,0 +1,30 @@ +"""Validate agent templates for SOP compliance.""" + +import click + + +@click.command("validate-templates") +@click.option("--source", default="./agent_templates", help="Directory containing agent templates") +@click.option("--format", 
type=click.Choice(["json", "text"]), default="text", help="Output format") +@click.option("--output", help="Write report to file instead of stdout") +def validate_templates(source, format, output): + """Validate agent templates for SOP compliance.""" + from theauditor.agent_template_validator import TemplateValidator + + validator = TemplateValidator() + results = validator.validate_all(source) + + report = validator.generate_report(results, format=format) + + if output: + with open(output, 'w') as f: + f.write(report) + click.echo(f"Report written to {output}") + else: + click.echo(report) + + # Exit with non-zero if violations found + if not results["valid"]: + raise click.ClickException( + f"Template validation failed: {results['total_violations']} violations found" + ) \ No newline at end of file diff --git a/theauditor/commands/workset.py b/theauditor/commands/workset.py new file mode 100644 index 0000000..1f60510 --- /dev/null +++ b/theauditor/commands/workset.py @@ -0,0 +1,55 @@ +"""Compute target file set from git diff and dependencies.""" + +import click +from theauditor.utils.error_handler import handle_exceptions + + +@click.command() +@handle_exceptions +@click.option("--root", default=".", help="Root directory") +@click.option("--db", default=None, help="Input SQLite database path") +@click.option("--manifest", default=None, help="Input manifest file path") +@click.option("--all", is_flag=True, help="Include all source files (ignores common directories)") +@click.option("--diff", help="Git diff range (e.g., main..HEAD)") +@click.option("--files", multiple=True, help="Explicit file list") +@click.option("--include", multiple=True, help="Include glob patterns") +@click.option("--exclude", multiple=True, help="Exclude glob patterns") +@click.option("--max-depth", default=None, type=int, help="Maximum dependency depth") +@click.option("--out", default=None, help="Output workset file path") +@click.option("--print-stats", is_flag=True, help="Print summary statistics") +def workset(root, db, manifest, all, diff, files, include, exclude, max_depth, out, print_stats): + """Compute target file set from git diff and dependencies.""" + from theauditor.workset import compute_workset + from theauditor.config_runtime import load_runtime_config + + # Load configuration + config = load_runtime_config(root) + + # Use config defaults if not provided + if db is None: + db = config["paths"]["db"] + if manifest is None: + manifest = config["paths"]["manifest"] + if out is None: + out = config["paths"]["workset"] + if max_depth is None: + max_depth = config["limits"]["max_graph_depth"] + + result = compute_workset( + root_path=root, + db_path=db, + manifest_path=manifest, + all_files=all, + diff_spec=diff, + file_list=list(files) if files else None, + include_patterns=list(include) if include else None, + exclude_patterns=list(exclude) if exclude else None, + max_depth=max_depth, + output_path=out, + print_stats=print_stats, + ) + + if not print_stats: + click.echo(f"Workset written to {out}") + click.echo(f" Seed files: {result['seed_count']}") + click.echo(f" Expanded files: {result['expanded_count']}") \ No newline at end of file diff --git a/theauditor/config.py b/theauditor/config.py new file mode 100644 index 0000000..2ff1b74 --- /dev/null +++ b/theauditor/config.py @@ -0,0 +1,40 @@ +"""Configuration management for TheAuditor.""" + +import tomllib +from pathlib import Path + + +def ensure_mypy_config(pyproject_path: str) -> dict[str, str]: + """ + Ensure minimal mypy config exists in 
pyproject.toml. + + Returns: + {"status": "created"} if config was added + {"status": "exists"} if config already present + """ + path = Path(pyproject_path) + + if not path.exists(): + raise FileNotFoundError(f"pyproject.toml not found at {pyproject_path}") + + # Parse to check if [tool.mypy] exists + with open(path, "rb") as f: + data = tomllib.load(f) + + # Check if mypy config already exists + if "tool" in data and "mypy" in data["tool"]: + return {"status": "exists"} + + # Mypy config to append + mypy_block = """ + +[tool.mypy] +python_version = "3.12" +strict = true +warn_unused_configs = true""" + + # Append to file + with open(path, "a") as f: + f.write(mypy_block) + + return {"status": "created"} diff --git a/theauditor/config_runtime.py b/theauditor/config_runtime.py new file mode 100644 index 0000000..a3eb732 --- /dev/null +++ b/theauditor/config_runtime.py @@ -0,0 +1,160 @@ +"""Runtime configuration for TheAuditor - centralized configuration management.""" + +from __future__ import annotations +import json +import os +from pathlib import Path +from typing import Any + + +DEFAULTS = { + "paths": { + # Core files + "manifest": "./.pf/manifest.json", + "db": "./.pf/repo_index.db", + "workset": "./.pf/workset.json", + + # Directories + "pf_dir": "./.pf", + "capsules_dir": "./.pf/capsules", + "docs_dir": "./.pf/docs", + "audit_dir": "./.pf/audit", + "context_docs_dir": "./.pf/context/docs", + "doc_capsules_dir": "./.pf/context/doc_capsules", + "graphs_dir": "./.pf/graphs", + "model_dir": "./.pf/ml", + "claude_dir": "./.claude", + + # Core artifacts + "journal": "./.pf/journal.ndjson", + "checkpoint": "./.pf/checkpoint.json", + "run_report": "./.pf/run_report.json", + "fce_json": "./.pf/raw/fce.json", + "ast_proofs_json": "./.pf/ast_proofs.json", + "ast_proofs_md": "./.pf/ast_proofs.md", + "ml_suggestions": "./.pf/insights/ml_suggestions.json", + "graphs_db": "./.pf/graphs.db", + "graph_analysis": "./.pf/graph_analysis.json", + "deps_json": "./.pf/deps.json", + "findings_json": "./.pf/findings.json", + "patterns_json": "./.pf/patterns.json", + "xgraph_json": "./.pf/xgraph.json", + "pattern_fce_json": "./.pf/pattern_fce.json", + "fix_suggestions_json": "./.pf/fix_suggestions.json", + "policy_yml": "./.pf/policy.yml", + }, + "limits": { + # File size limits + "max_file_size": 2 * 1024 * 1024, # 2 MiB + + # Chunking limits for extraction + "max_chunks_per_file": 3, # Maximum number of chunks per extracted file + "max_chunk_size": 56320, # Maximum size per chunk in bytes (55KB) + + # Batch processing + "default_batch_size": 200, + "evidence_batch_size": 100, + + # ML and analysis windows + "ml_window": 50, + "git_churn_window_days": 30, + + # Graph analysis + "max_graph_depth": 3, + "high_risk_threshold": 0.5, + "high_risk_limit": 10, + "graph_limit_nodes": 500, + }, + "timeouts": { + # Tool detection (quick checks) + "tool_detection": 5, + + # Network operations + "url_fetch": 10, + "venv_check": 30, + + # Build/test operations + "test_run": 60, + "venv_install": 120, + + # Analysis operations + "lint_timeout": 300, + "orchestrator_timeout": 300, + + # FCE and long operations + "fce_timeout": 600, + }, + "report": { + "max_lint_rows": 50, + "max_ast_rows": 50, + "max_snippet_lines": 12, + "max_snippet_chars": 800, + } +} + + +def load_runtime_config(root: str = ".") -> dict[str, Any]: + """ + Load runtime configuration from .pf/config.json and environment variables. + + Config priority (highest to lowest): + 1. Environment variables (THEAUDITOR_* prefixed) + 2. .pf/config.json file + 3. 
Built-in defaults + + Args: + root: Root directory to look for config file + + Returns: + Configuration dictionary with merged values + """ + # Start with deep copy of defaults + import copy + cfg = copy.deepcopy(DEFAULTS) + + # Try to load user config from .pf/config.json + path = Path(root) / ".pf" / "config.json" + try: + if path.exists(): + with open(path, "r", encoding="utf-8") as f: + user = json.load(f) + + # Merge each section if present + if isinstance(user, dict): + for section in ["paths", "limits", "timeouts", "report"]: + if section in user and isinstance(user[section], dict): + for key, value in user[section].items(): + # Validate type matches default + if key in cfg[section]: + if isinstance(value, type(cfg[section][key])): + cfg[section][key] = value + except (json.JSONDecodeError, IOError, OSError) as e: + print(f"[WARNING] Could not load config file from {path}: {e}") + print("[INFO] Continuing with default configuration") + # Continue with defaults - config file is optional + + # Environment variable overrides (flattened namespace) + # Format: THEAUDITOR_SECTION_KEY (e.g., THEAUDITOR_PATHS_MANIFEST) + for section in cfg: + for key in cfg[section]: + env_var = f"THEAUDITOR_{section.upper()}_{key.upper()}" + if env_var in os.environ: + value = os.environ[env_var] + try: + # Try to cast to the same type as the default + default_value = cfg[section][key] + if isinstance(default_value, int): + cfg[section][key] = int(value) + elif isinstance(default_value, float): + cfg[section][key] = float(value) + elif isinstance(default_value, list): + # Parse comma-separated values for lists + cfg[section][key] = [v.strip() for v in value.split(",")] + else: + cfg[section][key] = value + except (ValueError, AttributeError) as e: + print(f"[WARNING] Invalid value for environment variable {env_var}: '{value}' - {e}") + print(f"[INFO] Using default value: {cfg[section][key]}") + # Continue with default value - env vars are optional overrides + + return cfg \ No newline at end of file diff --git a/theauditor/correlations/__init__.py b/theauditor/correlations/__init__.py new file mode 100644 index 0000000..0ccac3b --- /dev/null +++ b/theauditor/correlations/__init__.py @@ -0,0 +1,5 @@ +"""Correlation rules for the Factual Correlation Engine.""" + +from .loader import CorrelationLoader, CorrelationRule + +__all__ = ["CorrelationLoader", "CorrelationRule"] \ No newline at end of file diff --git a/theauditor/correlations/loader.py b/theauditor/correlations/loader.py new file mode 100644 index 0000000..32cf378 --- /dev/null +++ b/theauditor/correlations/loader.py @@ -0,0 +1,237 @@ +"""Correlation rule loader for the Factual Correlation Engine.""" + +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +import yaml + + +@dataclass +class CorrelationRule: + """Represents a single correlation rule for factual co-occurrence detection.""" + + name: str + co_occurring_facts: List[Dict[str, str]] + description: Optional[str] = None + confidence: float = 0.8 + compiled_patterns: List[Dict[str, Any]] = field(default_factory=list, init=False, repr=False) + + def __post_init__(self): + """Compile regex patterns in co-occurring facts after initialization.""" + for fact in self.co_occurring_facts: + if 'tool' not in fact or 'pattern' not in fact: + raise ValueError(f"Invalid fact in rule '{self.name}': must contain 'tool' and 'pattern' keys") + + compiled_fact = { + 'tool': fact['tool'], + 'pattern': fact['pattern'] + } + + # Try to 
compile as regex, if it fails, treat as literal string + try: + compiled_fact['compiled_regex'] = re.compile(fact['pattern'], re.IGNORECASE) + compiled_fact['is_regex'] = True + except re.error: + # Not a valid regex, will be used as literal string match + compiled_fact['is_regex'] = False + + self.compiled_patterns.append(compiled_fact) + + def matches_finding(self, finding: Dict[str, Any], fact_index: int) -> bool: + """Check if a finding matches a specific fact pattern. + + Args: + finding: Dictionary containing finding data with 'tool' and 'rule' keys + fact_index: Index of the fact pattern to check + + Returns: + True if the finding matches the specified fact pattern + """ + if fact_index >= len(self.compiled_patterns): + return False + + fact = self.compiled_patterns[fact_index] + + # Check tool match + if finding.get('tool') != fact['tool']: + return False + + # Check pattern match against rule or message + if fact['is_regex']: + # Check against rule field and message field + rule_match = fact['compiled_regex'].search(finding.get('rule', '')) + message_match = fact['compiled_regex'].search(finding.get('message', '')) + return bool(rule_match or message_match) + else: + # Literal string match + return (fact['pattern'] in finding.get('rule', '') or + fact['pattern'] in finding.get('message', '')) + + +class CorrelationLoader: + """Loads and manages correlation rules from YAML files.""" + + def __init__(self, rules_dir: Optional[Path] = None): + """Initialize correlation loader. + + Args: + rules_dir: Directory containing correlation rule YAML files. + Defaults to theauditor/correlations/rules/ + """ + if rules_dir is None: + rules_dir = Path(__file__).parent / "rules" + self.rules_dir = Path(rules_dir) + self.rules: List[CorrelationRule] = [] + self._loaded = False + + def load_rules(self) -> List[CorrelationRule]: + """Load correlation rules from YAML files. + + Returns: + List of CorrelationRule objects. + + Raises: + FileNotFoundError: If the rules directory doesn't exist. + """ + if not self.rules_dir.exists(): + # Create directory if it doesn't exist, but return empty list + self.rules_dir.mkdir(parents=True, exist_ok=True) + self._loaded = True + return self.rules + + yaml_files = list(self.rules_dir.glob("*.yml")) + list(self.rules_dir.glob("*.yaml")) + + # Clear existing rules before loading + self.rules = [] + + for yaml_file in yaml_files: + try: + rules = self._load_yaml_file(yaml_file) + self.rules.extend(rules) + except Exception as e: + # Log warning but continue loading other files + print(f"Warning: Failed to load correlation rules from {yaml_file}: {e}") + + self._loaded = True + return self.rules + + def _load_yaml_file(self, file_path: Path) -> List[CorrelationRule]: + """Load correlation rules from a single YAML file. + + Args: + file_path: Path to YAML file. + + Returns: + List of CorrelationRule objects. + + Raises: + ValueError: If the file format is invalid. 
+ """ + with open(file_path, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + if not isinstance(data, dict): + raise ValueError(f"Invalid rule file format in {file_path}: expected dictionary at root") + + rules = [] + + # Support both single rule and multiple rules formats + if 'rules' in data: + # Multiple rules format + rule_list = data['rules'] + if not isinstance(rule_list, list): + raise ValueError(f"Invalid rule file format in {file_path}: 'rules' must be a list") + + for rule_data in rule_list: + try: + rule = self._parse_rule(rule_data) + rules.append(rule) + except (KeyError, ValueError) as e: + print(f"Warning: Skipping invalid rule in {file_path}: {e}") + + elif 'name' in data and 'co_occurring_facts' in data: + # Single rule format + try: + rule = self._parse_rule(data) + rules.append(rule) + except (KeyError, ValueError) as e: + print(f"Warning: Skipping invalid rule in {file_path}: {e}") + + else: + raise ValueError(f"Invalid rule file format in {file_path}: must contain 'rules' list or single rule with 'name' and 'co_occurring_facts'") + + return rules + + def _parse_rule(self, rule_data: Dict[str, Any]) -> CorrelationRule: + """Parse a single rule from dictionary data. + + Args: + rule_data: Dictionary containing rule data. + + Returns: + CorrelationRule object. + + Raises: + KeyError: If required fields are missing. + ValueError: If data format is invalid. + """ + if 'name' not in rule_data: + raise KeyError("Rule must have a 'name' field") + + if 'co_occurring_facts' not in rule_data: + raise KeyError("Rule must have a 'co_occurring_facts' field") + + if not isinstance(rule_data['co_occurring_facts'], list): + raise ValueError("'co_occurring_facts' must be a list") + + if len(rule_data['co_occurring_facts']) == 0: + raise ValueError("'co_occurring_facts' must not be empty") + + return CorrelationRule( + name=rule_data['name'], + co_occurring_facts=rule_data['co_occurring_facts'], + description=rule_data.get('description'), + confidence=rule_data.get('confidence', 0.8) + ) + + def get_all_rules(self) -> List[CorrelationRule]: + """Get all loaded correlation rules. + + Returns: + List of all loaded CorrelationRule objects. + """ + if not self._loaded: + self.load_rules() + + return self.rules + + def validate_rules(self) -> List[str]: + """Validate all loaded correlation rules. + + Returns: + List of validation error messages. + """ + if not self._loaded: + self.load_rules() + + errors = [] + + # Check for duplicate rule names + names = [rule.name for rule in self.rules] + for name in names: + if names.count(name) > 1: + errors.append(f"Duplicate rule name: {name}") + + # Validate each rule + for rule in self.rules: + # Check that each rule has at least 2 co-occurring facts + if len(rule.co_occurring_facts) < 2: + errors.append(f"Rule '{rule.name}' has fewer than 2 co-occurring facts") + + # Check confidence is between 0 and 1 + if not 0 <= rule.confidence <= 1: + errors.append(f"Rule '{rule.name}' has invalid confidence value: {rule.confidence}") + + return errors \ No newline at end of file diff --git a/theauditor/correlations/rules/angular_sanitization_cluster.yml b/theauditor/correlations/rules/angular_sanitization_cluster.yml new file mode 100644 index 0000000..8cab74f --- /dev/null +++ b/theauditor/correlations/rules/angular_sanitization_cluster.yml @@ -0,0 +1,10 @@ +name: "Angular Sanitization Bypass Factual Cluster" +description: "Multiple tools detected patterns consistent with XSS via sanitization bypass in Angular." 
+confidence: 0.95 +co_occurring_facts: + - tool: "framework_detector" + pattern: "angular" + - tool: "patterns" + pattern: "bypassSecurity" + - tool: "taint_analyzer" + pattern: "trust" \ No newline at end of file diff --git a/theauditor/correlations/rules/api_key_exposure_cluster.yml b/theauditor/correlations/rules/api_key_exposure_cluster.yml new file mode 100644 index 0000000..3fd5769 --- /dev/null +++ b/theauditor/correlations/rules/api_key_exposure_cluster.yml @@ -0,0 +1,10 @@ +name: "API Key Exposure Factual Cluster" +description: "Multiple tools detected patterns consistent with a hardcoded or exposed API key." +confidence: 0.95 +co_occurring_facts: + - tool: "patterns" + pattern: "api_key" + - tool: "ast" + pattern: "hardcoded" + - tool: "git" + pattern: "committed" \ No newline at end of file diff --git a/theauditor/correlations/rules/command_injection_cluster.yml b/theauditor/correlations/rules/command_injection_cluster.yml new file mode 100644 index 0000000..d00be54 --- /dev/null +++ b/theauditor/correlations/rules/command_injection_cluster.yml @@ -0,0 +1,10 @@ +name: "Command Injection Factual Cluster" +description: "Multiple tools detected patterns consistent with a Command Injection vulnerability." +confidence: 0.95 +co_occurring_facts: + - tool: "taint_analyzer" + pattern: "command" + - tool: "patterns" + pattern: "(exec|subprocess|shell)" + - tool: "lint" + pattern: "subprocess" \ No newline at end of file diff --git a/theauditor/correlations/rules/container_escape_cluster.yml b/theauditor/correlations/rules/container_escape_cluster.yml new file mode 100644 index 0000000..fa6f073 --- /dev/null +++ b/theauditor/correlations/rules/container_escape_cluster.yml @@ -0,0 +1,10 @@ +name: "Container Escape Factual Cluster" +description: "Multiple tools detected patterns consistent with a container escape vulnerability." +confidence: 0.90 +co_occurring_facts: + - tool: "deployment" + pattern: "privileged" + - tool: "patterns" + pattern: "docker" + - tool: "security" + pattern: "cap_sys_admin" \ No newline at end of file diff --git a/theauditor/correlations/rules/cors_misconfiguration_cluster.yml b/theauditor/correlations/rules/cors_misconfiguration_cluster.yml new file mode 100644 index 0000000..cf88d8f --- /dev/null +++ b/theauditor/correlations/rules/cors_misconfiguration_cluster.yml @@ -0,0 +1,10 @@ +name: "CORS Misconfiguration Factual Cluster" +description: "Multiple tools detected patterns consistent with a CORS misconfiguration." +confidence: 0.90 +co_occurring_facts: + - tool: "patterns" + pattern: "Access-Control" + - tool: "security" + pattern: "wildcard" + - tool: "framework_detector" + pattern: "cors" \ No newline at end of file diff --git a/theauditor/correlations/rules/deadlock_cluster.yml b/theauditor/correlations/rules/deadlock_cluster.yml new file mode 100644 index 0000000..f45cdb2 --- /dev/null +++ b/theauditor/correlations/rules/deadlock_cluster.yml @@ -0,0 +1,10 @@ +name: "Deadlock Factual Cluster" +description: "Multiple tools detected patterns consistent with a potential deadlock." 
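Each of these *_cluster.yml files uses the single-rule layout that _load_yaml_file accepts alongside the multi-rule "rules:" list. A minimal sketch of that layout, parsed the same way the loader does; the YAML content below is illustrative only and is not one of the shipped rules.

import yaml

doc = yaml.safe_load("""
name: "Example Factual Cluster"
description: "Illustrative only; not a shipped rule."
confidence: 0.90
co_occurring_facts:
  - tool: "patterns"
    pattern: "example"
  - tool: "lint"
    pattern: "example"
""")

# _load_yaml_file treats a root mapping with 'name' and 'co_occurring_facts'
# as the single-rule format; a root 'rules:' key is the multi-rule format.
assert isinstance(doc, dict)
assert "name" in doc and isinstance(doc["co_occurring_facts"], list)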
+confidence: 0.85 +co_occurring_facts: + - tool: "graph" + pattern: "mutex" + - tool: "patterns" + pattern: "lock" + - tool: "taint_analyzer" + pattern: "circular" \ No newline at end of file diff --git a/theauditor/correlations/rules/debug_enabled_cluster.yml b/theauditor/correlations/rules/debug_enabled_cluster.yml new file mode 100644 index 0000000..9cf0822 --- /dev/null +++ b/theauditor/correlations/rules/debug_enabled_cluster.yml @@ -0,0 +1,10 @@ +name: "Debug Mode Enabled Factual Cluster" +description: "Multiple tools detected patterns consistent with debug mode being enabled in a production environment." +confidence: 0.95 +co_occurring_facts: + - tool: "patterns" + pattern: "DEBUG=true" + - tool: "framework_detector" + pattern: "production" + - tool: "deployment" + pattern: "exposed" \ No newline at end of file diff --git a/theauditor/correlations/rules/express_bodyparser_cluster.yml b/theauditor/correlations/rules/express_bodyparser_cluster.yml new file mode 100644 index 0000000..49db22e --- /dev/null +++ b/theauditor/correlations/rules/express_bodyparser_cluster.yml @@ -0,0 +1,10 @@ +name: "Express Body-Parser Factual Cluster" +description: "Multiple tools detected patterns consistent with insecure body-parser configuration in Express." +confidence: 0.75 +co_occurring_facts: + - tool: "framework_detector" + pattern: "express" + - tool: "patterns" + pattern: "body-parser" + - tool: "security" + pattern: "no_limit" \ No newline at end of file diff --git a/theauditor/correlations/rules/infinite_loop_cluster.yml b/theauditor/correlations/rules/infinite_loop_cluster.yml new file mode 100644 index 0000000..e1b1d98 --- /dev/null +++ b/theauditor/correlations/rules/infinite_loop_cluster.yml @@ -0,0 +1,10 @@ +name: "Infinite Loop Factual Cluster" +description: "Multiple tools detected patterns consistent with a potential infinite loop." +confidence: 0.80 +co_occurring_facts: + - tool: "graph" + pattern: "cycle" + - tool: "patterns" + pattern: "while\\(true\\)" + - tool: "ast" + pattern: "no_break" \ No newline at end of file diff --git a/theauditor/correlations/rules/jwt_issues_cluster.yml b/theauditor/correlations/rules/jwt_issues_cluster.yml new file mode 100644 index 0000000..e44cc10 --- /dev/null +++ b/theauditor/correlations/rules/jwt_issues_cluster.yml @@ -0,0 +1,10 @@ +name: "JWT Issues Factual Cluster" +description: "Multiple tools detected patterns consistent with insecure JWT implementation." +confidence: 0.90 +co_occurring_facts: + - tool: "patterns" + pattern: "jwt" + - tool: "security" + pattern: "HS256" + - tool: "lint" + pattern: "jwt" \ No newline at end of file diff --git a/theauditor/correlations/rules/ldap_injection_cluster.yml b/theauditor/correlations/rules/ldap_injection_cluster.yml new file mode 100644 index 0000000..7a76245 --- /dev/null +++ b/theauditor/correlations/rules/ldap_injection_cluster.yml @@ -0,0 +1,10 @@ +name: "LDAP Injection Factual Cluster" +description: "Multiple tools detected patterns consistent with an LDAP Injection vulnerability." 
+confidence: 0.85 +co_occurring_facts: + - tool: "taint_analyzer" + pattern: "ldap" + - tool: "patterns" + pattern: "filter" + - tool: "lint" + pattern: "ldap" \ No newline at end of file diff --git a/theauditor/correlations/rules/memory_leak_cluster.yml b/theauditor/correlations/rules/memory_leak_cluster.yml new file mode 100644 index 0000000..145e421 --- /dev/null +++ b/theauditor/correlations/rules/memory_leak_cluster.yml @@ -0,0 +1,10 @@ +name: "Memory Leak Factual Cluster" +description: "Multiple tools detected patterns consistent with a potential memory leak." +confidence: 0.70 +co_occurring_facts: + - tool: "patterns" + pattern: "setInterval" + - tool: "graph" + pattern: "no_cleanup" + - tool: "lint" + pattern: "memory" \ No newline at end of file diff --git a/theauditor/correlations/rules/missing_auth_cluster.yml b/theauditor/correlations/rules/missing_auth_cluster.yml new file mode 100644 index 0000000..59c1411 --- /dev/null +++ b/theauditor/correlations/rules/missing_auth_cluster.yml @@ -0,0 +1,10 @@ +name: "Missing Authentication Factual Cluster" +description: "Multiple tools detected patterns consistent with a missing authentication control on a sensitive endpoint." +confidence: 0.80 +co_occurring_facts: + - tool: "patterns" + pattern: "public" + - tool: "framework_detector" + pattern: "no_auth" + - tool: "graph" + pattern: "exposed" \ No newline at end of file diff --git a/theauditor/correlations/rules/nosql_injection_cluster.yml b/theauditor/correlations/rules/nosql_injection_cluster.yml new file mode 100644 index 0000000..c7578ce --- /dev/null +++ b/theauditor/correlations/rules/nosql_injection_cluster.yml @@ -0,0 +1,10 @@ +name: "NoSQL Injection Factual Cluster" +description: "Multiple tools detected patterns consistent with a NoSQL Injection vulnerability." +confidence: 0.85 +co_occurring_facts: + - tool: "patterns" + pattern: "(mongodb|mongoose)" + - tool: "taint_analyzer" + pattern: "$where" + - tool: "lint" + pattern: "nosql" \ No newline at end of file diff --git a/theauditor/correlations/rules/path_traversal_cluster.yml b/theauditor/correlations/rules/path_traversal_cluster.yml new file mode 100644 index 0000000..34567b5 --- /dev/null +++ b/theauditor/correlations/rules/path_traversal_cluster.yml @@ -0,0 +1,10 @@ +name: "Path Traversal Factual Cluster" +description: "Multiple tools detected patterns consistent with a Path Traversal vulnerability." +confidence: 0.85 +co_occurring_facts: + - tool: "taint_analyzer" + pattern: "path" + - tool: "patterns" + pattern: "\\.\\./" + - tool: "lint" + pattern: "path" \ No newline at end of file diff --git a/theauditor/correlations/rules/pii_leak_cluster.yml b/theauditor/correlations/rules/pii_leak_cluster.yml new file mode 100644 index 0000000..87c43ee --- /dev/null +++ b/theauditor/correlations/rules/pii_leak_cluster.yml @@ -0,0 +1,10 @@ +name: "PII Leak Factual Cluster" +description: "Multiple tools detected patterns consistent with a potential leak of Personally Identifiable Information (PII)." 
+confidence: 0.80 +co_occurring_facts: + - tool: "patterns" + pattern: "(email|ssn)" + - tool: "taint_analyzer" + pattern: "response" + - tool: "framework_detector" + pattern: "no_mask" \ No newline at end of file diff --git a/theauditor/correlations/rules/race_condition_cluster.yml b/theauditor/correlations/rules/race_condition_cluster.yml new file mode 100644 index 0000000..ed88f3a --- /dev/null +++ b/theauditor/correlations/rules/race_condition_cluster.yml @@ -0,0 +1,10 @@ +name: "Race Condition Factual Cluster" +description: "Multiple tools detected patterns consistent with a potential race condition." +confidence: 0.75 +co_occurring_facts: + - tool: "graph" + pattern: "concurrent" + - tool: "patterns" + pattern: "async" + - tool: "taint_analyzer" + pattern: "shared_state" \ No newline at end of file diff --git a/theauditor/correlations/rules/rate_limit_missing_cluster.yml b/theauditor/correlations/rules/rate_limit_missing_cluster.yml new file mode 100644 index 0000000..d6c9451 --- /dev/null +++ b/theauditor/correlations/rules/rate_limit_missing_cluster.yml @@ -0,0 +1,10 @@ +name: "Missing Rate Limiting Factual Cluster" +description: "Multiple tools detected patterns consistent with a sensitive endpoint lacking rate limiting." +confidence: 0.85 +co_occurring_facts: + - tool: "patterns" + pattern: "endpoint" + - tool: "framework_detector" + pattern: "no_throttle" + - tool: "deployment" + pattern: "public" \ No newline at end of file diff --git a/theauditor/correlations/rules/react_dangerous_html_cluster.yml b/theauditor/correlations/rules/react_dangerous_html_cluster.yml new file mode 100644 index 0000000..fe8d517 --- /dev/null +++ b/theauditor/correlations/rules/react_dangerous_html_cluster.yml @@ -0,0 +1,10 @@ +name: "React dangerouslySetInnerHTML Factual Cluster" +description: "Multiple tools detected patterns consistent with XSS via dangerouslySetInnerHTML in React." 
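Several of these cluster patterns are regular expressions (for example "(exec|subprocess|shell)" or "while\\(true\\)") while others read as plain substrings. The loader does not need a flag to tell them apart: CorrelationRule.__post_init__ tries to compile every pattern and only falls back to literal matching if compilation fails. A standalone re-statement of that fallback, for illustration only:

import re

def compile_fact_pattern(pattern: str) -> dict:
    """Mirror of the regex-or-literal decision in CorrelationRule.__post_init__."""
    fact = {"pattern": pattern}
    try:
        fact["compiled_regex"] = re.compile(pattern, re.IGNORECASE)
        fact["is_regex"] = True
    except re.error:
        fact["is_regex"] = False  # invalid regex: fall back to substring matching
    return fact

print(compile_fact_pattern("(innerHTML|dangerouslySetInnerHTML)")["is_regex"])  # True
print(compile_fact_pattern("dangerously")["is_regex"])                          # True (plain words are valid regexes too)
print(compile_fact_pattern("foo[")["is_regex"])                                 # False: unterminated character set

In practice almost every plain substring is also a valid regular expression, so the literal branch is only reached for genuinely malformed patterns.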
+confidence: 0.95 +co_occurring_facts: + - tool: "framework_detector" + pattern: "react" + - tool: "patterns" + pattern: "dangerously" + - tool: "taint_analyzer" + pattern: "user" \ No newline at end of file diff --git a/theauditor/correlations/rules/refactoring.yaml b/theauditor/correlations/rules/refactoring.yaml new file mode 100644 index 0000000..485537f --- /dev/null +++ b/theauditor/correlations/rules/refactoring.yaml @@ -0,0 +1,277 @@ +# Refactoring Detection Correlation Rules +# These rules detect common refactoring issues and inconsistencies + +rules: + # ============================================================================ + # DATA MODEL REFACTORING PATTERNS + # ============================================================================ + + - name: "FIELD_MOVED_BETWEEN_MODELS" + description: "Field moved from one model to another but old references remain" + co_occurring_facts: + - tool: "grep" + pattern: "removeColumn.*('products'|\"products\")" + - tool: "grep" + pattern: "product\\.(unit_price|retail_price|wholesale_price|sku|inventory_type)" + confidence: 0.95 + + - name: "PRODUCT_VARIANT_REFACTOR" + description: "Product fields moved to ProductVariant but frontend still uses old structure" + co_occurring_facts: + - tool: "grep" + pattern: "ProductVariant.*retail_price.*Sequelize" + - tool: "grep" + pattern: "product\\.unit_price|product\\.retail_price" + confidence: 0.92 + + - name: "SKU_FIELD_MIGRATION" + description: "SKU moved from Product to ProductVariant" + co_occurring_facts: + - tool: "grep" + pattern: "ProductVariant.*sku.*unique.*true" + - tool: "grep" + pattern: "product\\.sku|WHERE.*products\\.sku" + confidence: 0.94 + + # ============================================================================ + # FOREIGN KEY REFACTORING + # ============================================================================ + + - name: "ORDER_ITEMS_WRONG_FK" + description: "Order items using product_id instead of product_variant_id" + co_occurring_facts: + - tool: "grep" + pattern: "order_items.*product_variant_id.*fkey" + - tool: "grep" + pattern: "order_items.*product_id(?!_variant)" + confidence: 0.96 + + - name: "TRANSFER_ITEMS_WRONG_FK" + description: "Transfer items referencing wrong product foreign key" + co_occurring_facts: + - tool: "grep" + pattern: "transfer_items.*product_variant_id" + - tool: "grep" + pattern: "transfer.*product_id(?!_variant)" + confidence: 0.93 + + - name: "INVENTORY_FK_MISMATCH" + description: "Inventory table has both product_id and product_variant_id" + co_occurring_facts: + - tool: "grep" + pattern: "inventory.*product_variant_id.*NULL" + - tool: "grep" + pattern: "inventory.*product_id.*NOT NULL" + confidence: 0.88 + + # ============================================================================ + # API CONTRACT CHANGES + # ============================================================================ + + - name: "API_ENDPOINT_REMOVED" + description: "Frontend calling API endpoints that no longer exist" + co_occurring_facts: + - tool: "grep" + pattern: "/api/products/.*/price|/api/products/.*/sku" + - tool: "grep" + pattern: "router\\.(get|post).*'/variants'" + confidence: 0.90 + + - name: "API_RESPONSE_STRUCTURE_CHANGED" + description: "API response structure changed but frontend expects old format" + co_occurring_facts: + - tool: "grep" + pattern: "res\\.json.*variants.*product" + - tool: "grep" + pattern: "response\\.data\\.product\\.price" + confidence: 0.87 + + - name: "GRAPHQL_SCHEMA_MISMATCH" + description: "GraphQL schema doesn't 
match model structure" + co_occurring_facts: + - tool: "grep" + pattern: "type Product.*price.*Float" + - tool: "grep" + pattern: "Product\\.init.*!.*price" + confidence: 0.85 + + # ============================================================================ + # FRONTEND-BACKEND MISMATCHES + # ============================================================================ + + - name: "TYPESCRIPT_INTERFACE_OUTDATED" + description: "TypeScript interfaces don't match backend models" + co_occurring_facts: + - tool: "grep" + pattern: "interface.*Product.*unit_price.*number" + - tool: "grep" + pattern: "Product\\.init.*!.*unit_price" + confidence: 0.96 + + - name: "FRONTEND_NESTED_STRUCTURE" + description: "Frontend expects nested relationships that backend doesn't provide" + co_occurring_facts: + - tool: "grep" + pattern: "product_variant\\.product\\.(name|brand)" + - tool: "grep" + pattern: "ProductVariant.*belongsTo.*Product" + confidence: 0.91 + + - name: "CART_WRONG_ID_FIELD" + description: "Shopping cart using product_id instead of product_variant_id" + co_occurring_facts: + - tool: "grep" + pattern: "OrderItem.*product_variant_id.*required" + - tool: "grep" + pattern: "addToCart.*product_id|cart.*product_id" + confidence: 0.93 + + # ============================================================================ + # MIGRATION PATTERNS + # ============================================================================ + + - name: "INCOMPLETE_MIGRATION" + description: "Database migration incomplete - old column references remain" + co_occurring_facts: + - tool: "grep" + pattern: "removeColumn|dropColumn" + - tool: "grep" + pattern: "SELECT.*FROM.*WHERE.*{removed_column}" + confidence: 0.89 + + - name: "MIGRATION_DATA_LOSS" + description: "Migration drops column without data migration" + co_occurring_facts: + - tool: "grep" + pattern: "removeColumn.*CASCADE|dropColumn.*CASCADE" + - tool: "grep" + pattern: "!.*UPDATE.*SET.*before.*removeColumn" + confidence: 0.86 + + - name: "ENUM_TYPE_CHANGED" + description: "ENUM values changed but code still uses old values" + co_occurring_facts: + - tool: "grep" + pattern: "DROP TYPE.*enum_products" + - tool: "grep" + pattern: "inventory_type.*=.*'both'|inventory_type.*weight|unit|both" + confidence: 0.84 + + # ============================================================================ + # AUTHORIZATION CHANGES + # ============================================================================ + + - name: "MISSING_AUTH_MIDDLEWARE" + description: "New routes missing authentication/authorization" + co_occurring_facts: + - tool: "grep" + pattern: "router\\.(post|put|delete).*variant" + - tool: "grep" + pattern: "!.*requireAdmin.*productVariant\\.routes" + confidence: 0.92 + + - name: "PERMISSION_MODEL_CHANGED" + description: "Permission model changed but checks not updated" + co_occurring_facts: + - tool: "grep" + pattern: "role.*admin|worker" + - tool: "grep" + pattern: "req\\.user\\.permissions|can\\(" + confidence: 0.80 + + # ============================================================================ + # VALIDATION CHANGES + # ============================================================================ + + - name: "VALIDATION_SCHEMA_OUTDATED" + description: "Joi/Yup validation schema doesn't match model" + co_occurring_facts: + - tool: "grep" + pattern: "Joi\\.object.*product.*unit_price" + - tool: "grep" + pattern: "!.*Product.*unit_price" + confidence: 0.88 + + - name: "REQUIRED_FIELD_MISMATCH" + description: "Required fields in validation don't match database 
constraints" + co_occurring_facts: + - tool: "grep" + pattern: "allowNull.*false.*sku" + - tool: "grep" + pattern: "sku.*Joi\\..*optional\\(\\)" + confidence: 0.85 + + # ============================================================================ + # SERVICE LAYER ISSUES + # ============================================================================ + + - name: "SERVICE_METHOD_SIGNATURE_CHANGED" + description: "Service method signature changed but callers not updated" + co_occurring_facts: + - tool: "grep" + pattern: "async.*create.*product.*variant" + - tool: "grep" + pattern: "productService\\.create\\(.*price" + confidence: 0.87 + + - name: "REPOSITORY_PATTERN_MISMATCH" + description: "Repository methods don't match new model structure" + co_occurring_facts: + - tool: "grep" + pattern: "findOne.*where.*sku" + - tool: "grep" + pattern: "ProductVariant.*sku" + confidence: 0.83 + + # ============================================================================ + # TESTING ISSUES + # ============================================================================ + + - name: "TEST_FIXTURES_OUTDATED" + description: "Test fixtures using old model structure" + co_occurring_facts: + - tool: "grep" + pattern: "test.*product.*unit_price" + - tool: "grep" + pattern: "ProductVariant.*retail_price" + confidence: 0.82 + + - name: "MOCK_DATA_MISMATCH" + description: "Mock data doesn't match actual model structure" + co_occurring_facts: + - tool: "grep" + pattern: "mock.*product.*price" + - tool: "grep" + pattern: "!.*Product.*price" + confidence: 0.79 + + # ============================================================================ + # COMMON REFACTORING ANTI-PATTERNS + # ============================================================================ + + - name: "EXTRACT_VARIANT_PATTERN" + description: "Classic Extract Variant refactoring with incomplete updates" + co_occurring_facts: + - tool: "grep" + pattern: "createTable.*variants" + - tool: "grep" + pattern: "product\\.(price|sku|inventory)" + confidence: 0.94 + + - name: "NORMALIZE_HIERARCHY" + description: "Hierarchy normalization with missing relationship updates" + co_occurring_facts: + - tool: "grep" + pattern: "belongsTo.*hasMany.*through" + - tool: "grep" + pattern: "JOIN.*old_table" + confidence: 0.86 + + - name: "SPLIT_TABLE_INCOMPLETE" + description: "Table split into multiple tables but queries not updated" + co_occurring_facts: + - tool: "grep" + pattern: "createTable.*_details|_metadata" + - tool: "grep" + pattern: "SELECT.*FROM.*{original_table}.*WHERE" + confidence: 0.88 \ No newline at end of file diff --git a/theauditor/correlations/rules/sensitive_logs_cluster.yml b/theauditor/correlations/rules/sensitive_logs_cluster.yml new file mode 100644 index 0000000..5440be2 --- /dev/null +++ b/theauditor/correlations/rules/sensitive_logs_cluster.yml @@ -0,0 +1,10 @@ +name: "Sensitive Data in Logs Factual Cluster" +description: "Multiple tools detected patterns consistent with sensitive data being written to logs." 
+confidence: 0.85 +co_occurring_facts: + - tool: "patterns" + pattern: "console.log" + - tool: "taint_analyzer" + pattern: "password" + - tool: "lint" + pattern: "logging" \ No newline at end of file diff --git a/theauditor/correlations/rules/session_fixation_cluster.yml b/theauditor/correlations/rules/session_fixation_cluster.yml new file mode 100644 index 0000000..8d8b7ad --- /dev/null +++ b/theauditor/correlations/rules/session_fixation_cluster.yml @@ -0,0 +1,10 @@ +name: "Session Fixation Factual Cluster" +description: "Multiple tools detected patterns consistent with a Session Fixation vulnerability." +confidence: 0.75 +co_occurring_facts: + - tool: "patterns" + pattern: "session" + - tool: "taint_analyzer" + pattern: "user_controlled" + - tool: "framework_detector" + pattern: "session" \ No newline at end of file diff --git a/theauditor/correlations/rules/source_map_exposure_cluster.yml b/theauditor/correlations/rules/source_map_exposure_cluster.yml new file mode 100644 index 0000000..cc61b5f --- /dev/null +++ b/theauditor/correlations/rules/source_map_exposure_cluster.yml @@ -0,0 +1,10 @@ +name: "Source Map Exposure Factual Cluster" +description: "Multiple tools detected patterns consistent with exposed source maps in a production environment." +confidence: 0.95 +co_occurring_facts: + - tool: "build" + pattern: "sourcemap" + - tool: "deployment" + pattern: "production" + - tool: "patterns" + pattern: "\\.map" \ No newline at end of file diff --git a/theauditor/correlations/rules/ssrf_cluster.yml b/theauditor/correlations/rules/ssrf_cluster.yml new file mode 100644 index 0000000..c56015d --- /dev/null +++ b/theauditor/correlations/rules/ssrf_cluster.yml @@ -0,0 +1,10 @@ +name: "SSRF Factual Cluster" +description: "Multiple tools detected patterns consistent with a Server-Side Request Forgery (SSRF) vulnerability." +confidence: 0.80 +co_occurring_facts: + - tool: "taint_analyzer" + pattern: "url" + - tool: "patterns" + pattern: "(request|fetch|urllib)" + - tool: "lint" + pattern: "urllib" \ No newline at end of file diff --git a/theauditor/correlations/rules/template_injection_cluster.yml b/theauditor/correlations/rules/template_injection_cluster.yml new file mode 100644 index 0000000..7c67304 --- /dev/null +++ b/theauditor/correlations/rules/template_injection_cluster.yml @@ -0,0 +1,10 @@ +name: "Template Injection Factual Cluster" +description: "Multiple tools detected patterns consistent with a Server-Side Template Injection (SSTI) vulnerability." 
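Taken together, these clusters only fire when several independent tools report overlapping facts. The actual aggregation lives in the Factual Correlation Engine (theauditor/fce.py, which is not part of this section), so the loop below is only a hedged sketch of one plausible way such rules could be applied to a flat findings list, not a description of the engine's real logic. It assumes TheAuditor is installed so the bundled rules directory resolves.

from theauditor.correlations.loader import CorrelationLoader

loader = CorrelationLoader()  # defaults to theauditor/correlations/rules/
findings = [
    {"tool": "taint_analyzer", "rule": "xss-sink", "message": "user input reaches innerHTML"},
    {"tool": "patterns", "rule": "innerHTML-assignment", "message": "element.innerHTML = ..."},
    {"tool": "lint", "rule": "no-unsanitized/xss", "message": "possible XSS"},
]

for rule in loader.load_rules():
    # Treat a rule as satisfied only if every fact index matches at least one finding.
    every_fact_matched = all(
        any(rule.matches_finding(f, i) for f in findings)
        for i in range(len(rule.co_occurring_facts))
    )
    if every_fact_matched:
        print(f"cluster: {rule.name} (confidence {rule.confidence})")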
+confidence: 0.80 +co_occurring_facts: + - tool: "taint_analyzer" + pattern: "template" + - tool: "patterns" + pattern: "eval" + - tool: "framework_detector" + pattern: "(jinja|blade|pug)" \ No newline at end of file diff --git a/theauditor/correlations/rules/test_sql_injection.yml b/theauditor/correlations/rules/test_sql_injection.yml new file mode 100644 index 0000000..7b33598 --- /dev/null +++ b/theauditor/correlations/rules/test_sql_injection.yml @@ -0,0 +1,10 @@ +name: "Potential SQL Injection Factual Cluster" +description: "Multiple tools detected patterns consistent with SQL injection vulnerability" +confidence: 0.85 +co_occurring_facts: + - tool: "taint_analyzer" + pattern: "sql" + - tool: "patterns" + pattern: "string.*query" + - tool: "lint" + pattern: "sql" \ No newline at end of file diff --git a/theauditor/correlations/rules/vue_v_html_cluster.yml b/theauditor/correlations/rules/vue_v_html_cluster.yml new file mode 100644 index 0000000..cc8bf18 --- /dev/null +++ b/theauditor/correlations/rules/vue_v_html_cluster.yml @@ -0,0 +1,10 @@ +name: "Vue v-html Factual Cluster" +description: "Multiple tools detected patterns consistent with XSS via v-html in Vue." +confidence: 0.95 +co_occurring_facts: + - tool: "framework_detector" + pattern: "vue" + - tool: "patterns" + pattern: "v-html" + - tool: "taint_analyzer" + pattern: "user_input" \ No newline at end of file diff --git a/theauditor/correlations/rules/weak_auth_cluster.yml b/theauditor/correlations/rules/weak_auth_cluster.yml new file mode 100644 index 0000000..2ec8a7e --- /dev/null +++ b/theauditor/correlations/rules/weak_auth_cluster.yml @@ -0,0 +1,10 @@ +name: "Weak Authentication Factual Cluster" +description: "Multiple tools detected patterns consistent with weak or deprecated authentication mechanisms." +confidence: 0.85 +co_occurring_facts: + - tool: "patterns" + pattern: "(md5|sha1)" + - tool: "security" + pattern: "password" + - tool: "lint" + pattern: "deprecated" \ No newline at end of file diff --git a/theauditor/correlations/rules/xss_cluster.yml b/theauditor/correlations/rules/xss_cluster.yml new file mode 100644 index 0000000..bcdfd25 --- /dev/null +++ b/theauditor/correlations/rules/xss_cluster.yml @@ -0,0 +1,10 @@ +name: "XSS Factual Cluster" +description: "Multiple tools detected patterns consistent with a Cross-Site Scripting (XSS) vulnerability." +confidence: 0.90 +co_occurring_facts: + - tool: "taint_analyzer" + pattern: "xss" + - tool: "patterns" + pattern: "(innerHTML|dangerouslySetInnerHTML)" + - tool: "lint" + pattern: "xss" \ No newline at end of file diff --git a/theauditor/correlations/rules/xxe_cluster.yml b/theauditor/correlations/rules/xxe_cluster.yml new file mode 100644 index 0000000..7db0c3f --- /dev/null +++ b/theauditor/correlations/rules/xxe_cluster.yml @@ -0,0 +1,10 @@ +name: "XXE Factual Cluster" +description: "Multiple tools detected patterns consistent with an XML External Entity (XXE) vulnerability." 
+confidence: 0.80 +co_occurring_facts: + - tool: "patterns" + pattern: "xml" + - tool: "taint_analyzer" + pattern: "parse" + - tool: "framework_detector" + pattern: "xml_parser" \ No newline at end of file diff --git a/theauditor/deps.py b/theauditor/deps.py new file mode 100644 index 0000000..9fe1d84 --- /dev/null +++ b/theauditor/deps.py @@ -0,0 +1,1109 @@ +"""Dependency parser for multiple ecosystems.""" + +import glob +import http.client +import json +import platform +import re +import shutil +import time +import urllib.error +import yaml +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional +from theauditor.security import sanitize_path, sanitize_url_component, validate_package_name, SecurityError + +# Detect if running on Windows for character encoding +IS_WINDOWS = platform.system() == "Windows" + +# Rate limiting configuration - optimized for minimal runtime +# Based on actual API rate limits and industry standards +RATE_LIMIT_NPM = 0.1 # npm registry: 600 req/min (well under any limit) +RATE_LIMIT_PYPI = 0.2 # PyPI: 300 req/min (safe margin) +RATE_LIMIT_DOCKER = 0.2 # Docker Hub: 300 req/min for tag checks +RATE_LIMIT_BACKOFF = 15 # Backoff on 429/disconnect (15s gives APIs time to reset) + + +def parse_dependencies(root_path: str = ".") -> List[Dict[str, Any]]: + """ + Parse dependencies from various package managers. + + Returns list of dependency objects with structure: + { + "name": str, + "version": str, + "manager": "npm"|"py", + "files": [paths that import it], + "source": "package.json|pyproject.toml|requirements.txt" + } + """ + import os + root = Path(root_path) + deps = [] + + # Debug mode + debug = os.environ.get("THEAUDITOR_DEBUG") + + # Parse Node dependencies + try: + package_json = sanitize_path("package.json", root_path) + if package_json.exists(): + if debug: + print(f"Debug: Found {package_json}") + deps.extend(_parse_package_json(package_json)) + except SecurityError as e: + if debug: + print(f"Debug: Security error checking package.json: {e}") + + # Parse Python dependencies + try: + pyproject = sanitize_path("pyproject.toml", root_path) + if pyproject.exists(): + if debug: + print(f"Debug: Found {pyproject}") + deps.extend(_parse_pyproject_toml(pyproject)) + except SecurityError as e: + if debug: + print(f"Debug: Security error checking pyproject.toml: {e}") + + # Parse requirements files + req_files = list(root.glob("requirements*.txt")) + if debug and req_files: + print(f"Debug: Found requirements files: {req_files}") + for req_file in req_files: + try: + # Validate the path is within project root + safe_req_file = sanitize_path(str(req_file), root_path) + deps.extend(_parse_requirements_txt(safe_req_file)) + except SecurityError as e: + if debug: + print(f"Debug: Security error with {req_file}: {e}") + + # Parse Docker Compose files + docker_compose_files = list(root.glob("docker-compose*.yml")) + list(root.glob("docker-compose*.yaml")) + if debug and docker_compose_files: + print(f"Debug: Found Docker Compose files: {docker_compose_files}") + for compose_file in docker_compose_files: + try: + safe_compose_file = sanitize_path(str(compose_file), root_path) + deps.extend(_parse_docker_compose(safe_compose_file)) + except SecurityError as e: + if debug: + print(f"Debug: Security error with {compose_file}: {e}") + + # Parse Dockerfiles + dockerfiles = list(root.glob("**/Dockerfile")) + if debug and dockerfiles: + print(f"Debug: Found Dockerfiles: {dockerfiles}") + for dockerfile in dockerfiles: + try: + 
safe_dockerfile = sanitize_path(str(dockerfile), root_path) + deps.extend(_parse_dockerfile(safe_dockerfile)) + except SecurityError as e: + if debug: + print(f"Debug: Security error with {dockerfile}: {e}") + + if debug: + print(f"Debug: Total dependencies found: {len(deps)}") + + return deps + + +def _parse_package_json(path: Path) -> List[Dict[str, Any]]: + """Parse dependencies from package.json, with monorepo support.""" + deps = [] + processed_packages = set() # Track processed packages to avoid duplicates + + def parse_single_package(pkg_path: Path, workspace_path: str = "package.json") -> List[Dict[str, Any]]: + """Parse a single package.json file.""" + local_deps = [] + try: + with open(pkg_path, encoding="utf-8") as f: + data = json.load(f) + + # Combine dependencies and devDependencies + all_deps = {} + if "dependencies" in data: + all_deps.update(data["dependencies"]) + if "devDependencies" in data: + all_deps.update(data["devDependencies"]) + + for name, version_spec in all_deps.items(): + # Clean version spec (remove ^, ~, >=, etc.) + version = _clean_version(version_spec) + local_deps.append({ + "name": name, + "version": version, + "manager": "npm", + "files": [], # Will be populated by workset scan + "source": "package.json", + "workspace_package": workspace_path # Track which package.json this came from + }) + except (json.JSONDecodeError, KeyError) as e: + # Log but don't fail - package.json might be malformed + print(f"Warning: Could not parse {pkg_path}: {e}") + + return local_deps + + # Parse the root package.json first + root_dir = path.parent + deps.extend(parse_single_package(path, "package.json")) + processed_packages.add(str(path.resolve())) + + # Check for monorepo workspaces + try: + with open(path, encoding="utf-8") as f: + data = json.load(f) + + # Check for workspaces field (Yarn/npm workspaces) + workspaces = data.get("workspaces", []) + + # Handle different workspace formats + if isinstance(workspaces, dict): + # npm 7+ format: {"packages": ["packages/*"]} + workspaces = workspaces.get("packages", []) + + if workspaces and isinstance(workspaces, list): + # This is a monorepo - expand workspace patterns + for pattern in workspaces: + # Convert workspace pattern to absolute path pattern + abs_pattern = str(root_dir / pattern) + + # Handle glob patterns like "packages/*" or "apps/**" + if "*" in abs_pattern: + # Use glob to find matching directories + matched_paths = glob.glob(abs_pattern) + + for matched_path in matched_paths: + matched_dir = Path(matched_path) + if matched_dir.is_dir(): + # Look for package.json in this directory + workspace_pkg = matched_dir / "package.json" + if workspace_pkg.exists(): + # Skip if already processed + if str(workspace_pkg.resolve()) in processed_packages: + continue + + # Calculate relative path for workspace_package field + try: + rel_path = workspace_pkg.relative_to(root_dir) + workspace_path = str(rel_path).replace("\\", "/") + except ValueError: + # If relative path fails, use absolute path + workspace_path = str(workspace_pkg) + + # Parse this workspace package + workspace_deps = parse_single_package(workspace_pkg, workspace_path) + deps.extend(workspace_deps) + processed_packages.add(str(workspace_pkg.resolve())) + else: + # Direct path without glob + workspace_dir = root_dir / pattern + if workspace_dir.is_dir(): + workspace_pkg = workspace_dir / "package.json" + if workspace_pkg.exists(): + # Skip if already processed + if str(workspace_pkg.resolve()) in processed_packages: + continue + + # Calculate relative path 
for workspace_package field + try: + rel_path = workspace_pkg.relative_to(root_dir) + workspace_path = str(rel_path).replace("\\", "/") + except ValueError: + workspace_path = str(workspace_pkg) + + # Parse this workspace package + workspace_deps = parse_single_package(workspace_pkg, workspace_path) + deps.extend(workspace_deps) + processed_packages.add(str(workspace_pkg.resolve())) + + # Also check for Lerna configuration (lerna.json) + lerna_json = root_dir / "lerna.json" + if lerna_json.exists(): + try: + with open(lerna_json, encoding="utf-8") as f: + lerna_data = json.load(f) + + lerna_packages = lerna_data.get("packages", []) + for pattern in lerna_packages: + abs_pattern = str(root_dir / pattern) + if "*" in abs_pattern: + matched_paths = glob.glob(abs_pattern) + for matched_path in matched_paths: + matched_dir = Path(matched_path) + if matched_dir.is_dir(): + workspace_pkg = matched_dir / "package.json" + if workspace_pkg.exists() and str(workspace_pkg.resolve()) not in processed_packages: + try: + rel_path = workspace_pkg.relative_to(root_dir) + workspace_path = str(rel_path).replace("\\", "/") + except ValueError: + workspace_path = str(workspace_pkg) + + workspace_deps = parse_single_package(workspace_pkg, workspace_path) + deps.extend(workspace_deps) + processed_packages.add(str(workspace_pkg.resolve())) + except (json.JSONDecodeError, KeyError): + # Lerna.json parsing failed, continue without it + pass + + # Check for pnpm-workspace.yaml + pnpm_workspace = root_dir / "pnpm-workspace.yaml" + if pnpm_workspace.exists(): + try: + with open(pnpm_workspace, encoding="utf-8") as f: + pnpm_data = yaml.safe_load(f) + + pnpm_packages = pnpm_data.get("packages", []) + for pattern in pnpm_packages: + abs_pattern = str(root_dir / pattern) + if "*" in abs_pattern: + matched_paths = glob.glob(abs_pattern) + for matched_path in matched_paths: + matched_dir = Path(matched_path) + if matched_dir.is_dir(): + workspace_pkg = matched_dir / "package.json" + if workspace_pkg.exists() and str(workspace_pkg.resolve()) not in processed_packages: + try: + rel_path = workspace_pkg.relative_to(root_dir) + workspace_path = str(rel_path).replace("\\", "/") + except ValueError: + workspace_path = str(workspace_pkg) + + workspace_deps = parse_single_package(workspace_pkg, workspace_path) + deps.extend(workspace_deps) + processed_packages.add(str(workspace_pkg.resolve())) + except (yaml.YAMLError, KeyError): + # pnpm-workspace.yaml parsing failed, continue without it + pass + + except (json.JSONDecodeError, KeyError) as e: + # Root package.json parsing for workspaces failed, but we already have root deps + pass + + return deps + + +def _parse_pyproject_toml(path: Path) -> List[Dict[str, Any]]: + """Parse dependencies from pyproject.toml.""" + deps = [] + try: + import tomllib + except ImportError: + # Python < 3.11 + try: + import tomli as tomllib + except ImportError: + # Can't parse TOML without library + print(f"Warning: Cannot parse {path} - tomllib not available") + return deps + + try: + with open(path, "rb") as f: + data = tomllib.load(f) + + # Get project dependencies + project_deps = data.get("project", {}).get("dependencies", []) + for dep_spec in project_deps: + name, version = _parse_python_dep_spec(dep_spec) + if name: + deps.append({ + "name": name, + "version": version or "latest", + "manager": "py", + "files": [], + "source": "pyproject.toml" + }) + + # Also check optional dependencies + optional = data.get("project", {}).get("optional-dependencies", {}) + for group_deps in optional.values(): 
+ for dep_spec in group_deps: + name, version = _parse_python_dep_spec(dep_spec) + if name: + deps.append({ + "name": name, + "version": version or "latest", + "manager": "py", + "files": [], + "source": "pyproject.toml" + }) + except Exception as e: + print(f"Warning: Could not parse {path}: {e}") + + return deps + + +def _parse_requirements_txt(path: Path) -> List[Dict[str, Any]]: + """Parse dependencies from requirements.txt.""" + deps = [] + try: + with open(path, encoding="utf-8") as f: + for line in f: + line = line.strip() + # Skip comments and empty lines + if not line or line.startswith("#"): + continue + # Skip special directives + if line.startswith("-"): + continue + + # Strip inline comments and trailing whitespace + if "#" in line: + line = line.split("#")[0].strip() + + name, version = _parse_python_dep_spec(line) + if name: + deps.append({ + "name": name, + "version": version or "latest", + "manager": "py", + "files": [], + "source": path.name + }) + except Exception as e: + print(f"Warning: Could not parse {path}: {e}") + + return deps + + +def _parse_python_dep_spec(spec: str) -> tuple[str, Optional[str]]: + """ + Parse a Python dependency specification. + Returns (name, version) tuple. + """ + # Handle various formats: + # package==1.2.3 + # package>=1.2.3 + # package~=1.2.3 + # package[extra]==1.2.3 + # package @ git+https://... + + # Remove extras + spec = re.sub(r'\[.*?\]', '', spec) + + # Handle git URLs + if "@" in spec and ("git+" in spec or "https://" in spec): + name = spec.split("@")[0].strip() + return (name, "git") + + # Parse version specs (allow dots, underscores, hyphens in package names) + match = re.match(r'^([a-zA-Z0-9._-]+)\s*([><=~!]+)\s*(.+)$', spec) + if match: + name, op, version = match.groups() + # For pinned versions, use exact version + if op == "==": + return (name, version) + # For other operators, use the specified version as hint + return (name, version) + + # No version specified + return (spec.strip(), None) + + +def _clean_version(version_spec: str) -> str: + """ + Clean version specification to get actual version. 
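The behaviour of _parse_python_dep_spec is easiest to pin down with a few doctest-style examples. These assume TheAuditor is importable; the package names and versions are made up for illustration.

from theauditor.deps import _parse_python_dep_spec

print(_parse_python_dep_spec("requests==2.31.0"))        # ('requests', '2.31.0')
print(_parse_python_dep_spec("uvicorn[standard]>=0.23")) # ('uvicorn', '0.23')  - extras stripped
print(_parse_python_dep_spec("mypkg @ git+https://example.com/repo.git"))  # ('mypkg', 'git')
print(_parse_python_dep_spec("plainpackage"))            # ('plainpackage', None) - no pin given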
+ ^1.2.3 -> 1.2.3 + ~1.2.3 -> 1.2.3 + >=1.2.3 -> 1.2.3 + """ + # Remove common prefixes + version = re.sub(r'^[~^>=<]+', '', version_spec) + # Handle ranges (use first version) + if " " in version: + version = version.split()[0] + return version.strip() + + +def _parse_docker_compose(path: Path) -> List[Dict[str, Any]]: + """Parse Docker base images from docker-compose.yml files.""" + deps = [] + try: + with open(path, encoding="utf-8") as f: + data = yaml.safe_load(f) + + # Check if services key exists + if not data or "services" not in data: + return deps + + # Iterate through services + for service_name, service_config in data["services"].items(): + if not isinstance(service_config, dict): + continue + + # Extract image if present + if "image" in service_config: + image_spec = service_config["image"] + # Parse image:tag format + if ":" in image_spec: + name, tag = image_spec.rsplit(":", 1) + else: + name = image_spec + tag = "latest" + + # Handle registry prefixes (e.g., docker.io/library/postgres) + if "/" in name: + # Take the last part as the image name + name_parts = name.split("/") + if len(name_parts) >= 2: + # If it's library/image, use just image + if name_parts[-2] == "library": + name = name_parts[-1] + else: + # Keep org/image format + name = "/".join(name_parts[-2:]) + + deps.append({ + "name": name, + "version": tag, + "manager": "docker", + "files": [], + "source": path.name + }) + except (yaml.YAMLError, KeyError, AttributeError) as e: + print(f"Warning: Could not parse {path}: {e}") + + return deps + + +def _parse_dockerfile(path: Path) -> List[Dict[str, Any]]: + """Parse Docker base images from Dockerfile.""" + deps = [] + try: + with open(path, encoding="utf-8") as f: + for line in f: + line = line.strip() + # Look for FROM instructions + if line.upper().startswith("FROM "): + # Extract image spec after FROM + image_spec = line[5:].strip() + + # Handle multi-stage builds (FROM image AS stage) + if " AS " in image_spec.upper(): + image_spec = image_spec.split(" AS ")[0].strip() + elif " as " in image_spec: + image_spec = image_spec.split(" as ")[0].strip() + + # Skip scratch and build stages + if image_spec.lower() in ["scratch", "builder"]: + continue + + # Parse image:tag format + if ":" in image_spec: + name, tag = image_spec.rsplit(":", 1) + else: + name = image_spec + tag = "latest" + + # Handle registry prefixes + if "/" in name: + name_parts = name.split("/") + if len(name_parts) >= 2: + if name_parts[-2] == "library": + name = name_parts[-1] + else: + name = "/".join(name_parts[-2:]) + + deps.append({ + "name": name, + "version": tag, + "manager": "docker", + "files": [], + "source": str(path.relative_to(Path.cwd())) + }) + except Exception as e: + print(f"Warning: Could not parse {path}: {e}") + + return deps + + +def write_deps_json(deps: List[Dict[str, Any]], output_path: str = "./.pf/deps.json") -> None: + """Write dependencies to JSON file.""" + try: + output = sanitize_path(output_path, ".") + output.parent.mkdir(parents=True, exist_ok=True) + + with open(output, "w", encoding="utf-8") as f: + json.dump(deps, f, indent=2, sort_keys=True) + except SecurityError as e: + raise SecurityError(f"Invalid output path: {e}") + + +def check_latest_versions( + deps: List[Dict[str, Any]], + allow_net: bool = True, + offline: bool = False, + cache_file: str = "./.pf/deps_cache.json" +) -> Dict[str, Dict[str, Any]]: + """ + Check latest versions from registries with caching. 
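Both the compose parser and the Dockerfile parser reduce an image reference to the same dependency record shape that parse_dependencies returns for npm and Python packages. A condensed, standalone sketch of that normalization; the helper name image_to_dep is invented for illustration and is not part of the patch.

def image_to_dep(image_spec: str, source: str) -> dict:
    # Split "name:tag", defaulting the tag to "latest" when none is given.
    name, tag = image_spec.rsplit(":", 1) if ":" in image_spec else (image_spec, "latest")
    parts = name.split("/")
    if len(parts) >= 2:
        # docker.io/library/postgres -> postgres, ghcr.io/org/app -> org/app
        name = parts[-1] if parts[-2] == "library" else "/".join(parts[-2:])
    return {"name": name, "version": tag, "manager": "docker", "files": [], "source": source}

print(image_to_dep("docker.io/library/postgres:16-alpine", "docker-compose.yml"))
# {'name': 'postgres', 'version': '16-alpine', 'manager': 'docker', 'files': [], 'source': 'docker-compose.yml'}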
+ + Returns dict keyed by "manager:name" with: + { + "locked": str, + "latest": str, + "delta": str, + "is_outdated": bool, + "last_checked": str (ISO timestamp) + } + """ + if offline or not allow_net: + # Try to load from cache in offline mode + cached_data = _load_deps_cache(cache_file) + if cached_data: + # Update locked versions from current deps + for dep in deps: + key = f"{dep['manager']}:{dep['name']}" + if key in cached_data: + cached_data[key]["locked"] = dep["version"] + cached_data[key]["is_outdated"] = cached_data[key]["latest"] != dep["version"] + cached_data[key]["delta"] = _calculate_version_delta(dep["version"], cached_data[key]["latest"]) + return cached_data or {} + + # Load existing cache + cache = _load_deps_cache(cache_file) + latest_info = {} + needs_check = [] + + # FIRST PASS: Check what's in cache and still valid + for dep in deps: + key = f"{dep['manager']}:{dep['name']}" + if key in latest_info: + continue # Already processed + + # Check if we have valid cached data (24 hours for deps) + if key in cache and _is_cache_valid(cache[key], hours=24): + # Update locked version from current deps + cache[key]["locked"] = dep["version"] + cache[key]["is_outdated"] = cache[key]["latest"] != dep["version"] + cache[key]["delta"] = _calculate_version_delta(dep["version"], cache[key]["latest"]) + latest_info[key] = cache[key] + else: + needs_check.append(dep) + + # Early exit if everything is cached + if not needs_check: + return latest_info + + # SECOND PASS: Check only what needs updating, with per-service rate limiting + npm_rate_limited_until = 0 + pypi_rate_limited_until = 0 + docker_rate_limited_until = 0 + + for dep in needs_check: + key = f"{dep['manager']}:{dep['name']}" + current_time = time.time() + + # Skip if this service is rate limited + if dep["manager"] == "npm" and current_time < npm_rate_limited_until: + # Use cached data if available, even if expired + if key in cache: + latest_info[key] = cache[key] + continue + elif dep["manager"] == "py" and current_time < pypi_rate_limited_until: + if key in cache: + latest_info[key] = cache[key] + continue + elif dep["manager"] == "docker" and current_time < docker_rate_limited_until: + if key in cache: + latest_info[key] = cache[key] + continue + + try: + if dep["manager"] == "npm": + latest = _check_npm_latest(dep["name"]) + elif dep["manager"] == "py": + latest = _check_pypi_latest(dep["name"]) + elif dep["manager"] == "docker": + latest = _check_dockerhub_latest(dep["name"]) + else: + continue + + if latest: + locked = dep["version"] + delta = _calculate_version_delta(locked, latest) + latest_info[key] = { + "locked": locked, + "latest": latest, + "delta": delta, + "is_outdated": locked != latest, + "last_checked": datetime.now().isoformat() + } + # Rate limiting: service-specific delays for optimal performance + if dep["manager"] == "npm": + time.sleep(RATE_LIMIT_NPM) # 0.1s for npm + elif dep["manager"] == "py": + time.sleep(RATE_LIMIT_PYPI) # 0.2s for PyPI + elif dep["manager"] == "docker": + time.sleep(RATE_LIMIT_DOCKER) # 0.2s for Docker Hub + except (urllib.error.URLError, urllib.error.HTTPError, http.client.RemoteDisconnected, + TimeoutError, json.JSONDecodeError, KeyError, ValueError) as e: + error_msg = f"{type(e).__name__}: {str(e)[:50]}" + + # Handle rate limiting and connection errors specifically + if ("429" in str(e) or "rate" in str(e).lower() or + "RemoteDisconnected" in str(e) or "closed connection" in str(e).lower()): + # Set rate limit expiry for this service + if dep["manager"] == "npm": + 
npm_rate_limited_until = current_time + RATE_LIMIT_BACKOFF + elif dep["manager"] == "py": + pypi_rate_limited_until = current_time + RATE_LIMIT_BACKOFF + elif dep["manager"] == "docker": + docker_rate_limited_until = current_time + RATE_LIMIT_BACKOFF + + # Use cached data if available, even if expired + if key in cache: + latest_info[key] = cache[key] + latest_info[key]["error"] = error_msg + else: + latest_info[key] = { + "locked": dep["version"], + "latest": None, + "delta": None, + "is_outdated": False, + "error": error_msg, + "last_checked": datetime.now().isoformat() + } + continue + + # Save updated cache + _save_deps_cache(latest_info, cache_file) + + return latest_info + + +def _load_deps_cache(cache_file: str) -> Dict[str, Dict[str, Any]]: + """ + Load the dependency cache from disk. + Returns empty dict if cache doesn't exist or is invalid. + """ + try: + cache_path = Path(cache_file) + if cache_path.exists(): + with open(cache_path, 'r', encoding='utf-8') as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + pass + return {} + + +def _save_deps_cache(latest_info: Dict[str, Dict[str, Any]], cache_file: str) -> None: + """ + Save the dependency cache to disk. + Merges with existing cache to preserve data for packages not in current check. + """ + try: + cache_path = Path(cache_file) + cache_path.parent.mkdir(parents=True, exist_ok=True) + + # Load existing cache to merge + existing = _load_deps_cache(cache_file) + + # Merge new data into existing (new data takes precedence) + existing.update(latest_info) + + # Write merged cache + with open(cache_path, 'w', encoding='utf-8') as f: + json.dump(existing, f, indent=2, sort_keys=True) + except OSError: + pass # Fail silently if can't write cache + + +def _is_cache_valid(cached_item: Dict[str, Any], hours: int = 24) -> bool: + """ + Check if a cached item is still valid based on age. + Default is 24 hours for dependency version checks. 
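The caching layer keys entries by "manager:name" and treats them as fresh for 24 hours, which is what lets repeated runs skip the registries entirely. A minimal standalone re-statement of that freshness check, equivalent in intent to _is_cache_valid and written here only for illustration:

from datetime import datetime, timedelta

def cache_entry_is_fresh(entry: dict, hours: int = 24) -> bool:
    """Return True if the entry was checked within the last `hours` hours."""
    try:
        last = datetime.fromisoformat(entry["last_checked"])
    except (KeyError, ValueError):
        return False
    return datetime.now() - last < timedelta(hours=hours)

fresh = {"last_checked": datetime.now().isoformat(), "latest": "1.2.3"}
stale = {"last_checked": (datetime.now() - timedelta(days=2)).isoformat(), "latest": "1.2.3"}
print(cache_entry_is_fresh(fresh), cache_entry_is_fresh(stale))  # True False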
+ """ + try: + if "last_checked" not in cached_item: + return False + last_checked = datetime.fromisoformat(cached_item["last_checked"]) + age = datetime.now() - last_checked + return age.total_seconds() < (hours * 3600) + except (ValueError, KeyError): + return False + + +def _check_npm_latest(package_name: str) -> Optional[str]: + """Fetch latest version from npm registry.""" + import urllib.request + import urllib.error + + # Validate and sanitize package name + if not validate_package_name(package_name, "npm"): + return None + + # URL-encode the package name for safety + safe_package_name = sanitize_url_component(package_name) + url = f"https://registry.npmjs.org/{safe_package_name}" + try: + with urllib.request.urlopen(url, timeout=10) as response: + data = json.loads(response.read()) + return data.get("dist-tags", {}).get("latest") + except (urllib.error.URLError, http.client.RemoteDisconnected, json.JSONDecodeError, KeyError): + return None + + +def _check_pypi_latest(package_name: str) -> Optional[str]: + """Fetch latest version from PyPI.""" + import urllib.request + import urllib.error + + # Validate package name + if not validate_package_name(package_name, "py"): + return None + + # Normalize package name for PyPI (replace underscores with hyphens) + normalized_name = package_name.replace('_', '-') + # Sanitize for URL + safe_package_name = sanitize_url_component(normalized_name) + url = f"https://pypi.org/pypi/{safe_package_name}/json" + try: + with urllib.request.urlopen(url, timeout=10) as response: + data = json.loads(response.read()) + return data.get("info", {}).get("version") + except (urllib.error.URLError, http.client.RemoteDisconnected, json.JSONDecodeError, KeyError): + return None + + +def _check_dockerhub_latest(image_name: str) -> Optional[str]: + """Fetch latest version from Docker Hub.""" + import urllib.request + import urllib.error + + # Validate image name + if not validate_package_name(image_name, "docker"): + return None + + # For official images, use library/ prefix + if "/" not in image_name: + image_name = f"library/{image_name}" + + # Sanitize image name for URL + safe_image_name = sanitize_url_component(image_name) + + # Docker Hub API endpoint for tags + url = f"https://hub.docker.com/v2/repositories/{safe_image_name}/tags" + + try: + # Create request with proper headers + req = urllib.request.Request(url) + req.add_header('User-Agent', 'TheAuditor/0.1.0') + + with urllib.request.urlopen(req, timeout=10) as response: + data = json.loads(response.read()) + + # Parse the results to find latest stable version + tags = data.get("results", []) + if not tags: + return None + + # Filter and sort tags to find the best "latest" version + version_tags = [] + for tag in tags: + tag_name = tag.get("name", "") + # Skip non-version tags + if tag_name in ["latest", "alpine", "slim", "bullseye", "bookworm"]: + continue + # Look for semantic version-like tags + if re.match(r'^\d+(\.\d+)*', tag_name): + version_tags.append(tag_name) + + if version_tags: + # Sort versions (simple string sort for now) + # More sophisticated version comparison could be added + version_tags.sort(reverse=True) + return version_tags[0] + + # Fallback to "latest" if no version tags found + for tag in tags: + if tag.get("name") == "latest": + return "latest" + + return None + + except (urllib.error.URLError, http.client.RemoteDisconnected, json.JSONDecodeError, KeyError) as e: + # Docker Hub API might require auth or have rate limits + return None + + +def _calculate_version_delta(locked: str, 
latest: str) -> str: + """ + Calculate semantic version delta. + Returns: "major", "minor", "patch", "equal", or "unknown" + """ + try: + locked_parts = [int(x) for x in locked.split(".")[:3]] + latest_parts = [int(x) for x in latest.split(".")[:3]] + + # Pad with zeros if needed + while len(locked_parts) < 3: + locked_parts.append(0) + while len(latest_parts) < 3: + latest_parts.append(0) + + if locked_parts == latest_parts: + return "equal" + elif latest_parts[0] > locked_parts[0]: + return "major" + elif latest_parts[1] > locked_parts[1]: + return "minor" + elif latest_parts[2] > locked_parts[2]: + return "patch" + else: + return "unknown" # locked is newer than latest? + except (ValueError, IndexError): + return "unknown" + + +def write_deps_latest_json( + latest_info: Dict[str, Dict[str, Any]], + output_path: str = "./.pf/deps_latest.json" +) -> None: + """Write latest version info to JSON file.""" + try: + output = sanitize_path(output_path, ".") + output.parent.mkdir(parents=True, exist_ok=True) + + with open(output, "w", encoding="utf-8") as f: + json.dump(latest_info, f, indent=2, sort_keys=True) + except SecurityError as e: + raise SecurityError(f"Invalid output path: {e}") + + +def upgrade_all_deps( + root_path: str, + latest_info: Dict[str, Dict[str, Any]], + deps_list: List[Dict[str, Any]] +) -> Dict[str, int]: + """ + YOLO MODE: Upgrade all dependencies to latest versions. + Rewrites requirements.txt, package.json, and pyproject.toml with latest versions. + + Returns dict with counts of upgraded packages per file type. + """ + import shutil + from datetime import datetime + + root = Path(root_path) + upgraded = { + "requirements.txt": 0, + "package.json": 0, + "pyproject.toml": 0 + } + + # Group deps by source file + deps_by_source = {} + for dep in deps_list: + source = dep.get("source", "") + if source not in deps_by_source: + deps_by_source[source] = [] + deps_by_source[source].append(dep) + + # Upgrade requirements*.txt files + for req_file in root.glob("requirements*.txt"): + if req_file.name in deps_by_source: + count = _upgrade_requirements_txt(req_file, latest_info, deps_by_source[req_file.name]) + upgraded["requirements.txt"] += count + + # Upgrade package.json + package_json = root / "package.json" + if package_json.exists() and "package.json" in deps_by_source: + count = _upgrade_package_json(package_json, latest_info, deps_by_source["package.json"]) + upgraded["package.json"] = count + + # Upgrade pyproject.toml + pyproject = root / "pyproject.toml" + if pyproject.exists() and "pyproject.toml" in deps_by_source: + count = _upgrade_pyproject_toml(pyproject, latest_info, deps_by_source["pyproject.toml"]) + upgraded["pyproject.toml"] = count + + return upgraded + + +def _upgrade_requirements_txt( + path: Path, + latest_info: Dict[str, Dict[str, Any]], + deps: List[Dict[str, Any]] +) -> int: + """Upgrade a requirements.txt file to latest versions.""" + # Sanitize path + try: + safe_path = sanitize_path(str(path), ".") + except SecurityError: + return 0 # Skip files outside project root + + # Create backup + backup_path = safe_path.with_suffix(safe_path.suffix + ".bak") + shutil.copy2(safe_path, backup_path) + + # Read current file + with open(safe_path, "r", encoding="utf-8") as f: + lines = f.readlines() + + # Build package name to latest version map + latest_versions = {} + for dep in deps: + key = f"py:{dep['name']}" + if key in latest_info: + latest_versions[dep['name']] = latest_info[key]['latest'] + + # Rewrite lines with latest versions + updated_lines = [] + 
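For reference, a few hypothetical inputs and the classification _calculate_version_delta gives them; this assumes theauditor.deps is importable and the version strings are made up.

from theauditor.deps import _calculate_version_delta

print(_calculate_version_delta("1.2.3", "2.0.0"))    # major
print(_calculate_version_delta("1.2.3", "1.3.0"))    # minor
print(_calculate_version_delta("1.2.3", "1.2.4"))    # patch
print(_calculate_version_delta("1.2.3", "1.2.3"))    # equal
print(_calculate_version_delta("1.2", "not-semver")) # unknown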
count = 0 + + for line in lines: + original_line = line + line = line.strip() + + # Skip comments and empty lines + if not line or line.startswith("#") or line.startswith("-"): + updated_lines.append(original_line) + continue + + # Parse package name + name, _ = _parse_python_dep_spec(line) + + if name and name in latest_versions: + # Replace with latest version + updated_lines.append(f"{name}=={latest_versions[name]}\n") + count += 1 + else: + updated_lines.append(original_line) + + # Write updated file + with open(safe_path, "w", encoding="utf-8") as f: + f.writelines(updated_lines) + + return count + + +def _upgrade_package_json( + path: Path, + latest_info: Dict[str, Dict[str, Any]], + deps: List[Dict[str, Any]] +) -> int: + """Upgrade package.json to latest versions.""" + import shutil + + # Sanitize path + try: + safe_path = sanitize_path(str(path), ".") + except SecurityError: + return 0 # Skip files outside project root + + # Create backup + backup_path = safe_path.with_suffix(safe_path.suffix + ".bak") + shutil.copy2(safe_path, backup_path) + + # Read current file + with open(safe_path, "r", encoding="utf-8") as f: + data = json.load(f) + + count = 0 + + # Update dependencies + if "dependencies" in data: + for name in data["dependencies"]: + key = f"npm:{name}" + if key in latest_info: + data["dependencies"][name] = latest_info[key]["latest"] + count += 1 + + # Update devDependencies + if "devDependencies" in data: + for name in data["devDependencies"]: + key = f"npm:{name}" + if key in latest_info: + data["devDependencies"][name] = latest_info[key]["latest"] + count += 1 + + # Write updated file + with open(safe_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + f.write("\n") # Add trailing newline + + return count + + +def _upgrade_pyproject_toml( + path: Path, + latest_info: Dict[str, Dict[str, Any]], + deps: List[Dict[str, Any]] +) -> int: + """Upgrade pyproject.toml to latest versions - handles ALL sections.""" + import shutil + import re + + # Sanitize path + try: + safe_path = sanitize_path(str(path), ".") + except SecurityError: + return 0 # Skip files outside project root + + # Create backup + backup_path = safe_path.with_suffix(safe_path.suffix + ".bak") + shutil.copy2(safe_path, backup_path) + + # Read entire file as string for regex replacement + with open(safe_path, "r", encoding="utf-8") as f: + content = f.read() + + count = 0 + updated_packages = {} # Track all updates: package -> [(old, new)] + + # For each package in latest_info + for key, info in latest_info.items(): + if not key.startswith("py:"): + continue + + package_name = key[3:] # Remove "py:" prefix + latest_version = info.get("latest") + + if not latest_version: + continue + + # Pattern to match this package anywhere in the file + # Matches: "package==X.Y.Z" with any version number + pattern = rf'"{package_name}==([^"]+)"' + + # Replace ALL occurrences at once using re.sub with a function + def replacer(match): + old_version = match.group(1) + if old_version != latest_version: + # Track the update + if package_name not in updated_packages: + updated_packages[package_name] = [] + updated_packages[package_name].append((old_version, latest_version)) + return f'"{package_name}=={latest_version}"' + return match.group(0) # No change + + # Replace all occurrences in one pass + new_content = re.sub(pattern, replacer, content) + + # Update count only if package was actually updated + if package_name in updated_packages and content != new_content: + count += 1 + content = new_content + + # Write 
updated content + with open(safe_path, "w", encoding="utf-8") as f: + f.write(content) + + # Report what was updated + total_occurrences = 0 + # Use ASCII characters on Windows + check_mark = "[OK]" if IS_WINDOWS else "✓" + arrow = "->" if IS_WINDOWS else "→" + for package, updates in updated_packages.items(): + total_occurrences += len(updates) + if len(updates) == 1: + print(f" {check_mark} {package}: {updates[0][0]} {arrow} {updates[0][1]}") + else: + print(f" {check_mark} {package}: {updates[0][0]} {arrow} {updates[0][1]} ({len(updates)} occurrences)") + + # Return total occurrences updated, not just unique packages + return total_occurrences \ No newline at end of file diff --git a/theauditor/docgen.py b/theauditor/docgen.py new file mode 100644 index 0000000..baa4255 --- /dev/null +++ b/theauditor/docgen.py @@ -0,0 +1,565 @@ +"""Documentation generator from index and capsules (optional feature).""" + +import hashlib +import json +import platform +import sqlite3 +import sys +from collections import defaultdict +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from theauditor import __version__ + + +def is_source_file(file_path: str) -> bool: + """Check if a file is a source code file (not test, config, or docs).""" + path = Path(file_path) + + # Skip test files and directories + if any(part in ['test', 'tests', '__tests__', 'spec', 'fixtures', 'fixture_repo', 'test_scaffold'] for part in path.parts): + return False + if path.name.startswith('test_') or path.name.endswith('_test.py') or '.test.' in path.name or '.spec.' in path.name: + return False + if 'test' in str(path).lower() and any(ext in str(path).lower() for ext in ['.spec.', '_test.', 'test_']): + return False + + # Skip documentation + if path.suffix.lower() in ['.md', '.rst', '.txt']: + return False + + # Skip configuration files + config_files = { + '.gitignore', '.gitattributes', '.editorconfig', + 'pyproject.toml', 'setup.py', 'setup.cfg', + 'package.json', 'package-lock.json', 'yarn.lock', + 'package-template.json', 'tsconfig.json', + 'Makefile', 'makefile', 'requirements.txt', + 'Dockerfile', 'docker-compose.yml', '.dockerignore', + 'manifest.json', 'repo_index.db' + } + if path.name.lower() in config_files: + return False + + # Skip build artifacts and caches + skip_dirs = {'docs', 'documentation', 'examples', 'samples', 'schemas', 'agent_templates'} + if any(part.lower() in skip_dirs for part in path.parts): + return False + + return True + + +def load_manifest(manifest_path: str) -> tuple[list[dict], str]: + """Load manifest and compute its hash.""" + with open(manifest_path, "rb") as f: + content = f.read() + manifest_hash = hashlib.sha256(content).hexdigest() + + manifest = json.loads(content) + return manifest, manifest_hash + + +def load_workset(workset_path: str) -> set[str]: + """Load workset file paths.""" + if not Path(workset_path).exists(): + return set() + + with open(workset_path) as f: + workset = json.load(f) + return {p["path"] for p in workset.get("paths", [])} + + +def load_capsules(capsules_dir: str, workset_paths: set[str] | None = None) -> list[dict]: + """Load capsules, optionally filtered by workset.""" + capsules = [] + capsules_path = Path(capsules_dir) + + if not capsules_path.exists(): + raise RuntimeError(f"Capsules directory not found: {capsules_dir}") + + for capsule_file in sorted(capsules_path.glob("*.json")): + with open(capsule_file) as f: + capsule = json.load(f) + + # Filter by workset if provided + if workset_paths is None or 
capsule.get("path") in workset_paths: + # Filter out non-source files + if is_source_file(capsule.get("path", "")): + capsules.append(capsule) + + return capsules + + +def get_routes(db_path: str, workset_paths: set[str] | None = None) -> list[dict]: + """Get routes from database, excluding test files.""" + if not Path(db_path).exists(): + return [] + + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + if workset_paths: + placeholders = ",".join("?" * len(workset_paths)) + query = f""" + SELECT method, pattern, file + FROM api_endpoints + WHERE file IN ({placeholders}) + ORDER BY file, pattern + """ + cursor.execute(query, tuple(workset_paths)) + else: + cursor.execute( + """ + SELECT method, pattern, file + FROM api_endpoints + ORDER BY file, pattern + """ + ) + + routes = [] + for row in cursor.fetchall(): + # Filter out test files + if is_source_file(row[2]): + routes.append({"method": row[0], "pattern": row[1], "file": row[2]}) + + conn.close() + return routes + + +def get_sql_objects(db_path: str, workset_paths: set[str] | None = None) -> list[dict]: + """Get SQL objects from database, excluding test files.""" + if not Path(db_path).exists(): + return [] + + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + if workset_paths: + placeholders = ",".join("?" * len(workset_paths)) + query = f""" + SELECT kind, name, file + FROM sql_objects + WHERE file IN ({placeholders}) + ORDER BY kind, name + """ + cursor.execute(query, tuple(workset_paths)) + else: + cursor.execute( + """ + SELECT kind, name, file + FROM sql_objects + ORDER BY kind, name + """ + ) + + objects = [] + for row in cursor.fetchall(): + # Filter out test files + if is_source_file(row[2]): + objects.append({"kind": row[0], "name": row[1], "file": row[2]}) + + conn.close() + return objects + + +def group_files_by_folder(capsules: list[dict]) -> dict[str, list[dict]]: + """Group files by their first directory segment.""" + groups = defaultdict(list) + + for capsule in capsules: + path = capsule.get("path", "") + if "/" in path: + folder = path.split("/")[0] + else: + folder = "." 
+ groups[folder].append(capsule) + + # Sort by folder name + return dict(sorted(groups.items())) + + +def generate_architecture_md( + routes: list[dict], + sql_objects: list[dict], + capsules: list[dict], + scope: str, +) -> str: + """Generate ARCHITECTURE.md content.""" + now = datetime.now(UTC).isoformat() + + content = [ + "# Architecture", + f"Generated at: {now}", + "", + "## Scope", + f"Mode: {scope}", + "", + ] + + # Routes table + if routes: + content.extend( + [ + "## Routes", + "", + "| Method | Pattern | File |", + "|--------|---------|------|", + ] + ) + for route in routes: + content.append(f"| {route['method']} | {route['pattern']} | {route['file']} |") + content.append("") + + # SQL Objects table + if sql_objects: + content.extend( + [ + "## SQL Objects", + "", + "| Kind | Name | File |", + "|------|------|------|", + ] + ) + for obj in sql_objects: + content.append(f"| {obj['kind']} | {obj['name']} | {obj['file']} |") + content.append("") + + # Core Modules (group by actual functionality) + groups = group_files_by_folder(capsules) + if groups: + content.extend( + [ + "## Core Modules", + "", + ] + ) + + # Filter and organize by purpose + module_categories = { + "Core CLI": {}, + "Analysis & Detection": {}, + "Code Generation": {}, + "Reporting": {}, + "Utilities": {}, + } + + for folder, folder_capsules in groups.items(): + if folder == "theauditor": + for capsule in folder_capsules: + path = Path(capsule.get("path", "")) + name = path.stem + + # Skip duplicates and internal modules + if name in ['__init__', 'parsers'] or name.endswith('.py.tpl'): + continue + + exports = capsule.get("interfaces", {}).get("exports", []) + functions = capsule.get("interfaces", {}).get("functions", []) + classes = capsule.get("interfaces", {}).get("classes", []) + + # Categorize based on filename + if name in ['cli', 'orchestrator', 'config', 'config_runtime']: + category = "Core CLI" + elif name in ['lint', 'ast_verify', 'universal_detector', 'pattern_loader', 'flow_analyzer', 'risk_scorer', 'pattern_rca', 'xgraph_analyzer']: + category = "Analysis & Detection" + elif name in ['scaffolder', 'test_generator', 'claude_setup', 'claude_autogen', 'venv_install']: + category = "Code Generation" + elif name in ['report', 'capsules', 'docgen', 'journal_view']: + category = "Reporting" + else: + # Skip certain utility files from main display + if name in ['utils', 'evidence', 'runner', 'contracts', 'tools']: + continue + category = "Utilities" + + # Build summary (only add if not already present) + if name not in module_categories[category]: + summary_parts = [] + if classes: + summary_parts.append(f"Classes: {', '.join(classes[:3])}") + elif functions: + summary_parts.append(f"Functions: {', '.join(functions[:3])}") + elif exports: + summary_parts.append(f"Exports: {', '.join(exports[:3])}") + + summary = " | ".join(summary_parts) if summary_parts else "Utility module" + module_categories[category][name] = f"- **{name}**: {summary}" + + # Output categorized modules + for category, modules_dict in module_categories.items(): + if modules_dict: + content.append(f"### {category}") + # Sort modules by name and get their descriptions + for name in sorted(modules_dict.keys()): + content.append(modules_dict[name]) + content.append("") + + return "\n".join(content) + + +def generate_features_md(capsules: list[dict]) -> str: + """Generate FEATURES.md content with meaningful feature descriptions.""" + content = [ + "# Features & Capabilities", + "", + "## Core Functionality", + "", + ] + + # Analyze capsules 
to extract features + features = { + "Code Analysis": [], + "Test Generation": [], + "Documentation": [], + "CI/CD Integration": [], + "ML Capabilities": [], + } + + cli_commands = set() + + for capsule in capsules: + path = Path(capsule.get("path", "")) + if path.parent.name != "theauditor": + continue + + name = path.stem + exports = capsule.get("interfaces", {}).get("exports", []) + functions = capsule.get("interfaces", {}).get("functions", []) + + # Extract features based on module + if name == "cli": + # Try to extract CLI commands from functions + for func in functions: + if func not in ['main', 'cli']: + cli_commands.add(func) + elif name == "lint": + features["Code Analysis"].append("- **Linting**: Custom security and quality rules") + elif name == "ast_verify": + features["Code Analysis"].append("- **AST Verification**: Contract-based code verification") + elif name == "universal_detector": + features["Code Analysis"].append("- **Pattern Detection**: Security and performance anti-patterns") + elif name == "flow_analyzer": + features["Code Analysis"].append("- **Flow Analysis**: Deadlock and race condition detection") + elif name == "risk_scorer": + features["Code Analysis"].append("- **Risk Scoring**: Automated risk assessment for files") + elif name == "test_generator": + features["Test Generation"].append("- **Test Scaffolding**: Generate test stubs from code") + elif name == "scaffolder": + features["Test Generation"].append("- **Contract Tests**: Generate DB/API contract tests") + elif name == "docgen": + features["Documentation"].append("- **Architecture Docs**: Auto-generate architecture documentation") + elif name == "capsules": + features["Documentation"].append("- **Code Capsules**: Compressed code summaries") + elif name == "report": + features["Documentation"].append("- **Audit Reports**: Comprehensive audit report generation") + elif name == "claude_setup": + features["CI/CD Integration"].append("- **Claude Code Integration**: Automated hooks for Claude AI") + elif name == "orchestrator": + features["CI/CD Integration"].append("- **Event-Driven Automation**: Git hooks and CI pipeline support") + elif name == "ml": + features["ML Capabilities"].append("- **ML-Based Suggestions**: Learn from codebase patterns") + features["ML Capabilities"].append("- **Root Cause Prediction**: Predict likely failure points") + + # Output features by category + for category, feature_list in features.items(): + if feature_list: + content.append(f"### {category}") + # Deduplicate + seen = set() + for feature in feature_list: + if feature not in seen: + content.append(feature) + seen.add(feature) + content.append("") + + # Add CLI commands summary + if cli_commands: + content.append("## Available Commands") + content.append("") + content.append("The following commands are available through the CLI:") + content.append("") + # Group commands by purpose + cmd_groups = { + "Analysis": ['lint', 'ast_verify', 'detect_patterns', 'flow_analyze', 'risk_score'], + "Generation": ['gen_tests', 'scaffold', 'suggest_fixes'], + "Reporting": ['report', 'journal', 'capsules'], + "Setup": ['init', 'setup_claude', 'deps'], + } + + for group, cmds in cmd_groups.items(): + group_cmds = [c for c in cli_commands if any(cmd in c for cmd in cmds)] + if group_cmds: + content.append(f"**{group}**: {', '.join(sorted(group_cmds)[:5])}") + content.append("") + + # Add configuration info + content.append("## Configuration") + content.append("") + content.append("- **Zero Dependencies**: Core functionality uses only Python 
stdlib") + content.append("- **Offline Mode**: All operations work without network access") + content.append("- **Per-Project**: No global state, everything is project-local") + content.append("") + + return "\n".join(content) + + +def generate_trace_md( + manifest_hash: str, + manifest: list[dict], + capsules: list[dict], + db_path: str, + workset_paths: set[str] | None, +) -> str: + """Generate TRACE.md content with meaningful metrics.""" + # Count database entries + routes_count = 0 + sql_objects_count = 0 + refs_count = 0 + imports_count = 0 + + if Path(db_path).exists(): + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + cursor.execute("SELECT COUNT(*) FROM api_endpoints") + routes_count = cursor.fetchone()[0] + + cursor.execute("SELECT COUNT(*) FROM sql_objects") + sql_objects_count = cursor.fetchone()[0] + + # Count refs (files table) + cursor.execute("SELECT COUNT(*) FROM files") + refs_count = cursor.fetchone()[0] + + # Count imports + try: + cursor.execute("SELECT COUNT(*) FROM imports") + imports_count = cursor.fetchone()[0] + except sqlite3.OperationalError: + imports_count = 0 + + conn.close() + + # Separate source files from all files + source_files = [f for f in manifest if is_source_file(f.get("path", ""))] + test_files = [f for f in manifest if 'test' in f.get("path", "").lower()] + doc_files = [f for f in manifest if f.get("path", "").endswith(('.md', '.rst', '.txt'))] + + # Calculate coverage + if workset_paths: + coverage = len(capsules) / len(workset_paths) * 100 if workset_paths else 0 + else: + coverage = len(capsules) / len(source_files) * 100 if source_files else 0 + + content = [ + "# Audit Trace", + "", + "## Repository Snapshot", + f"**Manifest Hash**: `{manifest_hash}`", + f"**Timestamp**: {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}", + "", + "## File Statistics", + f"- **Total Files**: {len(manifest)}", + f" - Source Files: {len(source_files)}", + f" - Test Files: {len(test_files)}", + f" - Documentation: {len(doc_files)}", + f" - Other: {len(manifest) - len(source_files) - len(test_files) - len(doc_files)}", + "", + "## Code Metrics", + f"- **Cross-References**: {refs_count}", + f"- **Import Statements**: {imports_count}", + f"- **HTTP Routes**: {routes_count}", + f"- **SQL Objects**: {sql_objects_count}", + "", + "## Analysis Coverage", + f"- **Coverage**: {coverage:.1f}% of source files", + f"- **Capsules Generated**: {len(capsules)}", + f"- **Scope**: {'Workset' if workset_paths else 'Full repository'}", + "", + "## Language Distribution", + ] + + # Count languages + lang_counts = defaultdict(int) + for capsule in capsules: + lang = capsule.get("language", "") # Empty not unknown + lang_counts[lang] += 1 + + for lang, count in sorted(lang_counts.items(), key=lambda x: x[1], reverse=True): + content.append(f"- {lang}: {count} files") + + content.extend([ + "", + "## Environment", + f"- **TheAuditor Version**: {__version__}", + f"- **Python**: {sys.version.split()[0]}", + f"- **Platform**: {platform.platform()}", + f"- **Processor**: {platform.processor() or 'Unknown'}", + "", + "## Audit Trail", + "This document provides cryptographic proof of the codebase state at audit time.", + "The manifest hash can be used to verify no files have been modified since analysis.", + "", + ]) + + return "\n".join(content) + + +# This function was moved above generate_trace_md + + +def generate_docs( + manifest_path: str = "manifest.json", + db_path: str = "repo_index.db", + capsules_dir: str = "./.pf/capsules", + workset_path: str = 
"./.pf/workset.json", + out_dir: str = "./.pf/docs", + full: bool = False, + print_stats: bool = False, +) -> dict[str, Any]: + """Generate documentation from index and capsules.""" + + # Load data + manifest, manifest_hash = load_manifest(manifest_path) + workset_paths = None if full else load_workset(workset_path) + + try: + capsules = load_capsules(capsules_dir, workset_paths) + except RuntimeError as e: + raise RuntimeError(f"Cannot generate docs: {e}. Run 'aud capsules' first.") from e + + # Get database data + routes = get_routes(db_path, workset_paths) + sql_objects = get_sql_objects(db_path, workset_paths) + + # Generate content + scope = "full" if full else "workset" + architecture_content = generate_architecture_md(routes, sql_objects, capsules, scope) + trace_content = generate_trace_md(manifest_hash, manifest, capsules, db_path, workset_paths) + features_content = generate_features_md(capsules) + + # Write files + out_path = Path(out_dir) + out_path.mkdir(parents=True, exist_ok=True) + + (out_path / "ARCHITECTURE.md").write_text(architecture_content) + (out_path / "TRACE.md").write_text(trace_content) + (out_path / "FEATURES.md").write_text(features_content) + + result = { + "files_written": 3, + "scope": scope, + "capsules_used": len(capsules), + "routes": len(routes), + "sql_objects": len(sql_objects), + } + + if print_stats: + print(f"Generated {result['files_written']} docs in {out_dir}") + print(f" Scope: {result['scope']}") + print(f" Capsules: {result['capsules_used']}") + print(f" Routes: {result['routes']}") + print(f" SQL Objects: {result['sql_objects']}") + + return result diff --git a/theauditor/docker_analyzer.py b/theauditor/docker_analyzer.py new file mode 100644 index 0000000..455809b --- /dev/null +++ b/theauditor/docker_analyzer.py @@ -0,0 +1,310 @@ +"""Docker container security analyzer module.""" + +import json +import logging +import re +import sqlite3 +from pathlib import Path +from typing import Any, Dict, List + +# Set up logger +logger = logging.getLogger(__name__) + + +def analyze_docker_images(db_path: str, check_vulnerabilities: bool = True) -> List[Dict[str, Any]]: + """ + Analyze indexed Docker images for security misconfigurations. 
+ + Args: + db_path: Path to the repo_index.db database + check_vulnerabilities: Whether to scan base images for vulnerabilities + + Returns: + List of security findings with severity levels + """ + findings = [] + + # Connect to the database + with sqlite3.connect(db_path) as conn: + conn.row_factory = sqlite3.Row + + # Run each security check + findings.extend(_find_root_containers(conn)) + findings.extend(_find_exposed_secrets(conn)) + + # Base image vulnerability check + if check_vulnerabilities: + base_images = _prepare_base_image_scan(conn) + if base_images: + # Import here to avoid circular dependency + from .vulnerability_scanner import scan_dependencies + + # Run vulnerability scan on Docker base images + vuln_findings = scan_dependencies(base_images, offline=False) + + # Convert vulnerability findings to Docker-specific format + for vuln in vuln_findings: + findings.append({ + 'type': 'docker_base_image_vulnerability', + 'severity': vuln.get('severity', 'medium'), + 'file': 'Dockerfile', + 'message': f"Base image {vuln.get('package', 'unknown')} has vulnerability: {vuln.get('title', 'Unknown vulnerability')}", + 'recommendation': vuln.get('recommendation', 'Update to latest secure version'), + 'details': vuln + }) + + return findings + + +def _find_root_containers(conn: sqlite3.Connection) -> List[Dict[str, Any]]: + """ + Detect containers running as root user (default or explicit). + + CIS Docker Benchmark: Running containers as root is a major security risk. + A container breakout would grant attacker root privileges on the host. + + Args: + conn: SQLite database connection + + Returns: + List of findings for containers running as root + """ + findings = [] + cursor = conn.cursor() + + # Query all Docker images + cursor.execute("SELECT file_path, env_vars FROM docker_images") + + for row in cursor: + file_path = row['file_path'] + env_vars_json = row['env_vars'] + + # Parse the JSON column + try: + env_vars = json.loads(env_vars_json) if env_vars_json else {} + except json.JSONDecodeError as e: + logger.debug(f"Non-critical error parsing Docker env vars JSON: {e}", exc_info=False) + continue + + # Check for _DOCKER_USER key (set by USER instruction) + docker_user = env_vars.get('_DOCKER_USER') + + # If no USER instruction or explicitly set to root + if docker_user is None or docker_user.lower() == 'root': + findings.append({ + 'type': 'docker_root_user', + 'severity': 'High', + 'file': file_path, + 'message': f"Container runs as root user (USER instruction {'not set' if docker_user is None else 'set to root'})", + 'recommendation': "Add 'USER ' instruction to Dockerfile after installing dependencies" + }) + + return findings + + +def _find_exposed_secrets(conn: sqlite3.Connection) -> List[Dict[str, Any]]: + """ + Detect hardcoded secrets in ENV and ARG instructions. + + ENV and ARG values are stored in image layers and can be inspected + by anyone with access to the image, making them unsuitable for secrets. 
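
To make the detection concrete, a small hedged illustration of the key-name and value patterns this check relies on; the ENV key and the AWS-documented example access key ID below are illustrative inputs, not real credentials.

```python
# Illustrative only: the kind of ENV key/value the patterns in this check flag.
import re

env_key = "DATABASE_PASSWORD"           # hypothetical key name
env_value = "AKIAIOSFODNN7EXAMPLE"      # AWS's documented example access key ID

assert re.search(r"(?i)password", env_key)          # sensitive key-name pattern
assert re.match(r"^AKIA[A-Z0-9]{16}$", env_value)   # known secret-value pattern
```
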
+ + Args: + conn: SQLite database connection + + Returns: + List of findings for exposed secrets + """ + findings = [] + cursor = conn.cursor() + + # Patterns for detecting sensitive keys + sensitive_key_patterns = [ + r'(?i)password', + r'(?i)secret', + r'(?i)api[_-]?key', + r'(?i)token', + r'(?i)auth', + r'(?i)credential', + r'(?i)private[_-]?key', + r'(?i)access[_-]?key' + ] + + # Common secret value patterns + secret_value_patterns = [ + r'^ghp_[A-Za-z0-9]{36}$', # GitHub personal access token + r'^ghs_[A-Za-z0-9]{36}$', # GitHub secret + r'^sk-[A-Za-z0-9]{48}$', # OpenAI API key + r'^xox[baprs]-[A-Za-z0-9-]+$', # Slack token + r'^AKIA[A-Z0-9]{16}$', # AWS access key ID + ] + + # Query all Docker images + cursor.execute("SELECT file_path, env_vars, build_args FROM docker_images") + + for row in cursor: + file_path = row['file_path'] + env_vars_json = row['env_vars'] + build_args_json = row['build_args'] + + # Parse JSON columns + try: + env_vars = json.loads(env_vars_json) if env_vars_json else {} + build_args = json.loads(build_args_json) if build_args_json else {} + except json.JSONDecodeError as e: + logger.debug(f"Non-critical error parsing Docker JSON columns: {e}", exc_info=False) + continue + + # Check ENV variables + for key, value in env_vars.items(): + # Skip internal tracking keys + if key.startswith('_DOCKER_'): + continue + + is_sensitive = False + + # Check if key name indicates sensitive data + for pattern in sensitive_key_patterns: + if re.search(pattern, key): + is_sensitive = True + findings.append({ + 'type': 'docker_exposed_secret', + 'severity': 'Critical', + 'file': file_path, + 'message': f"Potential secret exposed in ENV instruction: {key}", + 'recommendation': "Use Docker secrets or mount secrets at runtime instead of ENV" + }) + break + + # Check if value matches known secret patterns + if not is_sensitive and value: + for pattern in secret_value_patterns: + if re.match(pattern, str(value)): + findings.append({ + 'type': 'docker_exposed_secret', + 'severity': 'Critical', + 'file': file_path, + 'message': f"Detected secret pattern in ENV value for key: {key}", + 'recommendation': "Remove hardcoded secrets and use runtime secret injection" + }) + break + + # Check for high entropy strings (potential secrets) + if not is_sensitive and value and _is_high_entropy(str(value)): + findings.append({ + 'type': 'docker_possible_secret', + 'severity': 'Medium', + 'file': file_path, + 'message': f"High entropy value in ENV {key} - possible secret", + 'recommendation': "Review if this is a secret and move to secure storage if so" + }) + + # Check BUILD ARGs + for key, value in build_args.items(): + # Check if key name indicates sensitive data + for pattern in sensitive_key_patterns: + if re.search(pattern, key): + findings.append({ + 'type': 'docker_exposed_secret', + 'severity': 'High', # Slightly lower than ENV as ARGs are build-time only + 'file': file_path, + 'message': f"Potential secret exposed in ARG instruction: {key}", + 'recommendation': "Use --secret mount or BuildKit secrets instead of ARG for sensitive data" + }) + break + + return findings + + +def _prepare_base_image_scan(conn: sqlite3.Connection) -> List[Dict[str, Any]]: + """ + Prepare base image data for vulnerability scanning. + + This function extracts and parses base image information from the database, + preparing it in the format expected by vulnerability_scanner.scan_dependencies(). 
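
As a hedged illustration of the parsing described here (a re-statement of the rules implemented in this function, not the module's own code), base image references map to name/version pairs like this:

```python
# Sketch of the tag/digest parsing rules, mirroring the implementation below.
def split_image_ref(ref: str) -> tuple[str, str]:
    if "@" in ref:                     # digest form: image@sha256:...
        name, version = ref.split("@", 1)
    elif ":" in ref:                   # tag form: image:tag
        name, version = ref.rsplit(":", 1)
    else:                              # bare image name defaults to 'latest'
        name, version = ref, "latest"
    return name, version

assert split_image_ref("python:3.11-slim") == ("python", "3.11-slim")
assert split_image_ref("image@sha256:abc123") == ("image", "sha256:abc123")
assert split_image_ref("ubuntu") == ("ubuntu", "latest")
```
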
+ + Args: + conn: SQLite database connection + + Returns: + List of dependency dicts with manager='docker', name, and version + """ + dependencies = [] + cursor = conn.cursor() + + # Get all unique base images + cursor.execute("SELECT DISTINCT base_image FROM docker_images WHERE base_image IS NOT NULL") + + for row in cursor: + base_image = row[0] + + # Parse image string to extract name and version/tag + # Format examples: + # - python:3.11-slim + # - node:18-alpine + # - ubuntu:22.04 + # - gcr.io/project/image:tag + # - image@sha256:hash + + if '@' in base_image: + # Handle digest format (image@sha256:...) + name = base_image.split('@')[0] + version = base_image.split('@')[1] + elif ':' in base_image: + # Handle tag format (image:tag) + parts = base_image.rsplit(':', 1) + name = parts[0] + version = parts[1] + else: + # No tag specified, defaults to 'latest' + name = base_image + version = 'latest' + + # Create dependency dict in vulnerability scanner format + dependencies.append({ + 'manager': 'docker', + 'name': name, + 'version': version, + 'source_file': 'Dockerfile' # Could be enhanced to track actual file + }) + + return dependencies + + +def _is_high_entropy(value: str, threshold: float = 4.0) -> bool: + """ + Check if a string has high entropy (potential secret). + + Uses Shannon entropy calculation to detect random-looking strings + that might be secrets, API keys, or tokens. + + Args: + value: String to check + threshold: Entropy threshold (default 4.0) + + Returns: + True if entropy exceeds threshold + """ + import math + + # Skip short strings + if len(value) < 10: + return False + + # Skip strings with spaces (likely not secrets) + if ' ' in value: + return False + + # Calculate character frequency + char_freq = {} + for char in value: + char_freq[char] = char_freq.get(char, 0) + 1 + + # Calculate Shannon entropy + entropy = 0.0 + for freq in char_freq.values(): + probability = freq / len(value) + if probability > 0: + entropy -= probability * math.log2(probability) + + return entropy > threshold \ No newline at end of file diff --git a/theauditor/docs_fetch.py b/theauditor/docs_fetch.py new file mode 100644 index 0000000..9952ad4 --- /dev/null +++ b/theauditor/docs_fetch.py @@ -0,0 +1,793 @@ +"""Documentation fetcher for version-correct package docs.""" + +import json +import re +import time +import urllib.error +import urllib.request +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional +from theauditor.security import sanitize_path, sanitize_url_component, validate_package_name, SecurityError + + +# Default allowlist for registries +DEFAULT_ALLOWLIST = [ + "https://registry.npmjs.org/", + "https://pypi.org/", # Allow both API and web scraping + "https://raw.githubusercontent.com/", + "https://readthedocs.io/", + "https://readthedocs.org/", +] + +# Rate limiting configuration - optimized for minimal runtime +RATE_LIMIT_DELAY = 0.15 # Average delay between requests (balanced for npm/PyPI) +RATE_LIMIT_BACKOFF = 15 # Backoff on 429/disconnect (15s gives APIs time to reset) + + +def fetch_docs( + deps: List[Dict[str, Any]], + allow_net: bool = True, + allowlist: Optional[List[str]] = None, + offline: bool = False, + output_dir: str = "./.pf/context/docs" +) -> Dict[str, Any]: + """ + Fetch version-correct documentation for dependencies. 
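
A hedged usage sketch: the dependency dicts mirror the `name`/`version`/`manager` shape produced by deps.py, and the two packages shown are hypothetical.

```python
# Minimal sketch, assuming deps were parsed by deps.py elsewhere in the pipeline.
from theauditor.docs_fetch import fetch_docs

deps = [
    {"name": "requests", "version": "2.31.0", "manager": "py"},
    {"name": "express", "version": "4.18.2", "manager": "npm"},
]
stats = fetch_docs(deps, allow_net=True, output_dir="./.pf/context/docs")
print(stats["fetched"], stats["cached"], stats["skipped"], len(stats["errors"]))
```
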
+ + Args: + deps: List of dependency objects from deps.py + allow_net: Whether network access is allowed + allowlist: List of allowed URL prefixes (uses DEFAULT_ALLOWLIST if None) + offline: Force offline mode + output_dir: Base directory for cached docs + + Returns: + Summary of fetch operations + """ + if offline or not allow_net: + return { + "mode": "offline", + "fetched": 0, + "cached": 0, + "skipped": len(deps), + "errors": [] + } + + if allowlist is None: + allowlist = DEFAULT_ALLOWLIST + + try: + output_path = sanitize_path(output_dir, ".") + output_path.mkdir(parents=True, exist_ok=True) + except SecurityError as e: + return { + "mode": "error", + "error": f"Invalid output directory: {e}", + "fetched": 0, + "cached": 0, + "skipped": len(deps) + } + + stats = { + "mode": "online", + "fetched": 0, + "cached": 0, + "skipped": 0, + "errors": [] + } + + # FIRST PASS: Check what's cached + needs_fetch = [] + for dep in deps: + # Quick cache check without network + cache_result = _check_cache_for_dep(dep, output_path) + if cache_result["cached"]: + stats["cached"] += 1 + else: + needs_fetch.append(dep) + + # Early exit if everything is cached + if not needs_fetch: + return stats + + # SECOND PASS: Fetch only what we need, with per-service rate limiting + npm_rate_limited_until = 0 + pypi_rate_limited_until = 0 + + for i, dep in enumerate(needs_fetch): + try: + current_time = time.time() + + # Check if this service is rate limited + if dep["manager"] == "npm" and current_time < npm_rate_limited_until: + stats["skipped"] += 1 + stats["errors"].append(f"{dep['name']}: Skipped (npm rate limited)") + continue + elif dep["manager"] == "py" and current_time < pypi_rate_limited_until: + stats["skipped"] += 1 + stats["errors"].append(f"{dep['name']}: Skipped (PyPI rate limited)") + continue + + # Fetch the documentation + if dep["manager"] == "npm": + result = _fetch_npm_docs(dep, output_path, allowlist) + elif dep["manager"] == "py": + result = _fetch_pypi_docs(dep, output_path, allowlist) + else: + stats["skipped"] += 1 + continue + + if result["status"] == "fetched": + stats["fetched"] += 1 + # Rate limiting: delay after successful fetch to be server-friendly + # npm and PyPI both have rate limits (npm: 100/min, PyPI: 60/min) + time.sleep(RATE_LIMIT_DELAY) # Be server-friendly + elif result["status"] == "cached": + stats["cached"] += 1 # Shouldn't happen here but handle it + elif result.get("reason") == "rate_limited": + stats["errors"].append(f"{dep['name']}: Rate limited - backing off {RATE_LIMIT_BACKOFF}s") + stats["skipped"] += 1 + # Set rate limit expiry for this service + if dep["manager"] == "npm": + npm_rate_limited_until = time.time() + RATE_LIMIT_BACKOFF + elif dep["manager"] == "py": + pypi_rate_limited_until = time.time() + RATE_LIMIT_BACKOFF + else: + stats["skipped"] += 1 + + except Exception as e: + error_msg = str(e) + if "429" in error_msg or "rate" in error_msg.lower(): + stats["errors"].append(f"{dep['name']}: Rate limited - backing off {RATE_LIMIT_BACKOFF}s") + # Set rate limit expiry for this service + if dep["manager"] == "npm": + npm_rate_limited_until = time.time() + RATE_LIMIT_BACKOFF + elif dep["manager"] == "py": + pypi_rate_limited_until = time.time() + RATE_LIMIT_BACKOFF + else: + stats["errors"].append(f"{dep['name']}: {error_msg}") + + return stats + + +def _check_cache_for_dep(dep: Dict[str, Any], output_dir: Path) -> Dict[str, bool]: + """ + Quick cache check for a dependency without making network calls. 
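
For reference, a hedged sketch of the cache paths this check derives; the package names are hypothetical, and the sanitization shown for scoped npm names follows the replacement rules used below.

```python
# Illustrative cache paths only; a hit needs doc.md and meta.json to exist and
# meta.json's "last_checked" to be less than seven days old.
from pathlib import Path

docs_root = Path("./.pf/context/docs")
npm_doc    = docs_root / "npm" / "express@4.18.2" / "doc.md"
scoped_doc = docs_root / "npm" / "_at_scope_pkg@1.0.0" / "doc.md"   # from "@scope/pkg"
py_doc     = docs_root / "py"  / "requests@2.31.0" / "doc.md"
```
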
+ Returns {"cached": True/False} + """ + name = dep["name"] + version = dep["version"] + manager = dep["manager"] + + # Build the cache file path + if manager == "npm": + # Handle git versions + if version.startswith("git") or "://" in version: + import hashlib + version_hash = hashlib.md5(version.encode()).hexdigest()[:8] + safe_version = f"git-{version_hash}" + else: + safe_version = version.replace(":", "_").replace("/", "_").replace("\\", "_") + safe_name = name.replace("@", "_at_").replace("/", "_") + pkg_dir = output_dir / "npm" / f"{safe_name}@{safe_version}" + elif manager == "py": + safe_version = version.replace(":", "_").replace("/", "_").replace("\\", "_") + safe_name = name.replace("/", "_").replace("\\", "_") + pkg_dir = output_dir / "py" / f"{safe_name}@{safe_version}" + else: + return {"cached": False} + + doc_file = pkg_dir / "doc.md" + meta_file = pkg_dir / "meta.json" + + # Check cache validity + if doc_file.exists() and meta_file.exists(): + try: + with open(meta_file, encoding="utf-8") as f: + meta = json.load(f) + # Cache for 7 days + last_checked = datetime.fromisoformat(meta["last_checked"]) + if (datetime.now() - last_checked).days < 7: + return {"cached": True} + except (json.JSONDecodeError, KeyError): + pass + + return {"cached": False} + + +def _fetch_npm_docs( + dep: Dict[str, Any], + output_dir: Path, + allowlist: List[str] +) -> Dict[str, Any]: + """Fetch documentation for an npm package.""" + name = dep["name"] + version = dep["version"] + + # Validate package name + if not validate_package_name(name, "npm"): + return {"status": "skipped", "reason": "Invalid package name"} + + # Sanitize version for filesystem (handle git URLs) + if version.startswith("git") or "://" in version: + # For git dependencies, use a hash of the URL as version + import hashlib + version_hash = hashlib.md5(version.encode()).hexdigest()[:8] + safe_version = f"git-{version_hash}" + else: + # For normal versions, just replace problematic characters + safe_version = version.replace(":", "_").replace("/", "_").replace("\\", "_") + + # Create package-specific directory with sanitized name + # Replace @ and / in scoped packages for filesystem safety + safe_name = name.replace("@", "_at_").replace("/", "_") + try: + pkg_dir = output_dir / "npm" / f"{safe_name}@{safe_version}" + pkg_dir.mkdir(parents=True, exist_ok=True) + except (OSError, SecurityError) as e: + return {"status": "error", "error": f"Cannot create package directory: {e}"} + + doc_file = pkg_dir / "doc.md" + meta_file = pkg_dir / "meta.json" + + # Check cache + if doc_file.exists() and meta_file.exists(): + # Check if cache is still valid (simple time-based for now) + try: + with open(meta_file, encoding="utf-8") as f: + meta = json.load(f) + # Cache for 7 days + last_checked = datetime.fromisoformat(meta["last_checked"]) + if (datetime.now() - last_checked).days < 7: + return {"status": "cached"} + except (json.JSONDecodeError, KeyError): + pass # Invalid cache, refetch + + # Fetch from registry with sanitized package name + safe_url_name = sanitize_url_component(name) + safe_url_version = sanitize_url_component(version) + url = f"https://registry.npmjs.org/{safe_url_name}/{safe_url_version}" + if not _is_url_allowed(url, allowlist): + return {"status": "skipped", "reason": "URL not in allowlist"} + + try: + with urllib.request.urlopen(url, timeout=10) as response: + data = json.loads(response.read()) + + readme = data.get("readme", "") + repository = data.get("repository", {}) + homepage = data.get("homepage", "") + + # 
Priority 1: Try to get README from GitHub if available + github_fetched = False + if isinstance(repository, dict): + repo_url = repository.get("url", "") + github_readme = _fetch_github_readme(repo_url, allowlist) + if github_readme and len(github_readme) > 500: # Only use if substantial + readme = github_readme + github_fetched = True + + # Priority 2: If no good GitHub README, try homepage if it's GitHub + if not github_fetched and homepage and "github.com" in homepage: + github_readme = _fetch_github_readme(homepage, allowlist) + if github_readme and len(github_readme) > 500: + readme = github_readme + github_fetched = True + + # Priority 3: Use npm README if it's substantial + if not github_fetched and len(readme) < 500: + # The npm README is too short, try to enhance it + readme = _enhance_npm_readme(data, readme) + + # Write documentation + with open(doc_file, "w", encoding="utf-8") as f: + f.write(f"# {name}@{version}\n\n") + f.write(f"**Package**: [{name}](https://www.npmjs.com/package/{name})\n") + f.write(f"**Version**: {version}\n") + if homepage: + f.write(f"**Homepage**: {homepage}\n") + f.write("\n---\n\n") + f.write(readme) + + # Add usage examples if not in README + if "## Usage" not in readme and "## Example" not in readme: + f.write("\n\n## Installation\n\n```bash\nnpm install {name}\n```\n".format(name=name)) + + # Write metadata + meta = { + "source_url": url, + "last_checked": datetime.now().isoformat(), + "etag": response.headers.get("ETag"), + "repository": repository, + "from_github": github_fetched + } + with open(meta_file, "w", encoding="utf-8") as f: + json.dump(meta, f, indent=2) + + return {"status": "fetched"} + + except urllib.error.HTTPError as e: + if e.code == 429: + return {"status": "error", "reason": "rate_limited", "error": "HTTP 429: Rate limited"} + return {"status": "error", "error": f"HTTP {e.code}: {str(e)}"} + except (urllib.error.URLError, json.JSONDecodeError) as e: + return {"status": "error", "error": str(e)} + + +def _fetch_pypi_docs( + dep: Dict[str, Any], + output_dir: Path, + allowlist: List[str] +) -> Dict[str, Any]: + """Fetch documentation for a PyPI package.""" + name = dep["name"].strip() # Strip any whitespace from name + version = dep["version"] + + # Validate package name + if not validate_package_name(name, "py"): + return {"status": "skipped", "reason": "Invalid package name"} + + # Sanitize package name for URL + safe_url_name = sanitize_url_component(name) + + # Handle special versions + if version in ["latest", "git"]: + # For latest, fetch current version first + if version == "latest": + url = f"https://pypi.org/pypi/{safe_url_name}/json" + else: + return {"status": "skipped", "reason": "git dependency"} + else: + safe_url_version = sanitize_url_component(version) + url = f"https://pypi.org/pypi/{safe_url_name}/{safe_url_version}/json" + + if not _is_url_allowed(url, allowlist): + return {"status": "skipped", "reason": "URL not in allowlist"} + + # Sanitize version for filesystem + safe_version = version.replace(":", "_").replace("/", "_").replace("\\", "_") + + # Create package-specific directory with sanitized name + safe_name = name.replace("/", "_").replace("\\", "_") + try: + pkg_dir = output_dir / "py" / f"{safe_name}@{safe_version}" + pkg_dir.mkdir(parents=True, exist_ok=True) + except (OSError, SecurityError) as e: + return {"status": "error", "error": f"Cannot create package directory: {e}"} + + doc_file = pkg_dir / "doc.md" + meta_file = pkg_dir / "meta.json" + + # Check cache + if doc_file.exists() and 
meta_file.exists(): + try: + with open(meta_file, encoding="utf-8") as f: + meta = json.load(f) + last_checked = datetime.fromisoformat(meta["last_checked"]) + if (datetime.now() - last_checked).days < 7: + return {"status": "cached"} + except (json.JSONDecodeError, KeyError): + pass + + try: + with urllib.request.urlopen(url, timeout=10) as response: + data = json.loads(response.read()) + + info = data.get("info", {}) + description = info.get("description", "") + summary = info.get("summary", "") + + # Priority 1: Try to get README from project URLs (GitHub, GitLab, etc.) + github_fetched = False + project_urls = info.get("project_urls", {}) + + # Check all possible URL sources for GitHub + all_urls = [] + for key, proj_url in project_urls.items(): + if proj_url: + all_urls.append(proj_url) + + # Also check home_page and download_url + home_page = info.get("home_page", "") + if home_page: + all_urls.append(home_page) + download_url = info.get("download_url", "") + if download_url: + all_urls.append(download_url) + + # Try GitHub first + for url in all_urls: + if "github.com" in url.lower(): + github_readme = _fetch_github_readme(url, allowlist) + if github_readme and len(github_readme) > 500: + description = github_readme + github_fetched = True + break + + # Priority 2: Try ReadTheDocs if available + if not github_fetched: + for url in all_urls: + if "readthedocs" in url.lower(): + rtd_content = _fetch_readthedocs(url, allowlist) + if rtd_content and len(rtd_content) > 500: + description = rtd_content + github_fetched = True # Mark as fetched from external source + break + + # Priority 3: Try to scrape PyPI web page (not API) for full README + if not github_fetched and len(description) < 1000: + pypi_readme = _fetch_pypi_web_readme(name, version, allowlist) + if pypi_readme and len(pypi_readme) > len(description): + description = pypi_readme + github_fetched = True # Mark as fetched from external source + + # Priority 4: Use PyPI description (often contains full README) + # PyPI descriptions can be quite good if properly uploaded + if not github_fetched and len(description) < 500 and summary: + # If description is too short, enhance it + description = _enhance_pypi_description(info, description, summary) + + # Write documentation + with open(doc_file, "w", encoding="utf-8") as f: + f.write(f"# {name}@{version}\n\n") + f.write(f"**Package**: [{name}](https://pypi.org/project/{name}/)\n") + f.write(f"**Version**: {version}\n") + + # Add project URLs if available + if project_urls: + f.write("\n**Links**:\n") + for key, url in list(project_urls.items())[:5]: # Limit to 5 + if url: + f.write(f"- {key}: {url}\n") + + f.write("\n---\n\n") + + # Add summary if different from description + if summary and summary not in description: + f.write(f"**Summary**: {summary}\n\n") + + f.write(description) + + # Add installation instructions if not in description + if "pip install" not in description.lower(): + f.write(f"\n\n## Installation\n\n```bash\npip install {name}\n```\n") + + # Add basic usage if really minimal docs + if len(description) < 200: + f.write(f"\n\n## Basic Usage\n\n```python\nimport {name.replace('-', '_')}\n```\n") + + # Write metadata + meta = { + "source_url": url, + "last_checked": datetime.now().isoformat(), + "etag": response.headers.get("ETag"), + "project_urls": project_urls, + "from_github": github_fetched + } + with open(meta_file, "w", encoding="utf-8") as f: + json.dump(meta, f, indent=2) + + return {"status": "fetched"} + + except urllib.error.HTTPError as e: + if e.code 
== 429: + return {"status": "error", "reason": "rate_limited", "error": "HTTP 429: Rate limited"} + return {"status": "error", "error": f"HTTP {e.code}: {str(e)}"} + except (urllib.error.URLError, json.JSONDecodeError) as e: + return {"status": "error", "error": str(e)} + + +def _fetch_github_readme(repo_url: str, allowlist: List[str]) -> Optional[str]: + """ + Fetch README from GitHub repository. + Converts repository URL to raw GitHub URL for README. + """ + if not repo_url: + return None + + # Extract owner/repo from various GitHub URL formats + patterns = [ + r'github\.com[:/]([^/]+)/([^/\s]+)', + r'git\+https://github\.com/([^/]+)/([^/\s]+)', + ] + + for pattern in patterns: + match = re.search(pattern, repo_url) + if match: + owner, repo = match.groups() + # Clean repo name + repo = repo.replace(".git", "") + + # Try common README filenames + readme_files = ["README.md", "readme.md", "README.rst", "README.txt"] + + # Sanitize owner and repo for URL + safe_owner = sanitize_url_component(owner) + safe_repo = sanitize_url_component(repo) + + for readme_name in readme_files: + safe_readme = sanitize_url_component(readme_name) + raw_url = f"https://raw.githubusercontent.com/{safe_owner}/{safe_repo}/main/{safe_readme}" + + if not _is_url_allowed(raw_url, allowlist): + continue + + try: + with urllib.request.urlopen(raw_url, timeout=5) as response: + return response.read().decode("utf-8") + except urllib.error.HTTPError: + # Try master branch + raw_url = f"https://raw.githubusercontent.com/{safe_owner}/{safe_repo}/master/{safe_readme}" + try: + with urllib.request.urlopen(raw_url, timeout=5) as response: + return response.read().decode("utf-8") + except urllib.error.URLError: + continue + except urllib.error.URLError: + continue + + return None + + +def _is_url_allowed(url: str, allowlist: List[str]) -> bool: + """Check if URL is in the allowlist.""" + for allowed in allowlist: + if url.startswith(allowed): + return True + return False + + +def _enhance_npm_readme(data: Dict[str, Any], readme: str) -> str: + """Enhance minimal npm README with package metadata.""" + enhanced = readme if readme else "" + + # Add description if not in README + description = data.get("description", "") + if description and description not in enhanced: + enhanced = f"{description}\n\n{enhanced}" + + # Add keywords + keywords = data.get("keywords", []) + if keywords and "keywords" not in enhanced.lower(): + enhanced += f"\n\n## Keywords\n\n{', '.join(keywords)}" + + # Add main entry point info + main = data.get("main", "") + if main: + enhanced += f"\n\n## Entry Point\n\nMain file: `{main}`" + + # Add dependencies info if substantial + deps = data.get("dependencies", {}) + if len(deps) > 0 and len(deps) <= 10: # Only if reasonable number + enhanced += "\n\n## Dependencies\n\n" + for dep, ver in deps.items(): + enhanced += f"- {dep}: {ver}\n" + + return enhanced + + +def _fetch_readthedocs(url: str, allowlist: List[str]) -> Optional[str]: + """ + Fetch documentation from ReadTheDocs. + Tries to get the main index page content. 
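
A hedged illustration of the URL normalization this helper performs before fetching (the project name is hypothetical):

```python
# Sketch of the /en/latest/ normalization applied below.
url = "https://myproject.readthedocs.io"
if not url.endswith("/"):
    url += "/"
if "/en/latest" not in url and "/en/stable" not in url:
    url = url.rstrip("/") + "/en/latest/"
assert url == "https://myproject.readthedocs.io/en/latest/"
```
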
+ """ + if not url or not _is_url_allowed(url, allowlist): + return None + + # Ensure we're getting the latest version + if not url.endswith("/"): + url += "/" + + # Try to fetch the main page + try: + # Add en/latest if not already in URL + if "/en/latest" not in url and "/en/stable" not in url: + url = url.rstrip("/") + "/en/latest/" + + with urllib.request.urlopen(url, timeout=10) as response: + html_content = response.read().decode("utf-8") + + # Basic HTML to markdown conversion (very simplified) + # Remove script and style tags + html_content = re.sub(r']*>.*?', '', html_content, flags=re.DOTALL) + html_content = re.sub(r']*>.*?', '', html_content, flags=re.DOTALL) + + # Extract main content (look for common RTD content divs) + content_match = re.search(r']*class="[^"]*document[^"]*"[^>]*>(.*?)', html_content, re.DOTALL) + if content_match: + html_content = content_match.group(1) + + # Convert basic HTML tags to markdown + html_content = re.sub(r']*>(.*?)', r'# \1\n', html_content) + html_content = re.sub(r']*>(.*?)', r'## \1\n', html_content) + html_content = re.sub(r']*>(.*?)', r'### \1\n', html_content) + html_content = re.sub(r']*>(.*?)', r'`\1`', html_content) + html_content = re.sub(r']*>(.*?)', r'```\n\1\n```', html_content, flags=re.DOTALL) + html_content = re.sub(r']*>(.*?)

', r'\1\n\n', html_content) + html_content = re.sub(r']*href="([^"]*)"[^>]*>(.*?)', r'[\2](\1)', html_content) + html_content = re.sub(r'<[^>]+>', '', html_content) # Remove remaining HTML tags + + # Clean up whitespace + html_content = re.sub(r'\n{3,}', '\n\n', html_content) + + return html_content.strip() + except Exception: + return None + + +def _fetch_pypi_web_readme(name: str, version: str, allowlist: List[str]) -> Optional[str]: + """ + Fetch the rendered README from PyPI's web interface. + The web interface shows the full README that's often missing from the API. + """ + # Validate package name + if not validate_package_name(name, "py"): + return None + + # Sanitize for URL + safe_name = sanitize_url_component(name) + safe_version = sanitize_url_component(version) + + # PyPI web URLs + urls_to_try = [ + f"https://pypi.org/project/{safe_name}/{safe_version}/", + f"https://pypi.org/project/{safe_name}/" + ] + + for url in urls_to_try: + if not _is_url_allowed(url, allowlist): + continue + + try: + req = urllib.request.Request(url, headers={ + 'User-Agent': 'Mozilla/5.0 (compatible; TheAuditor/1.0)' + }) + with urllib.request.urlopen(req, timeout=10) as response: + html_content = response.read().decode("utf-8") + + # Look for the project description div + # PyPI uses a specific class for the README content + readme_match = re.search( + r']*class="[^"]*project-description[^"]*"[^>]*>(.*?)', + html_content, + re.DOTALL | re.IGNORECASE + ) + + if not readme_match: + # Try alternative patterns + readme_match = re.search( + r']*class="[^"]*description[^"]*"[^>]*>(.*?)', + html_content, + re.DOTALL | re.IGNORECASE + ) + + if readme_match: + readme_html = readme_match.group(1) + + # Convert HTML to markdown (simplified) + # Headers + readme_html = re.sub(r']*>(.*?)', r'# \1\n', readme_html, flags=re.IGNORECASE) + readme_html = re.sub(r']*>(.*?)', r'## \1\n', readme_html, flags=re.IGNORECASE) + readme_html = re.sub(r']*>(.*?)', r'### \1\n', readme_html, flags=re.IGNORECASE) + + # Code blocks + readme_html = re.sub(r']*>]*>(.*?)', r'```\n\1\n```', readme_html, flags=re.DOTALL | re.IGNORECASE) + readme_html = re.sub(r']*>(.*?)', r'`\1`', readme_html, flags=re.IGNORECASE) + + # Lists + readme_html = re.sub(r']*>(.*?)', r'- \1\n', readme_html, flags=re.IGNORECASE) + + # Links + readme_html = re.sub(r']*href="([^"]*)"[^>]*>(.*?)', r'[\2](\1)', readme_html, flags=re.IGNORECASE) + + # Paragraphs and line breaks + readme_html = re.sub(r']*>(.*?)

', r'\1\n\n', readme_html, flags=re.DOTALL | re.IGNORECASE) + readme_html = re.sub(r']*>', '\n', readme_html, flags=re.IGNORECASE) + + # Remove remaining HTML tags + readme_html = re.sub(r'<[^>]+>', '', readme_html) + + # Decode HTML entities + readme_html = readme_html.replace('<', '<') + readme_html = readme_html.replace('>', '>') + readme_html = readme_html.replace('&', '&') + readme_html = readme_html.replace('"', '"') + readme_html = readme_html.replace(''', "'") + + # Clean up whitespace + readme_html = re.sub(r'\n{3,}', '\n\n', readme_html) + readme_html = readme_html.strip() + + if len(readme_html) > 100: # Only return if we got substantial content + return readme_html + except Exception: + continue + + return None + + +def _enhance_pypi_description(info: Dict[str, Any], description: str, summary: str) -> str: + """Enhance minimal PyPI description with package metadata.""" + enhanced = description if description else "" + + # Start with summary if description is empty + if not enhanced and summary: + enhanced = f"{summary}\n\n" + + # Add author info + author = info.get("author", "") + author_email = info.get("author_email", "") + if author and "author" not in enhanced.lower(): + author_info = f"\n\n## Author\n\n{author}" + if author_email: + author_info += f" ({author_email})" + enhanced += author_info + + # Add license + license_info = info.get("license", "") + if license_info and "license" not in enhanced.lower(): + enhanced += f"\n\n## License\n\n{license_info}" + + # Add classifiers (limited) + classifiers = info.get("classifiers", []) + relevant_classifiers = [ + c for c in classifiers + if "Programming Language" in c or "Framework" in c or "Topic" in c + ][:5] # Limit to 5 + if relevant_classifiers: + enhanced += "\n\n## Classifiers\n\n" + for classifier in relevant_classifiers: + enhanced += f"- {classifier}\n" + + # Add requires_python if specified + requires_python = info.get("requires_python", "") + if requires_python: + enhanced += f"\n\n## Python Version\n\nRequires Python {requires_python}" + + return enhanced + + +def check_latest( + deps: List[Dict[str, Any]], + allow_net: bool = True, + offline: bool = False, + output_path: str = "./.pf/deps_latest.json" +) -> Dict[str, Any]: + """ + Check latest versions and compare to locked versions. + + This is a wrapper around deps.check_latest_versions for consistency. 
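
A hedged usage sketch of this wrapper; the dependency is hypothetical and the result keys shown are those returned by the online path below.

```python
# Minimal sketch; requires network access since it queries the registries.
from theauditor.docs_fetch import check_latest

deps = [{"name": "flask", "version": "2.3.0", "manager": "py"}]
result = check_latest(deps, allow_net=True, offline=False)
print(result)  # e.g. {"mode": "online", "checked": 1, "outdated": ..., "output": "./.pf/deps_latest.json"}
```
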
+ """ + from .deps import check_latest_versions, write_deps_latest_json + + if offline or not allow_net: + return { + "mode": "offline", + "checked": 0, + "outdated": 0 + } + + latest_info = check_latest_versions(deps, allow_net=allow_net, offline=offline) + + if latest_info: + # Sanitize output path before writing + try: + safe_output_path = str(sanitize_path(output_path, ".")) + write_deps_latest_json(latest_info, safe_output_path) + except SecurityError as e: + return { + "mode": "error", + "error": f"Invalid output path: {e}", + "checked": 0, + "outdated": 0 + } + + outdated = sum(1 for info in latest_info.values() if info["is_outdated"]) + + return { + "mode": "online", + "checked": len(latest_info), + "outdated": outdated, + "output": output_path + } diff --git a/theauditor/docs_summarize.py b/theauditor/docs_summarize.py new file mode 100644 index 0000000..e39b69f --- /dev/null +++ b/theauditor/docs_summarize.py @@ -0,0 +1,408 @@ +"""Documentation summarizer for creating concise doc capsules.""" + +import json +import re +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional, Set + + +def summarize_docs( + docs_dir: str = "./.pf/context/docs", + output_dir: str = "./.pf/context/doc_capsules", + workset_path: Optional[str] = None, + max_capsule_lines: int = 50 +) -> Dict[str, Any]: + """ + Generate concise doc capsules from fetched documentation. + + Args: + docs_dir: Directory containing fetched docs + output_dir: Directory for output capsules + workset_path: Optional workset to filter relevant deps + max_capsule_lines: Maximum lines per capsule + + Returns: + Summary statistics + """ + docs_path = Path(docs_dir) + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + # Load workset if provided + relevant_deps = None + if workset_path and Path(workset_path).exists(): + relevant_deps = _load_workset_deps(workset_path) + + stats = { + "total_docs": 0, + "capsules_created": 0, + "skipped": 0, + "errors": [] + } + + capsules_index = [] + + # Process npm docs + npm_dir = docs_path / "npm" + if npm_dir.exists(): + for pkg_dir in npm_dir.iterdir(): + if not pkg_dir.is_dir(): + continue + + # Extract package name and version + pkg_info = pkg_dir.name # format: name@version + if "@" not in pkg_info: + stats["skipped"] += 1 + continue + + name_version = pkg_info.rsplit("@", 1) + if len(name_version) != 2: + stats["skipped"] += 1 + continue + + name, version = name_version + + # Check if in workset + if relevant_deps and f"npm:{name}" not in relevant_deps: + stats["skipped"] += 1 + continue + + stats["total_docs"] += 1 + + # Create capsule + doc_file = pkg_dir / "doc.md" + meta_file = pkg_dir / "meta.json" + + if doc_file.exists(): + try: + capsule = _create_capsule( + doc_file, meta_file, name, version, "npm", max_capsule_lines + ) + + # Write capsule + capsule_file = output_path / f"npm__{name}@{version}.md" + with open(capsule_file, "w", encoding="utf-8") as f: + f.write(capsule) + + capsules_index.append({ + "name": name, + "version": version, + "ecosystem": "npm", + "path": str(capsule_file.relative_to(output_path)) + }) + + stats["capsules_created"] += 1 + + except Exception as e: + stats["errors"].append(f"{name}@{version}: {str(e)}") + + # Process Python docs + py_dir = docs_path / "py" + if py_dir.exists(): + for pkg_dir in py_dir.iterdir(): + if not pkg_dir.is_dir(): + continue + + # Extract package name and version + pkg_info = pkg_dir.name # format: name@version + if "@" not in pkg_info: + 
stats["skipped"] += 1 + continue + + name_version = pkg_info.rsplit("@", 1) + if len(name_version) != 2: + stats["skipped"] += 1 + continue + + name, version = name_version + + # Check if in workset + if relevant_deps and f"py:{name}" not in relevant_deps: + stats["skipped"] += 1 + continue + + stats["total_docs"] += 1 + + # Create capsule + doc_file = pkg_dir / "doc.md" + meta_file = pkg_dir / "meta.json" + + if doc_file.exists(): + try: + capsule = _create_capsule( + doc_file, meta_file, name, version, "py", max_capsule_lines + ) + + # Write capsule + capsule_file = output_path / f"py__{name}@{version}.md" + with open(capsule_file, "w", encoding="utf-8") as f: + f.write(capsule) + + capsules_index.append({ + "name": name, + "version": version, + "ecosystem": "py", + "path": str(capsule_file.relative_to(output_path)) + }) + + stats["capsules_created"] += 1 + + except Exception as e: + stats["errors"].append(f"{name}@{version}: {str(e)}") + + # Write index + index_file = output_path.parent / "doc_index.json" + with open(index_file, "w", encoding="utf-8") as f: + json.dump({ + "created_at": datetime.now().isoformat(), + "capsules": capsules_index, + "stats": stats + }, f, indent=2) + + return stats + + +def _load_workset_deps(workset_path: str) -> Set[str]: + """ + Load relevant dependencies from workset. + Returns set of "manager:name" keys. + """ + relevant = set() + + try: + with open(workset_path, encoding="utf-8") as f: + workset = json.load(f) + + # Extract imported packages from workset files + # This is a simplified version - would need more sophisticated parsing + for file_info in workset.get("files", []): + path = file_info.get("path", "") + + # Simple heuristic: look at file extension + if path.endswith((".js", ".ts", ".jsx", ".tsx")): + # Would parse imports/requires + # For now, include all npm deps + relevant.add("npm:*") + elif path.endswith(".py"): + # Would parse imports + # For now, include all py deps + relevant.add("py:*") + + except (json.JSONDecodeError, KeyError): + pass + + # If we couldn't determine specific deps, include all + if not relevant or "npm:*" in relevant or "py:*" in relevant: + return set() # Empty set means include all + + return relevant + + +def _create_capsule( + doc_file: Path, + meta_file: Path, + name: str, + version: str, + ecosystem: str, + max_lines: int +) -> str: + """Create a concise capsule from documentation.""" + + # Read documentation + with open(doc_file, encoding="utf-8") as f: + content = f.read() + + # Read metadata + meta = {} + if meta_file.exists(): + try: + with open(meta_file, encoding="utf-8") as f: + meta = json.load(f) + except json.JSONDecodeError: + pass + + # Extract key sections + sections = { + "init": _extract_initialization(content, ecosystem), + "apis": _extract_top_apis(content), + "examples": _extract_examples(content), + } + + # Build capsule + capsule_lines = [ + f"# {name}@{version} ({ecosystem})", + "", + "## Quick Start", + "" + ] + + if sections["init"]: + capsule_lines.extend(sections["init"][:10]) # Limit lines + capsule_lines.append("") + elif content: # If no structured init but has content, add some raw content + content_lines = content.split("\n")[:10] + capsule_lines.extend(content_lines) + capsule_lines.append("") + + if sections["apis"]: + capsule_lines.append("## Top APIs") + capsule_lines.append("") + capsule_lines.extend(sections["apis"][:15]) # Limit lines + capsule_lines.append("") + + if sections["examples"]: + capsule_lines.append("## Examples") + capsule_lines.append("") + 
capsule_lines.extend(sections["examples"][:15]) # Limit lines + capsule_lines.append("") + + # Add reference to full documentation + capsule_lines.append("## 📄 Full Documentation Available") + capsule_lines.append("") + # Calculate relative path from project root + full_doc_path = f"./.pf/context/docs/{ecosystem}/{name}@{version}/doc.md" + capsule_lines.append(f"**Full content**: `{full_doc_path}`") + + # Count lines in full doc if it exists + if doc_file.exists(): + try: + with open(doc_file, encoding="utf-8") as f: + line_count = len(f.readlines()) + capsule_lines.append(f"**Size**: {line_count} lines") + except Exception: + pass + + capsule_lines.append("") + + # Add source info + capsule_lines.append("## Source") + capsule_lines.append("") + capsule_lines.append(f"- URL: {meta.get('source_url', '')}") + capsule_lines.append(f"- Fetched: {meta.get('last_checked', '')}") + + # Truncate if too long + if len(capsule_lines) > max_lines: + # Keep the full doc reference even when truncating + keep_lines = capsule_lines[:max_lines-7] # Leave room for reference and truncation + ref_lines = [l for l in capsule_lines if "Full Documentation Available" in l or "Full content" in l or "Size" in l] + capsule_lines = keep_lines + ["", "...","(truncated)", ""] + ref_lines + + return "\n".join(capsule_lines) + + +def _extract_initialization(content: str, ecosystem: str) -> List[str]: + """Extract initialization/installation snippets.""" + lines = [] + + # Look for installation section + install_patterns = [ + r"## Install\w*", + r"## Getting Started", + r"## Quick Start", + r"### Install\w*", + ] + + for pattern in install_patterns: + match = re.search(pattern, content, re.IGNORECASE | re.MULTILINE) + if match: + # Extract next code block + start = match.end() + code_match = re.search(r"```(\w*)\n(.*?)```", content[start:], re.DOTALL) + if code_match: + lines.append(f"```{code_match.group(1)}") + lines.extend(code_match.group(2).strip().split("\n")[:5]) + lines.append("```") + break + + # Fallback: look for common patterns + if not lines: + if ecosystem == "npm": + if "require(" in content: + match = re.search(r"(const|var|let)\s+\w+\s*=\s*require\([^)]+\)", content) + if match: + lines = ["```javascript", match.group(0), "```"] + elif "import " in content: + match = re.search(r"import\s+.*?from\s+['\"][^'\"]+['\"]", content) + if match: + lines = ["```javascript", match.group(0), "```"] + elif ecosystem == "py": + if "import " in content: + match = re.search(r"import\s+\w+", content) + if match: + lines = ["```python", match.group(0), "```"] + elif "from " in content: + match = re.search(r"from\s+\w+\s+import\s+\w+", content) + if match: + lines = ["```python", match.group(0), "```"] + + return lines + + +def _extract_top_apis(content: str) -> List[str]: + """Extract top API methods.""" + lines = [] + + # Look for API section + api_patterns = [ + r"## API", + r"## Methods", + r"## Functions", + r"### API", + ] + + for pattern in api_patterns: + match = re.search(pattern, content, re.IGNORECASE | re.MULTILINE) + if match: + start = match.end() + # Extract next few method signatures + method_matches = re.findall( + r"^[\*\-]\s*`([^`]+)`", + content[start:start+2000], + re.MULTILINE + ) + for method in method_matches[:5]: # Top 5 methods + lines.append(f"- `{method}`") + break + + # Fallback: look for function definitions in code blocks + if not lines: + code_blocks = re.findall(r"```\w*\n(.*?)```", content, re.DOTALL) + for block in code_blocks[:2]: # Check first 2 code blocks + # Look for function 
signatures + funcs = re.findall(r"(?:function|def|const|let|var)\s+(\w+)\s*\(([^)]*)\)", block) + for func_name, params in funcs[:5]: + lines.append(f"- `{func_name}({params})`") + if lines: + break + + return lines + + +def _extract_examples(content: str) -> List[str]: + """Extract usage examples.""" + lines = [] + + # Look for examples section + example_patterns = [ + r"## Example", + r"## Usage", + r"### Example", + r"### Usage", + ] + + for pattern in example_patterns: + match = re.search(pattern, content, re.IGNORECASE | re.MULTILINE) + if match: + start = match.end() + # Extract next code block + code_match = re.search(r"```(\w*)\n(.*?)```", content[start:], re.DOTALL) + if code_match: + lang = code_match.group(1) or "javascript" + code_lines = code_match.group(2).strip().split("\n")[:10] # Max 10 lines + lines.append(f"```{lang}") + lines.extend(code_lines) + lines.append("```") + break + + return lines \ No newline at end of file diff --git a/theauditor/extraction.py b/theauditor/extraction.py new file mode 100644 index 0000000..4d08c9f --- /dev/null +++ b/theauditor/extraction.py @@ -0,0 +1,493 @@ +"""Extraction module - pure courier model for data chunking. + +This module implements the courier model: takes raw tool output and chunks it +into manageable pieces for AI processing WITHOUT any filtering or interpretation. + +Pure Courier Principles: +- NO filtering by severity or importance +- NO deduplication or sampling +- NO interpretation of findings +- ONLY chunks files if they exceed 65KB +- ALL data preserved exactly as generated + +The AI consumer decides what's important, not TheAuditor. +""" + +import json +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple +from collections import defaultdict +from theauditor.config_runtime import load_runtime_config + + +# DELETED: All smart extraction functions removed +# Pure courier model - no filtering, only chunking if needed + + +def _chunk_large_file(raw_path: Path, max_chunk_size: Optional[int] = None) -> Optional[List[Tuple[Path, int]]]: + """Split large files into chunks of configured max size.""" + # Load config if not provided + if max_chunk_size is None: + config = load_runtime_config() + max_chunk_size = config["limits"]["max_chunk_size"] + + # Get max chunks per file from config + config = load_runtime_config() + max_chunks_per_file = config["limits"]["max_chunks_per_file"] + + chunks = [] + try: + # Handle non-JSON files (like .dot, .txt, etc.) 
+ if raw_path.suffix != '.json': + # Read as text and chunk if needed + with open(raw_path, 'r', encoding='utf-8', errors='ignore') as f: + content = f.read() + + # Check if file needs chunking + if len(content) <= max_chunk_size: + # Small enough, copy as-is + output_path = raw_path.parent.parent / 'readthis' / raw_path.name + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + f.write(content) + size = output_path.stat().st_size + print(f" [COPIED] {raw_path.name} → {output_path.name} ({size:,} bytes)") + return [(output_path, size)] + else: + # Need to chunk text file + base_name = raw_path.stem + ext = raw_path.suffix + chunk_num = 0 + position = 0 + + while position < len(content) and chunk_num < max_chunks_per_file: + chunk_num += 1 + chunk_end = min(position + max_chunk_size, len(content)) + chunk_content = content[position:chunk_end] + + output_path = raw_path.parent.parent / 'readthis' / f"{base_name}_chunk{chunk_num:02d}{ext}" + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + f.write(chunk_content) + size = output_path.stat().st_size + chunks.append((output_path, size)) + print(f" [CHUNKED] {raw_path.name} → {output_path.name} ({size:,} bytes)") + + position = chunk_end + + if position < len(content): + print(f" [TRUNCATED] {raw_path.name} - stopped at {max_chunks_per_file} chunks") + + return chunks + + # Handle JSON files + with open(raw_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + # Check if file needs chunking + full_json = json.dumps(data, indent=2) + if len(full_json) <= max_chunk_size: + # Small enough, copy as-is + output_path = raw_path.parent.parent / 'readthis' / raw_path.name + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + f.write(full_json) + size = output_path.stat().st_size + print(f" [COPIED] {raw_path.name} → {output_path.name} ({size:,} bytes)") + return [(output_path, size)] + + # File needs chunking + base_name = raw_path.stem + ext = raw_path.suffix + + # Handle different data structures + if isinstance(data, list): + # For lists, chunk by items + chunk_num = 0 + current_chunk = [] + current_size = 100 # Account for JSON structure overhead + + for item in data: + item_json = json.dumps(item, indent=2) + item_size = len(item_json) + + if current_size + item_size > max_chunk_size and current_chunk: + # Check chunk limit + if chunk_num >= max_chunks_per_file: + print(f" [TRUNCATED] {raw_path.name} - stopped at {max_chunks_per_file} chunks (would have created more)") + break + + # Write current chunk + chunk_num += 1 + output_path = raw_path.parent.parent / 'readthis' / f"{base_name}_chunk{chunk_num:02d}{ext}" + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(current_chunk, f, indent=2) + size = output_path.stat().st_size + chunks.append((output_path, size)) + print(f" [CHUNKED] {raw_path.name} → {output_path.name} ({size:,} bytes)") + + # Start new chunk + current_chunk = [item] + current_size = item_size + 100 + else: + current_chunk.append(item) + current_size += item_size + + # Write final chunk (only if under limit) + if current_chunk and chunk_num < max_chunks_per_file: + chunk_num += 1 + output_path = raw_path.parent.parent / 'readthis' / f"{base_name}_chunk{chunk_num:02d}{ext}" + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(current_chunk, f, indent=2) + size = 
output_path.stat().st_size + chunks.append((output_path, size)) + print(f" [CHUNKED] {raw_path.name} → {output_path.name} ({size:,} bytes)") + + elif isinstance(data, dict): + # For dicts with lists (like findings, paths), chunk the lists + # Determine the correct key to chunk on + if base_name == 'taint_analysis': + # For taint analysis, we need to merge ALL findings into one list + # because they're split across multiple keys + if 'taint_paths' in data or 'all_rule_findings' in data: + # Merge all findings into a single list for chunking + all_taint_items = [] + + # Add taint paths + if 'taint_paths' in data: + for item in data['taint_paths']: + item['finding_type'] = 'taint_path' + all_taint_items.append(item) + + # Add all rule findings + if 'all_rule_findings' in data: + for item in data['all_rule_findings']: + item['finding_type'] = 'rule_finding' + all_taint_items.append(item) + + # Add infrastructure issues only if they're different from all_rule_findings + # (to avoid duplicates when they're the same list) + if 'infrastructure_issues' in data: + # Check if they're different objects (not the same list) + if data['infrastructure_issues'] is not data.get('all_rule_findings'): + # Only add if they're actually different content + infra_set = {json.dumps(item, sort_keys=True) for item in data['infrastructure_issues']} + rules_set = {json.dumps(item, sort_keys=True) for item in data.get('all_rule_findings', [])} + if infra_set != rules_set: + for item in data['infrastructure_issues']: + item['finding_type'] = 'infrastructure' + all_taint_items.append(item) + + # Add paths (data flow paths) - these are often duplicates of taint_paths but may have extra info + if 'paths' in data: + # Check if different from taint_paths + paths_set = {json.dumps(item, sort_keys=True) for item in data['paths']} + taint_set = {json.dumps(item, sort_keys=True) for item in data.get('taint_paths', [])} + if paths_set != taint_set: + for item in data['paths']: + item['finding_type'] = 'path' + all_taint_items.append(item) + + # Add vulnerabilities - these are the final analyzed vulnerabilities + if 'vulnerabilities' in data: + for item in data['vulnerabilities']: + item['finding_type'] = 'vulnerability' + all_taint_items.append(item) + + # Create a new data structure with merged findings + data = { + 'success': data.get('success', True), + 'summary': data.get('summary', {}), + 'total_vulnerabilities': data.get('total_vulnerabilities', len(all_taint_items)), + 'sources_found': data.get('sources_found', 0), + 'sinks_found': data.get('sinks_found', 0), + 'merged_findings': all_taint_items + } + list_key = 'merged_findings' + else: + list_key = 'paths' + elif 'all_findings' in data: + # CRITICAL: FCE findings are pre-sorted by severity via finding_priority.py + # The order MUST be preserved during chunking to ensure critical issues + # appear in chunk01. DO NOT sort or shuffle these findings! 
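+ # Note: the chunking loop below walks this list in order, so chunk01 always carries the highest-severity findings.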
+ list_key = 'all_findings' + + # Log for verification + if data.get(list_key): + first_items = data[list_key][:3] if len(data[list_key]) >= 3 else data[list_key] + severities = [item.get('severity', 'unknown') for item in first_items] + print(f"[EXTRACTION] Processing FCE with {len(data[list_key])} pre-sorted findings") + print(f"[EXTRACTION] First 3 severities: {severities}") + elif 'findings' in data: + list_key = 'findings' + elif 'vulnerabilities' in data: + list_key = 'vulnerabilities' + elif 'issues' in data: + list_key = 'issues' + elif 'edges' in data: + list_key = 'edges' # For call_graph.json and import_graph.json + elif 'nodes' in data: + list_key = 'nodes' # For graph files with nodes + elif 'taint_paths' in data: + list_key = 'taint_paths' + elif 'paths' in data: + list_key = 'paths' + elif 'dependencies' in data: + list_key = 'dependencies' # For deps.json + elif 'files' in data: + list_key = 'files' # For file lists + elif 'results' in data: + list_key = 'results' # For analysis results + else: + list_key = None + + if list_key: + items = data.get(list_key, []) + + # Extract minimal metadata (don't duplicate everything) + metadata = {} + for key in ['success', 'summary', 'total_vulnerabilities', 'chunk_info']: + if key in data: + metadata[key] = data[key] + + # Calculate actual metadata size + metadata_json = json.dumps(metadata, indent=2) + metadata_size = len(metadata_json) + + chunk_num = 0 + chunk_items = [] + current_size = metadata_size + 200 # Actual metadata size + bracket overhead + + for item in items: + item_json = json.dumps(item, indent=2) + item_size = len(item_json) + + if current_size + item_size > max_chunk_size and chunk_items: + # Check chunk limit + if chunk_num >= max_chunks_per_file: + print(f" [TRUNCATED] {raw_path.name} - stopped at {max_chunks_per_file} chunks (would have created more)") + break + + # Write current chunk + chunk_num += 1 + chunk_data = metadata.copy() + chunk_data[list_key] = chunk_items + chunk_data['chunk_info'] = { + 'chunk_number': chunk_num, + 'total_items_in_chunk': len(chunk_items), + 'original_total_items': len(items), + 'list_key': list_key, + 'truncated': chunk_num >= max_chunks_per_file # Mark if this is the last allowed chunk + } + + output_path = raw_path.parent.parent / 'readthis' / f"{base_name}_chunk{chunk_num:02d}{ext}" + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(chunk_data, f, indent=2) + size = output_path.stat().st_size + chunks.append((output_path, size)) + print(f" [CHUNKED] {raw_path.name} → {output_path.name} ({len(chunk_items)} items, {size:,} bytes)") + + # Start new chunk + chunk_items = [item] + current_size = metadata_size + item_size + 200 + else: + chunk_items.append(item) + current_size += item_size + + # Write final chunk (only if under limit) + if chunk_items and chunk_num < max_chunks_per_file: + chunk_num += 1 + chunk_data = metadata.copy() + chunk_data[list_key] = chunk_items + chunk_data['chunk_info'] = { + 'chunk_number': chunk_num, + 'total_items_in_chunk': len(chunk_items), + 'original_total_items': len(items), + 'list_key': list_key, + 'truncated': False # This is the final chunk within limit + } + + output_path = raw_path.parent.parent / 'readthis' / f"{base_name}_chunk{chunk_num:02d}{ext}" + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(chunk_data, f, indent=2) + size = output_path.stat().st_size + chunks.append((output_path, size)) + print(f" [CHUNKED] {raw_path.name} → {output_path.name} 
({len(chunk_items)} items, {size:,} bytes)") + else: + # No recognized list key - shouldn't happen now with expanded list + # Log warning and copy as-is + print(f" [WARNING] No chunkable list found in {raw_path.name}, copying as-is") + output_path = raw_path.parent.parent / 'readthis' / raw_path.name + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2) + size = output_path.stat().st_size + chunks.append((output_path, size)) + print(f" [COPIED] {raw_path.name} → {output_path.name} ({size:,} bytes)") + + return chunks + + except Exception as e: + print(f" [ERROR] Failed to chunk {raw_path.name}: {e}") + return None # Return None to signal failure, not empty list + + +def _copy_as_is(raw_path: Path) -> Tuple[Optional[Path], int]: + """Copy small files as-is or chunk if >65KB.""" + chunks = _chunk_large_file(raw_path) + if chunks is None: + # Chunking failed + return None, -1 # Signal error with -1 + elif chunks: + # Return the first chunk info for compatibility + return chunks[0] if len(chunks) == 1 else (None, sum(s for _, s in chunks)) + return None, 0 + + +def extract_all_to_readthis(root_path_str: str, budget_kb: int = 1500) -> bool: + """Main function for extracting readthis chunks from raw data. + + Implements intelligent extraction with prioritization to stay within + budget while preserving all critical security findings. + + Args: + root_path_str: Root directory path as string + budget_kb: Maximum total size in KB for all readthis files (default 1000KB) + + Returns: + True if extraction completed successfully, False otherwise + """ + root_path = Path(root_path_str) + raw_dir = root_path / ".pf" / "raw" + readthis_dir = root_path / ".pf" / "readthis" + + print("\n" + "="*60) + print("[EXTRACTION] Smart extraction with 1MB budget") + print("="*60) + + # Check if raw directory exists + if not raw_dir.exists(): + print(f"[WARNING] Raw directory does not exist: {raw_dir}") + print("[INFO] No raw data to extract - skipping extraction phase") + return True + + # Ensure readthis directory exists + try: + readthis_dir.mkdir(parents=True, exist_ok=True) + print(f"[OK] Readthis directory ready: {readthis_dir}") + except Exception as e: + print(f"[ERROR] Failed to create readthis directory: {e}") + return False + + # Discover ALL files in raw directory dynamically (courier model) + raw_files = [] + for file_path in raw_dir.iterdir(): + if file_path.is_file(): + raw_files.append(file_path.name) + + print(f"[DISCOVERED] Found {len(raw_files)} files in raw directory") + + # Pure courier model - no smart extraction, just chunking if needed + # Build extraction strategy dynamically + extraction_strategy = [] + for filename in sorted(raw_files): + # All files get same treatment: chunk if needed + extraction_strategy.append((filename, 100, _copy_as_is)) + + total_budget = budget_kb * 1024 # Convert to bytes + total_used = 0 + extracted_files = [] + skipped_files = [] + failed_files = [] # Track failures + + print(f"[BUDGET] Total budget: {budget_kb}KB ({total_budget:,} bytes)") + print(f"[STRATEGY] Pure courier model - no filtering\n") + + for filename, file_budget_kb, extractor in extraction_strategy: + raw_path = raw_dir / filename + + if not raw_path.exists(): + continue + + print(f"[PROCESSING] {filename}") + + # Just chunk everything - ignore budget for chunking + # The whole point is to break large files into manageable pieces + chunks = _chunk_large_file(raw_path) + + if chunks is None: + # Chunking failed 
for this file + print(f" [FAILED] {filename} - chunking error") + failed_files.append(filename) + continue + + if chunks: + for chunk_path, chunk_size in chunks: + # Optionally check budget per chunk (or ignore completely) + if total_used + chunk_size > total_budget: + # Could skip remaining chunks or just ignore budget + # For now, let's just ignore budget and extract everything + pass + + total_used += chunk_size + extracted_files.append((chunk_path.name, chunk_size)) + + # Create extraction summary + summary = { + 'extraction_timestamp': str(Path(root_path_str).stat().st_mtime), + 'budget_kb': budget_kb, + 'total_used_bytes': total_used, + 'total_used_kb': total_used // 1024, + 'utilization_percent': (total_used / total_budget) * 100, + 'files_extracted': len(extracted_files), + 'files_skipped': len(skipped_files), + 'files_failed': len(failed_files), + 'extracted': [{'file': f, 'size': s} for f, s in extracted_files], + 'skipped': skipped_files, + 'failed': failed_files, + 'strategy': 'Pure courier model - chunk if needed, no filtering' + } + + summary_path = readthis_dir / 'extraction_summary.json' + with open(summary_path, 'w', encoding='utf-8') as f: + json.dump(summary, f, indent=2) + + # Summary report + print("\n" + "="*60) + print("[EXTRACTION COMPLETE]") + print(f" Files extracted: {len(extracted_files)}") + print(f" Files skipped: {len(skipped_files)}") + print(f" Files failed: {len(failed_files)}") + print(f" Total size: {total_used:,} bytes ({total_used//1024}KB)") + print(f" Budget used: {(total_used/total_budget)*100:.1f}%") + print(f" Summary saved: {summary_path}") + + # List what was extracted + print("\n[EXTRACTED FILES]") + for filename, size in extracted_files: + print(f" {filename:30} {size:8,} bytes ({size//1024:4}KB)") + + if skipped_files: + print("\n[SKIPPED FILES]") + for filename in skipped_files: + print(f" {filename}") + + if failed_files: + print("\n[FAILED FILES]") + for filename in failed_files: + print(f" {filename}") + + print("\n[KEY INSIGHTS]") + print(" ✓ All findings preserved - no filtering") + print(" ✓ Pure courier model - no interpretation") + print(" ✓ Files chunked only if >65KB") + print(" ✓ Complete data for AI consumption") + print("="*60) + + # Return False if any files failed, True only if all succeeded + if failed_files: + print(f"\n[ERROR] Extraction failed for {len(failed_files)} files") + return False + return True \ No newline at end of file diff --git a/theauditor/fce.py b/theauditor/fce.py new file mode 100644 index 0000000..5ddc3f8 --- /dev/null +++ b/theauditor/fce.py @@ -0,0 +1,784 @@ +"""Factual Correlation Engine - aggregates and correlates findings from all analysis tools.""" + +import json +import os +import re +import shlex +import sqlite3 +import subprocess +from collections import defaultdict, deque +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from theauditor.test_frameworks import detect_test_framework +from theauditor.correlations import CorrelationLoader + + + + +def scan_all_findings(raw_dir: Path) -> list[dict[str, Any]]: + """ + Scan ALL raw outputs for structured findings with line-level detail. + Extract findings from JSON outputs with file, line, rule, and tool information. 
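+ Each finding is standardized to the keys: file, line, rule, tool, message, severity.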
+ """ + all_findings = [] + + for output_file in raw_dir.glob('*.json'): + if not output_file.is_file(): + continue + + tool_name = output_file.stem + try: + with open(output_file, 'r', encoding='utf-8') as f: + data = json.load(f) + + # Handle different JSON structures based on tool + findings = [] + + # Standard findings structure (lint.json, patterns.json, etc.) + if isinstance(data, dict) and 'findings' in data: + findings = data['findings'] + # Vulnerabilities structure + elif isinstance(data, dict) and 'vulnerabilities' in data: + findings = data['vulnerabilities'] + # Taint analysis structure + elif isinstance(data, dict) and 'taint_paths' in data: + for path in data['taint_paths']: + # Create a finding for each taint path + if 'file' in path and 'line' in path: + findings.append({ + 'file': path['file'], + 'line': path['line'], + 'rule': f"taint-{path.get('sink_type', 'unknown')}", + 'message': path.get('message', 'Taint path detected') + }) + # Direct list of findings + elif isinstance(data, list): + findings = data + # RCA/test results structure + elif isinstance(data, dict) and 'failures' in data: + findings = data['failures'] + + # Process each finding + for finding in findings: + if isinstance(finding, dict): + # Ensure required fields exist + if 'file' in finding: + # Create standardized finding + standardized = { + 'file': finding.get('file', ''), + 'line': int(finding.get('line', 0)), + 'rule': finding.get('rule', finding.get('code', finding.get('pattern', 'unknown'))), + 'tool': finding.get('tool', tool_name), + 'message': finding.get('message', ''), + 'severity': finding.get('severity', 'warning') + } + all_findings.append(standardized) + + except (json.JSONDecodeError, KeyError, TypeError): + # Skip files that can't be parsed as JSON or don't have expected structure + continue + except Exception: + # Skip files with other errors + continue + + return all_findings + + +def run_tool(command: str, root_path: str, timeout: int = 600) -> tuple[int, str, str]: + """Run build/test tool with timeout and capture output.""" + try: + # Use deque as ring buffer to limit memory usage + max_lines = 10000 + stdout_buffer = deque(maxlen=max_lines) + stderr_buffer = deque(maxlen=max_lines) + + # Run command - safely split command string into arguments + cmd_args = shlex.split(command) + + # Write directly to temp files to avoid buffer overflow + import tempfile + with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stdout.txt') as out_tmp, \ + tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stderr.txt') as err_tmp: + + process = subprocess.Popen( + cmd_args, + cwd=root_path, + stdout=out_tmp, + stderr=err_tmp, + text=True, + env={**os.environ, "CI": "true"}, # Set CI env for tools + ) + + stdout_file = out_tmp.name + stderr_file = err_tmp.name + + # Stream output with timeout + try: + process.communicate(timeout=timeout) + + # Read back the outputs + with open(stdout_file, 'r') as f: + stdout = f.read() + with open(stderr_file, 'r') as f: + stderr = f.read() + + # Clean up temp files + os.unlink(stdout_file) + os.unlink(stderr_file) + + # Append any errors to the global error.log + if stderr: + from pathlib import Path + error_log = Path(root_path) / ".pf" / "error.log" + error_log.parent.mkdir(parents=True, exist_ok=True) + with open(error_log, 'a') as f: + f.write(f"\n=== RCA Subprocess Error ({command[:50]}) ===\n") + f.write(f"Timestamp: {datetime.now().isoformat()}\n") + f.write(stderr) + f.write("\n") + # Store in buffers + 
stdout_buffer.extend(stdout.splitlines()) + stderr_buffer.extend(stderr.splitlines()) + except subprocess.TimeoutExpired: + process.kill() + return 124, "Process timed out", f"Command exceeded {timeout}s timeout" + + # Join lines + stdout_text = "\n".join(stdout_buffer) + stderr_text = "\n".join(stderr_buffer) + + return process.returncode, stdout_text, stderr_text + + except Exception as e: + return 1, "", str(e) + + +def parse_typescript_errors(output: str) -> list[dict[str, Any]]: + """Parse TypeScript/TSNode compiler errors.""" + errors = [] + + # TypeScript error format: file:line:col - error CODE: message + pattern = ( + r"(?P<file>[^:\n]+):(?P<line>\d+):(?P<col>\d+) - error (?P<code>[A-Z]+\d+): (?P<msg>.+)" + ) + + for match in re.finditer(pattern, output): + errors.append( + { + "tool": "tsc", + "file": match.group("file"), + "line": int(match.group("line")), + "column": int(match.group("col")), + "message": match.group("msg"), + "code": match.group("code"), + "category": "type_error", + } + ) + + return errors + + +def parse_jest_errors(output: str) -> list[dict[str, Any]]: + """Parse Jest/Vitest test failures.""" + errors = [] + + # Jest failed test: ● Test Suite Name › test name + # Followed by stack trace: at Object.<anonymous> (file:line:col) + test_pattern = r"● (?P<testname>[^\n]+)" + stack_pattern = r"at .*? \((?P<file>[^:]+):(?P<line>\d+):(?P<col>\d+)\)" + + lines = output.splitlines() + for i, line in enumerate(lines): + test_match = re.match(test_pattern, line) + if test_match: + # Look for stack trace in next lines + for j in range(i + 1, min(i + 20, len(lines))): + stack_match = re.search(stack_pattern, lines[j]) + if stack_match: + errors.append( + { + "tool": "jest", + "file": stack_match.group("file"), + "line": int(stack_match.group("line")), + "column": int(stack_match.group("col")), + "message": f"Test failed: {test_match.group('testname')}", + "category": "test_failure", + } + ) + break + + return errors + + +def parse_pytest_errors(output: str) -> list[dict[str, Any]]: + """Parse pytest failures.""" + errors = [] + + # Pytest error format varies, but typically: + # FAILED path/to/test.py::TestClass::test_method - AssertionError: message + # Or: E AssertionError: message + # path/to/file.py:42: AssertionError + + failed_pattern = r"FAILED (?P<file>[^:]+)(?:::(?P<test>[^\s]+))?
- (?P<msg>.+)" + error_pattern = r"^E\s+(?P<msg>.+)\n.*?(?P<file>[^:]+):(?P<line>\d+):" + + for match in re.finditer(failed_pattern, output): + errors.append( + { + "tool": "pytest", + "file": match.group("file"), + "line": 0, # Line not in FAILED format + "message": match.group("msg"), + "category": "test_failure", + } + ) + + for match in re.finditer(error_pattern, output, re.MULTILINE): + errors.append( + { + "tool": "pytest", + "file": match.group("file"), + "line": int(match.group("line")), + "message": match.group("msg"), + "category": "test_failure", + } + ) + + return errors + + +def parse_python_compile_errors(output: str) -> list[dict[str, Any]]: + """Parse Python compilation errors from py_compile output.""" + errors = [] + + # Python compile error format: + # Traceback (most recent call last): + # File "path/to/file.py", line X, in <module> + # SyntaxError: invalid syntax + # Or: ModuleNotFoundError: No module named 'xxx' + + # Parse traceback format + lines = output.splitlines() + for i, line in enumerate(lines): + # Look for File references in tracebacks + if 'File "' in line and '", line ' in line: + # Extract file and line number + match = re.match(r'.*File "([^"]+)", line (\d+)', line) + if match and i + 1 < len(lines): + file_path = match.group(1) + line_num = int(match.group(2)) + + # Look for the error type in following lines + for j in range(i + 1, min(i + 5, len(lines))): + if 'Error:' in lines[j]: + error_msg = lines[j].strip() + errors.append({ + "tool": "py_compile", + "file": file_path, + "line": line_num, + "message": error_msg, + "category": "compile_error", + }) + break + + # Also catch simple error messages + if 'SyntaxError:' in line or 'ModuleNotFoundError:' in line or 'ImportError:' in line: + # Try to extract file info from previous lines + file_info = None + for j in range(max(0, i - 3), i): + if '***' in lines[j] and '.py' in lines[j]: + # py_compile format: *** path/to/file.py + file_match = re.match(r'\*\*\* (.+\.py)', lines[j]) + if file_match: + file_info = file_match.group(1) + break + + if file_info: + errors.append({ + "tool": "py_compile", + "file": file_info, + "line": 0, + "message": line.strip(), + "category": "compile_error", + }) + + return errors + + +def parse_errors(output: str, tool_name: str) -> list[dict[str, Any]]: + """Parse errors based on tool type.""" + all_errors = [] + + # Try all parsers + all_errors.extend(parse_typescript_errors(output)) + all_errors.extend(parse_jest_errors(output)) + all_errors.extend(parse_pytest_errors(output)) + all_errors.extend(parse_python_compile_errors(output)) + + return all_errors + + +def load_capsule(capsules_dir: str, file_hash: str) -> dict | None: + """Load capsule by file hash.""" + capsule_path = Path(capsules_dir) / f"{file_hash}.json" + if not capsule_path.exists(): + return None + + try: + with open(capsule_path) as f: + return json.load(f) + except json.JSONDecodeError: + return None + + + + +def correlate_failures( + errors: list[dict[str, Any]], + manifest_path: str, + workset_path: str, + capsules_dir: str, + db_path: str, +) -> list[dict[str, Any]]: + """Correlate failures with capsules for factual enrichment.""" + # Load manifest for hash lookup + file_hashes = {} + try: + with open(manifest_path) as f: + manifest = json.load(f) + for entry in manifest: + file_hashes[entry["path"]] = entry.get("sha256") + except (FileNotFoundError, json.JSONDecodeError): + pass + + # Load workset + workset_files = set() + try: + with open(workset_path) as f: + workset = json.load(f) + workset_files = {p["path"] for p in
workset.get("paths", [])} + except (FileNotFoundError, json.JSONDecodeError): + pass + + # Correlate each error + for error in errors: + file = error.get("file", "") + + # Load capsule if file in workset/manifest + if file in file_hashes: + file_hash = file_hashes[file] + capsule = load_capsule(capsules_dir, file_hash) + if capsule: + error["capsule"] = { + "path": capsule.get("path"), + "hash": capsule.get("sha256"), + "interfaces": capsule.get("interfaces", {}), + } + + + return errors + + +def generate_rca_json(failures: list[dict[str, Any]]) -> dict[str, Any]: + """Generate RCA JSON output.""" + return { + "completed_at": datetime.now(UTC).isoformat(), + "failures": failures, + } + + +def run_fce( + root_path: str = ".", + capsules_dir: str = "./.pf/capsules", + manifest_path: str = "manifest.json", + workset_path: str = "./.pf/workset.json", + db_path: str = "repo_index.db", + timeout: int = 600, + print_plan: bool = False, +) -> dict[str, Any]: + """Run factual correlation engine - NO interpretation, just facts.""" + try: + # Step A: Initialization + raw_dir = Path(root_path) / ".pf" / "raw" + results = { + "timestamp": datetime.now(UTC).isoformat(), + "all_findings": [], + "test_results": {}, + "correlations": {} + } + + # Step B: Phase 1 - Gather All Findings + if raw_dir.exists(): + results["all_findings"] = scan_all_findings(raw_dir) + + # Step B2: Load Optional Insights (ML predictions, etc.) + insights_dir = Path(root_path) / ".pf" / "insights" + if insights_dir.exists(): + # Load ML suggestions if available + ml_path = insights_dir / "ml_suggestions.json" + if ml_path.exists(): + try: + with open(ml_path) as f: + ml_data = json.load(f) + + # Convert ML predictions to correlatable findings + # ML has separate lists for root causes, risk scores, etc. 
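+ # Only predictions with score > 0.7 are promoted to findings; line stays 0 because these predictions are file-level.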
+ for root_cause in ml_data.get("likely_root_causes", [])[:5]: # Top 5 root causes + if root_cause.get("score", 0) > 0.7: + results["all_findings"].append({ + "file": root_cause["path"], + "line": 0, # ML doesn't provide line-level predictions + "rule": "ML_ROOT_CAUSE", + "tool": "ml", + "message": f"ML predicts {root_cause['score']:.1%} probability as root cause", + "severity": "high" + }) + + for risk_item in ml_data.get("risk", [])[:5]: # Top 5 risky files + if risk_item.get("score", 0) > 0.7: + results["all_findings"].append({ + "file": risk_item["path"], + "line": 0, + "rule": f"ML_RISK_{int(risk_item['score']*100)}", + "tool": "ml", + "message": f"ML predicts {risk_item['score']:.1%} risk score", + "severity": "high" if risk_item.get("score", 0) > 0.85 else "medium" + }) + except (json.JSONDecodeError, KeyError): + pass # ML insights are optional, continue if they fail + + # Load taint severity insights if available + taint_severity_path = insights_dir / "taint_severity.json" + if taint_severity_path.exists(): + try: + with open(taint_severity_path) as f: + taint_data = json.load(f) + + # Add severity-enhanced taint findings + for item in taint_data.get("severity_analysis", []): + if item.get("severity") in ["critical", "high"]: + results["all_findings"].append({ + "file": item.get("file", ""), + "line": item.get("line", 0), + "rule": f"TAINT_{item.get('vulnerability_type', 'UNKNOWN').upper().replace(' ', '_')}", + "tool": "taint-insights", + "message": f"{item.get('vulnerability_type')} with {item.get('severity')} severity", + "severity": item.get("severity") + }) + except (json.JSONDecodeError, KeyError): + pass # Insights are optional + + # Step C: Phase 2 - Execute Tests + # Detect test framework + framework_info = detect_test_framework(root_path) + + tools = [] + if framework_info["name"] != "unknown" and framework_info["cmd"]: + command = framework_info["cmd"] + + # Add quiet flags + if "pytest" in command: + command = "pytest -q -p no:cacheprovider" + elif "npm test" in command: + command = "npm test --silent" + elif "unittest" in command: + command = "python -m unittest discover -q" + + tools.append({ + "name": framework_info["name"], + "command": command, + "type": "test" + }) + + # Check for build scripts + package_json = Path(root_path) / "package.json" + if package_json.exists(): + try: + with open(package_json) as f: + package = json.load(f) + scripts = package.get("scripts", {}) + if "build" in scripts: + tools.append({ + "name": "npm build", + "command": "npm run build --silent", + "type": "build" + }) + except json.JSONDecodeError: + pass + + if print_plan: + print("Detected tools:") + for tool in tools: + print(f" - {tool['name']}: {tool['command']}") + return {"success": True, "printed_plan": True} + + if not tools: + tools = [] # No test tools, continue processing + + # Run tools and collect failures + all_failures = [] + + for tool in tools: + print(f"Running {tool['name']}...") + exit_code, stdout, stderr = run_tool(tool["command"], root_path, timeout) + + if exit_code != 0: + output = stdout + "\n" + stderr + errors = parse_errors(output, tool["name"]) + + # Special handling for pytest collection failures + if tool["name"] == "pytest" and exit_code == 2 and "ERROR collecting" in output: + print("Pytest collection failed. 
Falling back to Python compilation check...") + + py_files = [] + for py_file in Path(root_path).rglob("*.py"): + if "__pycache__" not in str(py_file) and not any(part.startswith('.') for part in py_file.parts): + py_files.append(str(py_file.relative_to(root_path))) + + if py_files: + print(f"Checking {len(py_files)} Python files for compilation errors...") + compile_errors = [] + + for py_file in py_files[:50]: + module_path = str(Path(py_file).as_posix()).replace('/', '.').replace('.py', '') + import_cmd = f'python3 -c "import {module_path}"' + comp_exit, comp_out, comp_err = run_tool(import_cmd, root_path, 10) + + if comp_exit != 0: + comp_output = comp_out + "\n" + comp_err + if comp_output.strip(): + error_lines = comp_output.strip().split('\n') + error_msg = "Import failed" + + for line in error_lines: + if 'ModuleNotFoundError:' in line: + error_msg = line.strip() + break + elif 'ImportError:' in line: + error_msg = line.strip() + break + elif 'SyntaxError:' in line: + error_msg = line.strip() + break + elif 'AttributeError:' in line: + error_msg = line.strip() + break + + compile_errors.append({ + "tool": "py_import", + "file": py_file, + "line": 0, + "message": error_msg, + "category": "compile_error", + }) + + if compile_errors: + print(f"Found {len(compile_errors)} compilation errors") + errors.extend(compile_errors) + + # If no errors parsed, create generic one + if not errors and exit_code != 0: + errors.append({ + "tool": tool["name"], + "file": "unknown", + "line": 0, + "message": f"Tool failed with exit code {exit_code}", + "category": "runtime", + }) + + all_failures.extend(errors) + + # Correlate with capsules + all_failures = correlate_failures( + all_failures, + Path(root_path) / manifest_path, + Path(root_path) / workset_path, + Path(root_path) / capsules_dir, + Path(root_path) / db_path, + ) + + # Store test results + results["test_results"] = { + "completed_at": datetime.now(UTC).isoformat(), + "failures": all_failures, + "tools_run": len(tools) + } + + # Step D: Consolidate Evidence + consolidated_findings = results["all_findings"].copy() + + # Add test failures to consolidated list + if all_failures: + for failure in all_failures: + if 'file' in failure and 'line' in failure: + consolidated_findings.append({ + 'file': failure['file'], + 'line': int(failure.get('line', 0)), + 'rule': failure.get('code', failure.get('category', 'test-failure')), + 'tool': failure.get('tool', 'test'), + 'message': failure.get('message', ''), + 'severity': failure.get('severity', 'error') + }) + + # Step E: Phase 3 - Line-Level Correlation (Hotspots) + # Group findings by file:line + line_groups = defaultdict(list) + for finding in consolidated_findings: + if finding['line'] > 0: + key = f"{finding['file']}:{finding['line']}" + line_groups[key].append(finding) + + # Find hotspots + hotspots = {} + for line_key, findings in line_groups.items(): + tools_on_line = set(f['tool'] for f in findings) + if len(tools_on_line) > 1: + hotspots[line_key] = findings + + # Enrich hotspots with symbol context + full_db_path = Path(root_path) / db_path + if hotspots and full_db_path.exists(): + try: + conn = sqlite3.connect(str(full_db_path)) + cursor = conn.cursor() + + enriched_hotspots = {} + for line_key, findings in hotspots.items(): + if ':' in line_key: + file_path, line_str = line_key.rsplit(':', 1) + try: + line_num = int(line_str) + + query = """ + SELECT name, type, line + FROM symbols + WHERE file = ? + AND line <= ? 
+ AND type IN ('function', 'class') + ORDER BY line DESC + LIMIT 1 + """ + cursor.execute(query, (file_path, line_num)) + result = cursor.fetchone() + + hotspot_data = {"findings": findings} + + if result: + symbol_name, symbol_type, symbol_line = result + hotspot_data["in_symbol"] = f"{symbol_type}: {symbol_name}" + + enriched_hotspots[line_key] = hotspot_data + except (ValueError, TypeError): + enriched_hotspots[line_key] = {"findings": findings} + else: + enriched_hotspots[line_key] = {"findings": findings} + + conn.close() + hotspots = enriched_hotspots + except (sqlite3.Error, Exception): + hotspots = {k: {"findings": v} for k, v in hotspots.items()} + else: + hotspots = {k: {"findings": v} for k, v in hotspots.items()} + + # Store hotspots in correlations + results["correlations"]["hotspots"] = hotspots + results["correlations"]["total_findings"] = len(consolidated_findings) + results["correlations"]["total_lines_with_findings"] = len(line_groups) + results["correlations"]["total_hotspots"] = len(hotspots) + + # Step F: Phase 4 - Factual Cluster Detection + factual_clusters = [] + + # Load correlation rules + correlation_loader = CorrelationLoader() + correlation_rules = correlation_loader.load_rules() + + if correlation_rules and consolidated_findings: + # Group findings by file + findings_by_file = defaultdict(list) + for finding in consolidated_findings: + if 'file' in finding: + findings_by_file[finding['file']].append(finding) + + # Check each file against each rule + for file_path, file_findings in findings_by_file.items(): + for rule in correlation_rules: + all_facts_matched = True + + for fact_index, fact in enumerate(rule.co_occurring_facts): + fact_matched = False + for finding in file_findings: + if rule.matches_finding(finding, fact_index): + fact_matched = True + break + + if not fact_matched: + all_facts_matched = False + break + + if all_facts_matched: + factual_clusters.append({ + "name": rule.name, + "file": file_path, + "description": rule.description, + "confidence": rule.confidence + }) + + # Store factual clusters + results["correlations"]["factual_clusters"] = factual_clusters + + # Step G: Finalization - Apply intelligent organization sorting + from theauditor.utils.finding_priority import sort_findings, normalize_severity + + # CRITICAL: Normalize all severities BEFORE sorting + # This handles Docker's integer severity and ESLint's "error" strings + if results.get("all_findings"): + # First pass: normalize severity in-place + for finding in results["all_findings"]: + original_severity = finding.get("severity") + finding["severity"] = normalize_severity(original_severity) + + # Debug log for unusual severities (helps catch new formats) + if original_severity and str(original_severity) != finding["severity"]: + if isinstance(original_severity, int): + # Expected for Docker, don't log + pass + else: + print(f"[FCE] Normalized severity: {original_severity} -> {finding['severity']}") + + # Second pass: sort using centralized logic + results["all_findings"] = sort_findings(results["all_findings"]) + + # Log sorting results for verification + if results["all_findings"]: + print(f"[FCE] Sorted {len(results['all_findings'])} findings") + first = results["all_findings"][0] + last = results["all_findings"][-1] if len(results["all_findings"]) > 1 else first + print(f"[FCE] First: {first.get('severity')} from {first.get('tool')}") + print(f"[FCE] Last: {last.get('severity')} from {last.get('tool')}") + + # Write results to JSON + raw_dir.mkdir(parents=True, exist_ok=True) 
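+ # fce.json carries the full correlation output: timestamp, the severity-sorted all_findings list, test_results, and correlations (hotspots, factual_clusters, totals).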
+ fce_path = raw_dir / "fce.json" + fce_path.write_text(json.dumps(results, indent=2)) + + # Count total failures/findings + failures_found = len(results.get("all_findings", [])) + + # Return success structure + return { + "success": True, + "failures_found": failures_found, + "output_files": [str(fce_path)], + "results": results + } + + except Exception as e: + # Step H: Error Handling + return { + "success": False, + "failures_found": 0, + "error": str(e) + } diff --git a/theauditor/framework_detector.py b/theauditor/framework_detector.py new file mode 100644 index 0000000..e0cbee8 --- /dev/null +++ b/theauditor/framework_detector.py @@ -0,0 +1,608 @@ +"""Framework detection for various languages and ecosystems.""" + +import json +import re +import glob +from pathlib import Path +from typing import Any +from theauditor.manifest_parser import ManifestParser +from theauditor.framework_registry import FRAMEWORK_REGISTRY + + +class FrameworkDetector: + """Detects frameworks and libraries used in a project.""" + + # Note: Framework detection now uses the centralized FRAMEWORK_REGISTRY + # from framework_registry.py instead of the old FRAMEWORK_SIGNATURES + + def __init__(self, project_path: Path, exclude_patterns: list[str] = None): + """Initialize detector with project path. + + Args: + project_path: Root directory of the project. + exclude_patterns: List of patterns to exclude from scanning. + """ + self.project_path = Path(project_path) + self.detected_frameworks = [] + self.deps_cache = None + self.exclude_patterns = exclude_patterns or [] + + def detect_all(self) -> list[dict[str, Any]]: + """Detect all frameworks in the project. + + Returns: + List of detected framework info dictionaries. + """ + self.detected_frameworks = [] + + # Load TheAuditor's deps.json if available for better version info + self._load_deps_cache() + + # Use unified manifest detection + self._detect_from_manifests() + + # Also detect from monorepo workspaces (keep existing logic) + self._detect_from_workspaces() + + # Store frameworks found in manifests for version lookup + manifest_frameworks = {} + for fw in self.detected_frameworks: + if fw["source"] != "imports": + key = (fw["framework"], fw["language"]) + manifest_frameworks[key] = fw["version"] + + # DISABLED: Import scanning causes too many false positives + # It detects framework names in strings, comments, and detection code itself + # Real dependencies should be in manifest files (package.json, requirements.txt, etc.) 
+ # self._scan_source_imports() + + # Check for framework-specific files + self._check_framework_files() + + # Update versions for frameworks detected from framework files only (imports disabled) + for fw in self.detected_frameworks: + if fw["version"] == "unknown" and fw["source"] == "framework_files": + key = (fw["framework"], fw["language"]) + # First try manifest frameworks + if key in manifest_frameworks: + fw["version"] = manifest_frameworks[key] + fw["source"] = f"{fw['source']} (version from manifest)" + # Then try deps cache + elif self.deps_cache and fw["framework"] in self.deps_cache: + cached_dep = self.deps_cache[fw["framework"]] + manager = cached_dep.get("manager", "") + # Match language to manager (py -> python, npm -> javascript) + if (fw["language"] == "python" and manager == "py") or \ + (fw["language"] in ["javascript", "typescript"] and manager == "npm"): + fw["version"] = cached_dep.get("version", "") # Empty not unknown + if fw["version"] != "unknown": + fw["source"] = f"{fw['source']} (version from deps cache)" + + # Deduplicate results, preferring entries with known versions + # Now we keep framework+language+path as unique key to support monorepos + seen = {} + for fw in self.detected_frameworks: + key = (fw["framework"], fw["language"], fw.get("path", ".")) + if key not in seen: + seen[key] = fw + elif fw["version"] != "unknown" and seen[key]["version"] == "unknown": + # Replace with version that has a known version + seen[key] = fw + + return list(seen.values()) + + def _detect_from_manifests(self): + """Unified manifest detection using registry and ManifestParser - now directory-aware.""" + parser = ManifestParser() + + # Manifest file names to search for + manifest_names = [ + "pyproject.toml", + "package.json", + "requirements.txt", + "requirements-dev.txt", + "requirements-test.txt", + "setup.py", + "setup.cfg", + "Gemfile", + "Gemfile.lock", + "go.mod", + "pom.xml", + "build.gradle", + "build.gradle.kts", + "composer.json", + ] + + # Recursively find all manifest files in the project + manifests = {} + for manifest_name in manifest_names: + # Use rglob to find all instances of this manifest file + for manifest_path in self.project_path.rglob(manifest_name): + # Skip excluded directories + try: + relative_path = manifest_path.relative_to(self.project_path) + should_skip = False + + # Check common skip directories + for part in relative_path.parts[:-1]: # Don't check the filename itself + if part in ["node_modules", "venv", ".venv", ".auditor_venv", "vendor", + "dist", "build", "__pycache__", ".git", ".tox", ".pytest_cache"]: + should_skip = True + break + + if should_skip: + continue + + # Calculate the directory path relative to project root + dir_path = manifest_path.parent.relative_to(self.project_path) + dir_str = str(dir_path) if dir_path != Path('.') else '.' + + # Create a unique key for this manifest + manifest_key = f"{dir_str}/{manifest_name}" if dir_str != '.' 
else manifest_name + manifests[manifest_key] = manifest_path + + except ValueError: + # File is outside project path somehow, skip it + continue + + # Parse each manifest that exists + parsed_data = {} + for manifest_key, path in manifests.items(): + if path.exists(): + try: + # Extract just the filename for parsing logic + filename = path.name + + if filename.endswith('.toml'): + parsed_data[manifest_key] = parser.parse_toml(path) + elif filename.endswith('.json'): + parsed_data[manifest_key] = parser.parse_json(path) + elif filename.endswith(('.yml', '.yaml')): + parsed_data[manifest_key] = parser.parse_yaml(path) + elif filename.endswith('.cfg'): + parsed_data[manifest_key] = parser.parse_ini(path) + elif filename.endswith('.txt'): + parsed_data[manifest_key] = parser.parse_requirements_txt(path) + elif filename == 'Gemfile' or filename == 'Gemfile.lock': + # Parse Gemfile as text for now + with open(path, 'r', encoding='utf-8') as f: + parsed_data[manifest_key] = f.read() + elif filename.endswith('.xml') or filename.endswith('.gradle') or filename.endswith('.kts') or filename.endswith('.mod'): + # Parse as text content for now + with open(path, 'r', encoding='utf-8') as f: + parsed_data[manifest_key] = f.read() + elif filename == 'setup.py': + with open(path, 'r', encoding='utf-8') as f: + parsed_data[manifest_key] = f.read() + except Exception as e: + print(f"Warning: Failed to parse {manifest_key}: {e}") + + # Check each framework against all manifests + for fw_name, fw_config in FRAMEWORK_REGISTRY.items(): + for required_manifest_name, search_configs in fw_config.get("detection_sources", {}).items(): + # Check all parsed manifests that match this manifest type + for manifest_key, manifest_data in parsed_data.items(): + # Check if this manifest matches the required type + if not manifest_key.endswith(required_manifest_name): + continue + + # Extract the directory path from the manifest key + if '/' in manifest_key: + dir_path = '/'.join(manifest_key.split('/')[:-1]) + else: + dir_path = '.' 
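+ # search_configs selects the matching strategy below: "line_search" scans requirements/Gemfile-style lines, "content_search" scans free-text build files, "exists" treats file presence as a match, and anything else is a list of key paths into the parsed manifest structure.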
+ + if search_configs == "line_search": + # Simple text search for requirements.txt style or Gemfile + if isinstance(manifest_data, list): + # Requirements.txt parsed as list + for line in manifest_data: + version = parser.check_package_in_deps([line], fw_name) + if version: + self.detected_frameworks.append({ + "framework": fw_name, + "version": version or "unknown", + "language": fw_config["language"], + "path": dir_path, + "source": manifest_key + }) + break + elif isinstance(manifest_data, str): + # Text file content + if fw_name in manifest_data or (fw_config.get("package_pattern") and fw_config["package_pattern"] in manifest_data): + # Try to extract version + version = "unknown" + import re + if fw_config.get("package_pattern"): + pattern = fw_config["package_pattern"] + else: + pattern = fw_name + + # Try different version patterns + version_match = re.search(rf'{re.escape(pattern)}["\']?\s*[,:]?\s*["\']?([\d.]+)', manifest_data) + if not version_match: + version_match = re.search(rf'{re.escape(pattern)}\s+v([\d.]+)', manifest_data) + if not version_match: + version_match = re.search(rf'gem\s+["\']?{re.escape(pattern)}["\']?\s*,\s*["\']([\d.]+)["\']', manifest_data) + + if version_match: + version = version_match.group(1) + + self.detected_frameworks.append({ + "framework": fw_name, + "version": version, + "language": fw_config["language"], + "path": dir_path, + "source": manifest_key + }) + + elif search_configs == "content_search": + # Content search for text-based files + if isinstance(manifest_data, str): + found = False + # Check package pattern first + if fw_config.get("package_pattern") and fw_config["package_pattern"] in manifest_data: + found = True + # Check content patterns + elif fw_config.get("content_patterns"): + for pattern in fw_config["content_patterns"]: + if pattern in manifest_data: + found = True + break + # Fallback to framework name + elif fw_name in manifest_data: + found = True + + if found: + # Try to extract version + version = "unknown" + import re + pattern = fw_config.get("package_pattern", fw_name) + version_match = re.search(rf'{re.escape(pattern)}.*?[>v]([\d.]+)', manifest_data, re.DOTALL) + if version_match: + version = version_match.group(1) + + self.detected_frameworks.append({ + "framework": fw_name, + "version": version, + "language": fw_config["language"], + "path": dir_path, + "source": manifest_key + }) + + elif search_configs == "exists": + # Just check if file exists (for go.mod with go test framework) + self.detected_frameworks.append({ + "framework": fw_name, + "version": "unknown", + "language": fw_config["language"], + "path": dir_path, + "source": manifest_key + }) + + else: + # Structured search for JSON/TOML/YAML + for key_path in search_configs: + deps = parser.extract_nested_value(manifest_data, key_path) + if deps: + # Check if framework is in dependencies + package_name = fw_config.get("package_pattern", fw_name) + version = parser.check_package_in_deps(deps, package_name) + if version: + self.detected_frameworks.append({ + "framework": fw_name, + "version": version, + "language": fw_config["language"], + "path": dir_path, + "source": manifest_key + }) + break + + def _detect_from_workspaces(self): + """Detect frameworks from monorepo workspace packages.""" + # This preserves the existing monorepo detection logic + package_json = self.project_path / "package.json" + if not package_json.exists(): + return + + parser = ManifestParser() + try: + data = parser.parse_json(package_json) + + # Check for workspaces field (Yarn/npm 
workspaces) + workspaces = data.get("workspaces", []) + + # Handle different workspace formats + if isinstance(workspaces, dict): + # npm 7+ format: {"packages": ["packages/*"]} + workspaces = workspaces.get("packages", []) + + if workspaces and isinstance(workspaces, list): + # This is a monorepo - check workspace packages + for pattern in workspaces: + # Convert workspace pattern to absolute path pattern + abs_pattern = str(self.project_path / pattern) + + # Handle glob patterns + if "*" in abs_pattern: + matched_paths = glob.glob(abs_pattern) + for matched_path in matched_paths: + matched_dir = Path(matched_path) + if matched_dir.is_dir(): + workspace_pkg = matched_dir / "package.json" + if workspace_pkg.exists(): + # Parse and check this workspace package + self._check_workspace_package(workspace_pkg, parser) + else: + # Direct path without glob + workspace_dir = self.project_path / pattern + if workspace_dir.is_dir(): + workspace_pkg = workspace_dir / "package.json" + if workspace_pkg.exists(): + self._check_workspace_package(workspace_pkg, parser) + except Exception as e: + print(f"Warning: Failed to check workspaces: {e}") + + def _check_workspace_package(self, pkg_path: Path, parser: ManifestParser): + """Check a single workspace package.json for frameworks.""" + try: + data = parser.parse_json(pkg_path) + + # Check dependencies + all_deps = {} + if "dependencies" in data: + all_deps.update(data["dependencies"]) + if "devDependencies" in data: + all_deps.update(data["devDependencies"]) + + # Check each JavaScript framework + for fw_name, fw_config in FRAMEWORK_REGISTRY.items(): + if fw_config["language"] != "javascript": + continue + + package_name = fw_config.get("package_pattern", fw_name) + if package_name in all_deps: + version = all_deps[package_name] + # Clean version + version = re.sub(r'^[~^>=<]+', '', str(version)).strip() + + # Calculate relative path for path field + try: + rel_path = pkg_path.parent.relative_to(self.project_path) + path = str(rel_path).replace("\\", "/") if rel_path != Path('.') else '.' + source = str(pkg_path.relative_to(self.project_path)).replace("\\", "/") + except ValueError: + path = '.' + source = str(pkg_path) + + self.detected_frameworks.append({ + "framework": fw_name, + "version": version, + "language": "javascript", + "path": path, + "source": source + }) + except Exception as e: + print(f"Warning: Failed to parse workspace package {pkg_path}: {e}") + + # Stub method kept for backward compatibility - actual logic moved to _detect_from_manifests + pass + + def _scan_source_imports(self): + """Scan source files for framework imports.""" + # Limit scanning to avoid performance issues + max_files = 100 + files_scanned = 0 + + # Language file extensions + lang_extensions = { + ".py": "python", + ".js": "javascript", + ".jsx": "javascript", + ".ts": "javascript", + ".tsx": "javascript", + ".go": "go", + ".java": "java", + ".rb": "ruby", + ".php": "php", + } + + for ext, language in lang_extensions.items(): + if files_scanned >= max_files: + break + + for file_path in self.project_path.rglob(f"*{ext}"): + if files_scanned >= max_files: + break + + # Skip node_modules, venv, etc. 
+ if any( + part in file_path.parts + for part in ["node_modules", "venv", ".venv", ".auditor_venv", "vendor", "dist", "build", "__pycache__", ".git"] + ): + continue + + # Check exclude patterns + relative_path = file_path.relative_to(self.project_path) + should_skip = False + for pattern in self.exclude_patterns: + # Handle directory patterns + if pattern.endswith('/'): + dir_pattern = pattern.rstrip('/') + if str(relative_path).startswith(dir_pattern + '/') or str(relative_path).startswith(dir_pattern + '\\'): + should_skip = True + break + # Handle glob patterns + elif '*' in pattern: + from fnmatch import fnmatch + if fnmatch(str(relative_path), pattern): + should_skip = True + break + # Handle exact matches + elif str(relative_path) == pattern: + should_skip = True + break + + if should_skip: + continue + + files_scanned += 1 + + try: + with open(file_path, encoding="utf-8", errors="ignore") as f: + content = f.read() + + # Check frameworks from registry + for fw_name, fw_config in FRAMEWORK_REGISTRY.items(): + # Only check frameworks for this language + if fw_config["language"] != language: + continue + + if "import_patterns" in fw_config: + for import_pattern in fw_config["import_patterns"]: + if import_pattern in content: + # Check if not already detected in this directory + file_dir = file_path.parent.relative_to(self.project_path) + dir_str = str(file_dir).replace("\\", "/") if file_dir != Path('.') else '.' + + if not any( + fw["framework"] == fw_name and fw["language"] == language and fw.get("path", ".") == dir_str + for fw in self.detected_frameworks + ): + self.detected_frameworks.append( + { + "framework": fw_name, + "version": "unknown", + "language": language, + "path": dir_str, + "source": "imports", + } + ) + break + + except Exception: + # Skip files that can't be read + continue + + def _check_framework_files(self): + """Check for framework-specific files.""" + # Check all frameworks in registry for file markers + for fw_name, fw_config in FRAMEWORK_REGISTRY.items(): + if "file_markers" in fw_config: + for file_marker in fw_config["file_markers"]: + # Handle wildcard patterns + if "*" in file_marker: + # Use glob for wildcard patterns + import glob + pattern = str(self.project_path / file_marker) + if glob.glob(pattern): + # Check if not already detected + if not any( + fw["framework"] == fw_name and fw["language"] == fw_config["language"] + for fw in self.detected_frameworks + ): + self.detected_frameworks.append( + { + "framework": fw_name, + "version": "unknown", + "language": fw_config["language"], + "path": ".", # Framework files typically at root + "source": "framework_files", + } + ) + break + else: + # Direct file path + if (self.project_path / file_marker).exists(): + # Check if not already detected + if not any( + fw["framework"] == fw_name and fw["language"] == fw_config["language"] + for fw in self.detected_frameworks + ): + self.detected_frameworks.append( + { + "framework": fw_name, + "version": "unknown", + "language": fw_config["language"], + "path": ".", # Framework files typically at root + "source": "framework_files", + } + ) + break + + def _load_deps_cache(self): + """Load TheAuditor's deps.json if available for version info.""" + deps_file = self.project_path / ".pf" / "deps.json" + if deps_file.exists(): + try: + with open(deps_file) as f: + data = json.load(f) + self.deps_cache = {} + # Handle both old format (list) and new format (dict with "dependencies" key) + if isinstance(data, list): + deps_list = data + else: + deps_list = 
data.get("dependencies", []) + + for dep in deps_list: + # Store by name for quick lookup + self.deps_cache[dep["name"]] = dep + except Exception as e: + # Log the error but continue + print(f"Warning: Could not load deps cache: {e}") + pass + + def format_table(self) -> str: + """Format detected frameworks as a table. + + Returns: + Formatted table string. + """ + if not self.detected_frameworks: + return "No frameworks detected." + + lines = [] + lines.append("FRAMEWORK LANGUAGE PATH VERSION SOURCE") + lines.append("-" * 80) + + imports_only = [] + for fw in self.detected_frameworks: + framework = fw["framework"][:18].ljust(18) + language = fw["language"][:12].ljust(12) + path = fw.get("path", ".")[:15].ljust(15) + version = fw["version"][:15].ljust(15) + source = fw["source"] + + lines.append(f"{framework} {language} {path} {version} {source}") + + # Track if any are from imports only + if fw["source"] == "imports" and fw["version"] == "unknown": + imports_only.append(fw["framework"]) + + # Add note if frameworks detected from imports without versions + if imports_only: + lines.append("\n" + "="*60) + lines.append("NOTE: Frameworks marked with 'imports' source were detected from") + lines.append("import statements in the codebase (possibly test files) but are") + lines.append("not listed as dependencies. Version shown as 'unknown' because") + lines.append("they are not in package.json, pyproject.toml, or requirements.txt.") + + return "\n".join(lines) + + def to_json(self) -> str: + """Export detected frameworks to JSON. + + Returns: + JSON string. + """ + return json.dumps(self.detected_frameworks, indent=2, sort_keys=True) + + def save_to_file(self, output_path: Path) -> None: + """Save detected frameworks to a JSON file. + + Args: + output_path: Path where the JSON file should be saved. 
+ """ + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(self.to_json()) \ No newline at end of file diff --git a/theauditor/framework_registry.py b/theauditor/framework_registry.py new file mode 100644 index 0000000..26ce19e --- /dev/null +++ b/theauditor/framework_registry.py @@ -0,0 +1,549 @@ +"""Registry of framework detection patterns and test framework configurations.""" + +# Framework detection registry - defines where to find each framework +FRAMEWORK_REGISTRY = { + # Python frameworks + "django": { + "language": "python", + "detection_sources": { + "pyproject.toml": [ + ["project", "dependencies"], + ["tool", "poetry", "dependencies"], + ["tool", "poetry", "group", "*", "dependencies"], + ["tool", "pdm", "dependencies"], + ["tool", "setuptools", "install_requires"], + ["project", "optional-dependencies", "*"], + ], + "requirements.txt": "line_search", + "requirements-dev.txt": "line_search", + "setup.py": "content_search", + "setup.cfg": ["options", "install_requires"], + }, + "import_patterns": ["from django", "import django"], + "file_markers": ["manage.py", "wsgi.py"], + }, + "flask": { + "language": "python", + "detection_sources": { + "pyproject.toml": [ + ["project", "dependencies"], + ["tool", "poetry", "dependencies"], + ["tool", "poetry", "group", "*", "dependencies"], + ["tool", "pdm", "dependencies"], + ["project", "optional-dependencies", "*"], + ], + "requirements.txt": "line_search", + "requirements-dev.txt": "line_search", + "setup.py": "content_search", + "setup.cfg": ["options", "install_requires"], + }, + "import_patterns": ["from flask", "import flask"], + }, + "fastapi": { + "language": "python", + "detection_sources": { + "pyproject.toml": [ + ["project", "dependencies"], + ["tool", "poetry", "dependencies"], + ["tool", "poetry", "group", "*", "dependencies"], + ["tool", "pdm", "dependencies"], + ["project", "optional-dependencies", "*"], + ], + "requirements.txt": "line_search", + "requirements-dev.txt": "line_search", + "setup.py": "content_search", + "setup.cfg": ["options", "install_requires"], + }, + "import_patterns": ["from fastapi", "import fastapi"], + }, + "pyramid": { + "language": "python", + "detection_sources": { + "pyproject.toml": [ + ["project", "dependencies"], + ["tool", "poetry", "dependencies"], + ["tool", "poetry", "group", "*", "dependencies"], + ["tool", "pdm", "dependencies"], + ["project", "optional-dependencies", "*"], + ], + "requirements.txt": "line_search", + "requirements-dev.txt": "line_search", + "setup.py": "content_search", + "setup.cfg": ["options", "install_requires"], + }, + "import_patterns": ["from pyramid", "import pyramid"], + }, + "tornado": { + "language": "python", + "detection_sources": { + "pyproject.toml": [ + ["project", "dependencies"], + ["tool", "poetry", "dependencies"], + ["tool", "poetry", "group", "*", "dependencies"], + ["tool", "pdm", "dependencies"], + ["project", "optional-dependencies", "*"], + ], + "requirements.txt": "line_search", + "requirements-dev.txt": "line_search", + "setup.py": "content_search", + "setup.cfg": ["options", "install_requires"], + }, + "import_patterns": ["from tornado", "import tornado"], + }, + "bottle": { + "language": "python", + "detection_sources": { + "pyproject.toml": [ + ["project", "dependencies"], + ["tool", "poetry", "dependencies"], + ["tool", "poetry", "group", "*", "dependencies"], + ["tool", "pdm", "dependencies"], + ["project", "optional-dependencies", "*"], + ], + "requirements.txt": 
"line_search", + "requirements-dev.txt": "line_search", + "setup.py": "content_search", + "setup.cfg": ["options", "install_requires"], + }, + "import_patterns": ["from bottle", "import bottle"], + }, + "aiohttp": { + "language": "python", + "detection_sources": { + "pyproject.toml": [ + ["project", "dependencies"], + ["tool", "poetry", "dependencies"], + ["tool", "poetry", "group", "*", "dependencies"], + ["tool", "pdm", "dependencies"], + ["project", "optional-dependencies", "*"], + ], + "requirements.txt": "line_search", + "requirements-dev.txt": "line_search", + "setup.py": "content_search", + "setup.cfg": ["options", "install_requires"], + }, + "import_patterns": ["from aiohttp", "import aiohttp"], + }, + "sanic": { + "language": "python", + "detection_sources": { + "pyproject.toml": [ + ["project", "dependencies"], + ["tool", "poetry", "dependencies"], + ["tool", "poetry", "group", "*", "dependencies"], + ["tool", "pdm", "dependencies"], + ["project", "optional-dependencies", "*"], + ], + "requirements.txt": "line_search", + "requirements-dev.txt": "line_search", + "setup.py": "content_search", + "setup.cfg": ["options", "install_requires"], + }, + "import_patterns": ["from sanic", "import sanic"], + }, + + # JavaScript/TypeScript frameworks + "express": { + "language": "javascript", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "import_patterns": ["express", "require('express')", "from 'express'"], + }, + "nestjs": { + "language": "javascript", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "package_pattern": "@nestjs/core", + "import_patterns": ["@nestjs"], + }, + "next": { + "language": "javascript", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "import_patterns": ["next/", "from 'next'"], + }, + "react": { + "language": "javascript", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "import_patterns": ["react", "from 'react'", "React"], + }, + "vue": { + "language": "javascript", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "import_patterns": ["vue", "from 'vue'"], + "file_markers": ["*.vue"], + }, + "angular": { + "language": "javascript", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "package_pattern": "@angular/core", + "import_patterns": ["@angular"], + "file_markers": ["angular.json"], + }, + "fastify": { + "language": "javascript", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "import_patterns": ["fastify"], + }, + "koa": { + "language": "javascript", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "import_patterns": ["koa", "require('koa')"], + }, + "vite": { + "language": "javascript", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "import_patterns": ["vite"], + "config_files": ["vite.config.js", "vite.config.ts"], + }, + + # PHP frameworks + "laravel": { + "language": "php", + "detection_sources": { + "composer.json": [ + ["require"], + ["require-dev"], + ], + }, + "package_pattern": "laravel/framework", + "file_markers": ["artisan", "bootstrap/app.php"], + }, + "symfony": { + "language": "php", + "detection_sources": { + "composer.json": [ + ["require"], + ["require-dev"], + ], + }, + 
"package_pattern": "symfony/framework-bundle", + "file_markers": ["bin/console", "config/bundles.php"], + }, + "slim": { + "language": "php", + "detection_sources": { + "composer.json": [ + ["require"], + ["require-dev"], + ], + }, + "package_pattern": "slim/slim", + }, + "lumen": { + "language": "php", + "detection_sources": { + "composer.json": [ + ["require"], + ["require-dev"], + ], + }, + "package_pattern": "laravel/lumen-framework", + "file_markers": ["artisan"], + }, + "codeigniter": { + "language": "php", + "detection_sources": { + "composer.json": [ + ["require"], + ["require-dev"], + ], + }, + "package_pattern": "codeigniter4/framework", + "file_markers": ["spark"], + }, + + # Go frameworks + "gin": { + "language": "go", + "detection_sources": { + "go.mod": "content_search", + }, + "package_pattern": "github.com/gin-gonic/gin", + "import_patterns": ["github.com/gin-gonic/gin"], + }, + "echo": { + "language": "go", + "detection_sources": { + "go.mod": "content_search", + }, + "package_pattern": "github.com/labstack/echo", + "import_patterns": ["github.com/labstack/echo"], + }, + "fiber": { + "language": "go", + "detection_sources": { + "go.mod": "content_search", + }, + "package_pattern": "github.com/gofiber/fiber", + "import_patterns": ["github.com/gofiber/fiber"], + }, + "beego": { + "language": "go", + "detection_sources": { + "go.mod": "content_search", + }, + "package_pattern": "github.com/beego/beego", + "import_patterns": ["github.com/beego/beego"], + }, + "chi": { + "language": "go", + "detection_sources": { + "go.mod": "content_search", + }, + "package_pattern": "github.com/go-chi/chi", + "import_patterns": ["github.com/go-chi/chi"], + }, + "gorilla": { + "language": "go", + "detection_sources": { + "go.mod": "content_search", + }, + "package_pattern": "github.com/gorilla/mux", + "import_patterns": ["github.com/gorilla/mux"], + }, + + # Java frameworks + "spring": { + "language": "java", + "detection_sources": { + "pom.xml": "content_search", + "build.gradle": "content_search", + "build.gradle.kts": "content_search", + }, + "package_pattern": "spring", + "content_patterns": ["spring-boot", "springframework"], + }, + "micronaut": { + "language": "java", + "detection_sources": { + "pom.xml": "content_search", + "build.gradle": "content_search", + "build.gradle.kts": "content_search", + }, + "package_pattern": "io.micronaut", + "content_patterns": ["io.micronaut"], + }, + "quarkus": { + "language": "java", + "detection_sources": { + "pom.xml": "content_search", + "build.gradle": "content_search", + "build.gradle.kts": "content_search", + }, + "package_pattern": "io.quarkus", + "content_patterns": ["io.quarkus"], + }, + "dropwizard": { + "language": "java", + "detection_sources": { + "pom.xml": "content_search", + "build.gradle": "content_search", + "build.gradle.kts": "content_search", + }, + "package_pattern": "io.dropwizard", + "content_patterns": ["io.dropwizard"], + }, + "play": { + "language": "java", + "detection_sources": { + "build.sbt": "content_search", + "build.gradle": "content_search", + }, + "package_pattern": "com.typesafe.play", + "content_patterns": ["com.typesafe.play"], + }, + + # Ruby frameworks + "rails": { + "language": "ruby", + "detection_sources": { + "Gemfile": "line_search", + "Gemfile.lock": "content_search", + }, + "package_pattern": "rails", + "file_markers": ["Rakefile", "config.ru", "bin/rails"], + }, + "sinatra": { + "language": "ruby", + "detection_sources": { + "Gemfile": "line_search", + "Gemfile.lock": "content_search", + }, + 
"package_pattern": "sinatra", + }, + "hanami": { + "language": "ruby", + "detection_sources": { + "Gemfile": "line_search", + "Gemfile.lock": "content_search", + }, + "package_pattern": "hanami", + }, + "grape": { + "language": "ruby", + "detection_sources": { + "Gemfile": "line_search", + "Gemfile.lock": "content_search", + }, + "package_pattern": "grape", + }, +} + + +# Test framework detection registry +TEST_FRAMEWORK_REGISTRY = { + "pytest": { + "language": "python", + "command": "pytest -q -p no:cacheprovider", + "detection_sources": { + "pyproject.toml": [ + ["project", "dependencies"], + ["project", "optional-dependencies", "test"], + ["project", "optional-dependencies", "dev"], + ["project", "optional-dependencies", "tests"], + ["tool", "poetry", "dependencies"], + ["tool", "poetry", "group", "dev", "dependencies"], + ["tool", "poetry", "group", "test", "dependencies"], + ["tool", "poetry", "dev-dependencies"], + ["tool", "pdm", "dev-dependencies"], + ["tool", "hatch", "envs", "default", "dependencies"], + ], + "requirements.txt": "line_search", + "requirements-dev.txt": "line_search", + "requirements-test.txt": "line_search", + "setup.cfg": ["options", "tests_require"], + "setup.py": "content_search", + "tox.ini": "content_search", + }, + "config_files": ["pytest.ini", ".pytest.ini", "pyproject.toml"], + "config_sections": { + "pyproject.toml": [ + ["tool", "pytest"], + ["tool", "pytest", "ini_options"], + ], + "setup.cfg": [ + ["tool:pytest"], + ["pytest"], + ], + }, + }, + "unittest": { + "language": "python", + "command": "python -m unittest discover -q", + "import_patterns": ["import unittest", "from unittest"], + "file_patterns": ["test*.py", "*_test.py"], + }, + "jest": { + "language": "javascript", + "command": "npm test --silent", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "config_files": ["jest.config.js", "jest.config.ts", "jest.config.json"], + "config_sections": { + "package.json": [["jest"]], + }, + "script_patterns": ["jest"], + }, + "vitest": { + "language": "javascript", + "command": "npm test --silent", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "config_files": ["vitest.config.js", "vitest.config.ts", "vite.config.js", "vite.config.ts"], + "script_patterns": ["vitest"], + }, + "mocha": { + "language": "javascript", + "command": "npm test --silent", + "detection_sources": { + "package.json": [ + ["dependencies"], + ["devDependencies"], + ], + }, + "config_files": [".mocharc.js", ".mocharc.json", ".mocharc.yaml", ".mocharc.yml"], + "script_patterns": ["mocha"], + }, + "go": { + "language": "go", + "command": "go test ./...", + "file_patterns": ["*_test.go"], + "detection_sources": { + "go.mod": "exists", + }, + }, + "junit": { + "language": "java", + "command_maven": "mvn test", + "command_gradle": "gradle test", + "detection_sources": { + "pom.xml": "content_search", + "build.gradle": "content_search", + "build.gradle.kts": "content_search", + }, + "content_patterns": ["junit", "testImplementation"], + "import_patterns": ["import org.junit"], + "file_patterns": ["*Test.java", "Test*.java"], + }, + "rspec": { + "language": "ruby", + "command": "rspec", + "detection_sources": { + "Gemfile": "line_search", + "Gemfile.lock": "content_search", + }, + "config_files": [".rspec", "spec/spec_helper.rb"], + "directory_markers": ["spec/"], + }, +} \ No newline at end of file diff --git a/theauditor/graph/__init__.py b/theauditor/graph/__init__.py new file mode 
100644 index 0000000..a7cd157 --- /dev/null +++ b/theauditor/graph/__init__.py @@ -0,0 +1,45 @@ +"""Graph package - dependency and call graph functionality. + +Core modules (always available): +- analyzer: Pure graph algorithms (cycles, paths, layers) +- builder: Graph construction from source code +- store: SQLite persistence + +Optional modules: +- insights: Interpretive metrics (health scores, recommendations, hotspots) +""" + +# Core exports (always available) +from .analyzer import XGraphAnalyzer +from .builder import XGraphBuilder, GraphNode, GraphEdge, Cycle, Hotspot, ImpactAnalysis +from .store import XGraphStore +from .visualizer import GraphVisualizer + +# Optional insights module +try: + from .insights import GraphInsights, check_insights_available, create_insights + INSIGHTS_AVAILABLE = True +except ImportError: + # Insights module is optional - similar to ml.py + INSIGHTS_AVAILABLE = False + GraphInsights = None + check_insights_available = lambda: False + create_insights = lambda weights=None: None + +__all__ = [ + # Core classes (always available) + "XGraphBuilder", + "XGraphAnalyzer", + "XGraphStore", + "GraphVisualizer", + "GraphNode", + "GraphEdge", + "Cycle", + "Hotspot", + "ImpactAnalysis", + # Optional insights + "GraphInsights", + "INSIGHTS_AVAILABLE", + "check_insights_available", + "create_insights", +] \ No newline at end of file diff --git a/theauditor/graph/analyzer.py b/theauditor/graph/analyzer.py new file mode 100644 index 0000000..bda4cf6 --- /dev/null +++ b/theauditor/graph/analyzer.py @@ -0,0 +1,421 @@ +"""Graph analyzer module - pure graph algorithms for dependency and call graphs. + +This module provides ONLY non-interpretive graph algorithms: +- Cycle detection (DFS) +- Shortest path finding (BFS) +- Layer identification (topological sort) +- Impact analysis (graph traversal) +- Statistical summaries (counts and grouping) + +For interpretive metrics like health scores, recommendations, and weighted +rankings, see the optional graph.insights module. +""" + +from collections import defaultdict +from pathlib import Path +from typing import Any + + +class XGraphAnalyzer: + """Analyze cross-project dependency and call graphs using pure algorithms.""" + + def detect_cycles(self, graph: dict[str, Any]) -> list[dict[str, Any]]: + """ + Detect cycles in the dependency graph using DFS. + + This is a pure graph algorithm that returns raw cycle data + without any interpretation or scoring. 
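A minimal usage sketch, assuming the package is importable; the three-file graph is illustrative, and the cycle it contains comes back with the starting node repeated at the end.

# Toy usage of detect_cycles(); graph contents are illustrative.
from theauditor.graph.analyzer import XGraphAnalyzer

graph = {
    "nodes": [{"id": "a.py"}, {"id": "b.py"}, {"id": "c.py"}],
    "edges": [
        {"source": "a.py", "target": "b.py"},
        {"source": "b.py", "target": "c.py"},
        {"source": "c.py", "target": "a.py"},   # closes the a -> b -> c -> a loop
    ],
}
cycles = XGraphAnalyzer().detect_cycles(graph)
# Expected shape: [{"nodes": ["a.py", "b.py", "c.py", "a.py"], "size": 3}]
print(cycles)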
+ + Args: + graph: Graph with 'nodes' and 'edges' keys + + Returns: + List of cycles, each with nodes and size + """ + # Build adjacency list + adj = defaultdict(list) + for edge in graph.get("edges", []): + adj[edge["source"]].append(edge["target"]) + + # Track visited nodes and recursion stack + visited = set() + rec_stack = set() + cycles = [] + + def dfs(node: str, path: list[str]) -> None: + """DFS to detect cycles.""" + visited.add(node) + rec_stack.add(node) + path.append(node) + + for neighbor in adj[node]: + if neighbor not in visited: + dfs(neighbor, path.copy()) + elif neighbor in rec_stack: + # Found a cycle + cycle_start = path.index(neighbor) + cycle_nodes = path[cycle_start:] + [neighbor] + cycles.append({ + "nodes": cycle_nodes, + "size": len(cycle_nodes) - 1, # Don't count repeated node + }) + + rec_stack.remove(node) + + # Run DFS from all unvisited nodes + for node in graph.get("nodes", []): + node_id = node["id"] + if node_id not in visited: + dfs(node_id, []) + + # Sort cycles by size (largest first) + cycles.sort(key=lambda c: c["size"], reverse=True) + + return cycles + + def impact_of_change( + self, + targets: list[str], + import_graph: dict[str, Any], + call_graph: dict[str, Any] | None = None, + max_depth: int = 3, + ) -> dict[str, Any]: + """ + Calculate the impact of changing target files using graph traversal. + + This is a pure graph algorithm that finds affected nodes + without interpreting or scoring the impact. + + Args: + targets: List of file/module IDs that will change + import_graph: Import/dependency graph + call_graph: Optional call graph + max_depth: Maximum traversal depth + + Returns: + Raw impact data with upstream and downstream effects + """ + # Build adjacency lists + upstream = defaultdict(list) # Who depends on X + downstream = defaultdict(list) # What X depends on + + for edge in import_graph.get("edges", []): + downstream[edge["source"]].append(edge["target"]) + upstream[edge["target"]].append(edge["source"]) + + if call_graph: + for edge in call_graph.get("edges", []): + downstream[edge["source"]].append(edge["target"]) + upstream[edge["target"]].append(edge["source"]) + + # Find upstream impact (what depends on targets) + upstream_impact = set() + to_visit = [(t, 0) for t in targets] + visited = set() + + while to_visit: + node, depth = to_visit.pop(0) + if node in visited or depth >= max_depth: + continue + visited.add(node) + + for dependent in upstream[node]: + upstream_impact.add(dependent) + to_visit.append((dependent, depth + 1)) + + # Find downstream impact (what targets depend on) + downstream_impact = set() + to_visit = [(t, 0) for t in targets] + visited = set() + + while to_visit: + node, depth = to_visit.pop(0) + if node in visited or depth >= max_depth: + continue + visited.add(node) + + for dependency in downstream[node]: + downstream_impact.add(dependency) + to_visit.append((dependency, depth + 1)) + + # Return raw counts without ratios or interpretations + all_impacted = set(targets) | upstream_impact | downstream_impact + + return { + "targets": targets, + "upstream": sorted(upstream_impact), + "downstream": sorted(downstream_impact), + "total_impacted": len(all_impacted), + "graph_nodes": len(import_graph.get("nodes", [])), + } + + def find_shortest_path( + self, + source: str, + target: str, + graph: dict[str, Any] + ) -> list[str] | None: + """ + Find shortest path between two nodes using BFS. + + Pure pathfinding algorithm without interpretation. 
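A toy sketch of the impact_of_change() traversal defined above, using an illustrative three-file import chain:

# Toy usage of impact_of_change(); file names are illustrative.
from theauditor.graph.analyzer import XGraphAnalyzer

import_graph = {
    "nodes": [{"id": "api.py"}, {"id": "service.py"}, {"id": "db.py"}],
    "edges": [
        {"source": "api.py", "target": "service.py"},   # api imports service
        {"source": "service.py", "target": "db.py"},    # service imports db
    ],
}
impact = XGraphAnalyzer().impact_of_change(["service.py"], import_graph)
# upstream -> ["api.py"] (depends on the target), downstream -> ["db.py"],
# total_impacted -> 3 (target plus both directions of the traversal).
print(impact)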
+ + Args: + source: Source node ID + target: Target node ID + graph: Graph with edges + + Returns: + List of node IDs forming the path, or None if no path exists + """ + # Build adjacency list + adj = defaultdict(list) + for edge in graph.get("edges", []): + adj[edge["source"]].append(edge["target"]) + + # BFS + queue = [(source, [source])] + visited = {source} + + while queue: + node, path = queue.pop(0) + + if node == target: + return path + + for neighbor in adj[node]: + if neighbor not in visited: + visited.add(neighbor) + queue.append((neighbor, path + [neighbor])) + + return None + + def identify_layers(self, graph: dict[str, Any]) -> dict[str, list[str]]: + """ + Identify architectural layers using topological sorting. + + Pure graph layering algorithm without interpretation. + + Args: + graph: Import/dependency graph + + Returns: + Dict mapping layer number to list of node IDs + """ + # Calculate in-degrees + in_degree = defaultdict(int) + nodes = {node["id"] for node in graph.get("nodes", [])} + + for edge in graph.get("edges", []): + in_degree[edge["target"]] += 1 + + # Find nodes with no dependencies (layer 0) + layers = {} + current_layer = [] + + for node_id in nodes: + if in_degree[node_id] == 0: + current_layer.append(node_id) + + # Build layers using modified topological sort + layer_num = 0 + adj = defaultdict(list) + + for edge in graph.get("edges", []): + adj[edge["source"]].append(edge["target"]) + + while current_layer: + layers[layer_num] = current_layer + next_layer = [] + + for node in current_layer: + for neighbor in adj[node]: + in_degree[neighbor] -= 1 + if in_degree[neighbor] == 0: + next_layer.append(neighbor) + + current_layer = next_layer + layer_num += 1 + + return layers + + def get_graph_summary(self, graph_data: dict[str, Any]) -> dict[str, Any]: + """ + Extract basic statistics from a graph without interpretation. + + This method provides raw counts and statistics only, + no subjective metrics or labels. 
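A toy sketch of identify_layers() from above; a simple import chain peels off into one layer per module (file names are illustrative):

# Toy usage of identify_layers(); graph contents are illustrative.
from theauditor.graph.analyzer import XGraphAnalyzer

graph = {
    "nodes": [{"id": "config.py"}, {"id": "models.py"}, {"id": "views.py"}],
    "edges": [
        {"source": "config.py", "target": "models.py"},
        {"source": "models.py", "target": "views.py"},
    ],
}
layers = XGraphAnalyzer().identify_layers(graph)
# Nodes with no incoming edges form layer 0, then each topological pass peels off
# the next layer: {0: ["config.py"], 1: ["models.py"], 2: ["views.py"]}
print(layers)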
+ + Args: + graph_data: Large graph dict with 'nodes' and 'edges' + + Returns: + Concise summary with raw statistics only + """ + # Basic statistics + nodes = graph_data.get("nodes", []) + edges = graph_data.get("edges", []) + + # Calculate in/out degrees + in_degree = defaultdict(int) + out_degree = defaultdict(int) + for edge in edges: + out_degree[edge["source"]] += 1 + in_degree[edge["target"]] += 1 + + # Find most connected nodes (raw data only) + connection_counts = [] + for node in nodes: # Process all nodes + node_id = node["id"] + total = in_degree[node_id] + out_degree[node_id] + if total > 0: + connection_counts.append({ + "id": node_id, + "in_degree": in_degree[node_id], + "out_degree": out_degree[node_id], + "total_connections": total + }) + + # Sort and get top 10 + connection_counts.sort(key=lambda x: x["total_connections"], reverse=True) + top_connected = connection_counts[:10] + + # Detect cycles (complete search) + cycles = self.detect_cycles({"nodes": nodes, "edges": edges}) + + # Calculate graph metrics + node_count = len(nodes) + edge_count = len(edges) + density = edge_count / (node_count * (node_count - 1)) if node_count > 1 else 0 + + # Find isolated nodes + connected_nodes = set() + for edge in edges: + connected_nodes.add(edge["source"]) + connected_nodes.add(edge["target"]) + isolated_count = len([n for n in nodes if n["id"] not in connected_nodes]) + + # Create summary with raw data only + summary = { + "statistics": { + "total_nodes": node_count, + "total_edges": edge_count, + "graph_density": round(density, 4), + "isolated_nodes": isolated_count, + "average_connections": round(edge_count / node_count, 2) if node_count > 0 else 0 + }, + "top_connected_nodes": top_connected, + "cycles_found": [ + { + "size": cycle["size"], + "nodes": cycle["nodes"][:5] + (["..."] if len(cycle["nodes"]) > 5 else []) + } + for cycle in cycles[:5] + ], + "file_types": self._count_file_types(nodes), + "connection_distribution": { + "nodes_with_20_plus_connections": len([c for c in connection_counts if c["total_connections"] > 20]), + "nodes_with_30_plus_inbound": len([c for c in connection_counts if c["in_degree"] > 30]), + "cycle_count": len(cycles) if len(nodes) < 500 else f"{len(cycles)}+ (limited search)", + } + } + + return summary + + def _count_file_types(self, nodes: list[dict]) -> dict[str, int]: + """Count nodes by file extension - pure counting, no interpretation.""" + ext_counts = defaultdict(int) + for node in nodes: # Process all nodes + if "file" in node: + ext = Path(node["file"]).suffix or "no_ext" + ext_counts[ext] += 1 + # Return top 10 extensions + sorted_exts = sorted(ext_counts.items(), key=lambda x: x[1], reverse=True) + return dict(sorted_exts[:10]) + + def identify_hotspots(self, graph: dict[str, Any], top_n: int = 10) -> list[dict[str, Any]]: + """ + Identify hotspot nodes based on connectivity (in/out degree). + + Pure graph algorithm that identifies most connected nodes + without interpretation or scoring. 
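The density figure reported by get_graph_summary() above is the standard directed-graph density, edges divided by the number of possible ordered pairs; a worked example with illustrative counts:

# Worked example of the density statistic: E / (N * (N - 1)) for a directed graph.
node_count, edge_count = 10, 18
density = edge_count / (node_count * (node_count - 1)) if node_count > 1 else 0
print(round(density, 4))  # 18 / 90 = 0.2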
+ + Args: + graph: Graph with 'nodes' and 'edges' + top_n: Number of top hotspots to return + + Returns: + List of hotspot nodes with their degree counts + """ + # Calculate in/out degrees + in_degree = defaultdict(int) + out_degree = defaultdict(int) + + for edge in graph.get("edges", []): + out_degree[edge["source"]] += 1 + in_degree[edge["target"]] += 1 + + # Calculate total connections for each node + hotspots = [] + for node in graph.get("nodes", []): + node_id = node["id"] + in_deg = in_degree[node_id] + out_deg = out_degree[node_id] + total = in_deg + out_deg + + if total > 0: # Only include connected nodes + hotspots.append({ + "id": node_id, + "in_degree": in_deg, + "out_degree": out_deg, + "total_connections": total, + "file": node.get("file", node_id), + "lang": node.get("lang", "unknown") + }) + + # Sort by total connections and return top N + hotspots.sort(key=lambda x: x["total_connections"], reverse=True) + return hotspots[:top_n] + + def calculate_node_degrees(self, graph: dict[str, Any]) -> dict[str, dict[str, int]]: + """ + Calculate in-degree and out-degree for all nodes. + + Pure counting algorithm without interpretation. + + Args: + graph: Graph with edges + + Returns: + Dict mapping node IDs to degree counts + """ + degrees = defaultdict(lambda: {"in_degree": 0, "out_degree": 0}) + + for edge in graph.get("edges", []): + degrees[edge["source"]]["out_degree"] += 1 + degrees[edge["target"]]["in_degree"] += 1 + + return dict(degrees) + + def analyze_impact(self, graph: dict[str, Any], targets: list[str], max_depth: int = 3) -> dict[str, Any]: + """ + Analyze impact of changes to target nodes. + + Wrapper method for impact_of_change to match expected API. + + Args: + graph: Graph with 'nodes' and 'edges' + targets: List of target node IDs + max_depth: Maximum traversal depth + + Returns: + Impact analysis results with upstream/downstream effects + """ + # Use existing impact_of_change method + result = self.impact_of_change(targets, graph, None, max_depth) + + # Add all_impacted field for compatibility + all_impacted = set(targets) | set(result.get("upstream", [])) | set(result.get("downstream", [])) + result["all_impacted"] = sorted(all_impacted) + + return result \ No newline at end of file diff --git a/theauditor/graph/builder.py b/theauditor/graph/builder.py new file mode 100644 index 0000000..ca21630 --- /dev/null +++ b/theauditor/graph/builder.py @@ -0,0 +1,1017 @@ +"""Graph builder module - constructs dependency and call graphs.""" + +import os +import platform +import re +import subprocess +import tempfile +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any + +# Windows compatibility +IS_WINDOWS = platform.system() == "Windows" + +import click + +from theauditor.indexer.config import SKIP_DIRS +from theauditor.module_resolver import ModuleResolver +from theauditor.ast_parser import ASTParser + + +@dataclass +class GraphNode: + """Represents a node in the dependency or call graph.""" + + id: str + file: str + lang: str | None = None + loc: int = 0 + churn: int | None = None # Git commit count if available + type: str = "module" # module, function, class + + +@dataclass +class GraphEdge: + """Represents an edge in the graph.""" + + source: str + target: str + type: str = "import" # import, call, extends, implements + file: str | None = None + line: int | None = None + + +@dataclass +class Cycle: + """Represents a cycle in the dependency graph.""" + + nodes: list[str] + size: int + + def __init__(self, nodes: list[str]): + 
self.nodes = nodes + self.size = len(nodes) + + +@dataclass +class Hotspot: + """Represents a hotspot node with high connectivity.""" + + id: str + in_degree: int + out_degree: int + centrality: float + score: float # Computed based on weights + + +@dataclass +class ImpactAnalysis: + """Results of change impact analysis.""" + + targets: list[str] + upstream: list[str] # What depends on targets + downstream: list[str] # What targets depend on + total_impacted: int + + +class XGraphBuilder: + """Build cross-project dependency and call graphs.""" + + # Import regex patterns for different languages + IMPORT_PATTERNS = { + "python": [ + r"^import\s+(\S+)", + r"^from\s+(\S+)\s+import", + ], + "javascript": [ + # Standard ES6 imports with 'from' + r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]", + + # Side-effect imports (no 'from') + r"import\s+['\"]([^'\"]+)['\"]", + + # CommonJS require + r"require\(['\"]([^'\"]+)['\"]\)", + + # Dynamic imports + r"import\(['\"]([^'\"]+)['\"]\)", + + # Re-exports + r"export\s+.*?\s+from\s+['\"]([^'\"]+)['\"]", + ], + "typescript": [ + # Standard ES6 imports with 'from' + r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]", + + # Side-effect imports (no 'from') + r"import\s+['\"]([^'\"]+)['\"]", + + # Type-only imports + r"import\s+type\s+.*?\s+from\s+['\"]([^'\"]+)['\"]", + + # CommonJS require + r"require\(['\"]([^'\"]+)['\"]\)", + + # Dynamic imports + r"import\(['\"]([^'\"]+)['\"]\)", + + # Re-exports + r"export\s+.*?\s+from\s+['\"]([^'\"]+)['\"]", + ], + "java": [ + r"^import\s+(\S+);", + r"^import\s+static\s+(\S+);", + ], + "go": [ + r'^import\s+"([^"]+)"', + r'^import\s+\(\s*"([^"]+)"', + ], + "c#": [ + r"^using\s+(\S+);", + r"^using\s+static\s+(\S+);", + ], + "php": [ + r"^use\s+(\S+);", + r"require_once\s*\(['\"]([^'\"]+)['\"]\)", + r"include_once\s*\(['\"]([^'\"]+)['\"]\)", + ], + "ruby": [ + r"^require\s+['\"]([^'\"]+)['\"]", + r"^require_relative\s+['\"]([^'\"]+)['\"]", + ], + } + + # Export patterns for different languages + EXPORT_PATTERNS = { + "python": [ + r"^def\s+(\w+)\s*\(", + r"^class\s+(\w+)", + r"^(\w+)\s*=", # Module-level variables + ], + "javascript": [ + r"export\s+(?:default\s+)?(?:function|class|const|let|var)\s+(\w+)", + r"exports\.(\w+)\s*=", + r"module\.exports\.(\w+)\s*=", + ], + "typescript": [ + r"export\s+(?:default\s+)?(?:function|class|const|let|var|interface|type)\s+(\w+)", + r"exports\.(\w+)\s*=", + ], + "java": [ + r"public\s+(?:static\s+)?(?:class|interface|enum)\s+(\w+)", + r"public\s+(?:static\s+)?(?:\w+\s+)?(\w+)\s*\(", # Public methods + ], + "go": [ + r"^func\s+(\w+)\s*\(", # Exported if capitalized + r"^type\s+(\w+)\s+", + r"^var\s+(\w+)\s+", + ], + } + + # Call patterns for different languages + CALL_PATTERNS = { + "python": [ + r"(\w+)\s*\(", # Function calls + r"(\w+)\.(\w+)\s*\(", # Method calls + ], + "javascript": [ + r"(\w+)\s*\(", + r"(\w+)\.(\w+)\s*\(", + r"new\s+(\w+)\s*\(", + ], + "typescript": [ + r"(\w+)\s*\(", + r"(\w+)\.(\w+)\s*\(", + r"new\s+(\w+)\s*\(", + ], + "java": [ + r"(\w+)\s*\(", + r"(\w+)\.(\w+)\s*\(", + r"new\s+(\w+)\s*\(", + ], + "go": [ + r"(\w+)\s*\(", + r"(\w+)\.(\w+)\s*\(", + ], + } + + def __init__(self, batch_size: int = 200, exclude_patterns: list[str] = None, project_root: str = "."): + """Initialize builder with configuration.""" + self.batch_size = batch_size + self.exclude_patterns = exclude_patterns or [] + self.checkpoint_file = Path(".pf/xgraph_checkpoint.json") + self.project_root = Path(project_root).resolve() + self.module_resolver = ModuleResolver() # No project_root - uses 
database! + self.ast_parser = ASTParser() # Initialize AST parser for structural analysis + + def detect_language(self, file_path: Path) -> str | None: + """Detect language from file extension.""" + ext_map = { + ".py": "python", + ".js": "javascript", + ".jsx": "javascript", + ".ts": "typescript", + ".tsx": "typescript", + ".java": "java", + ".go": "go", + ".cs": "c#", + ".php": "php", + ".rb": "ruby", + ".c": "c", + ".cpp": "c++", + ".h": "c", + ".hpp": "c++", + ".rs": "rust", + ".swift": "swift", + ".kt": "kotlin", + ".scala": "scala", + ".r": "r", + ".R": "r", + ".m": "objective-c", + ".mm": "objective-c++", + } + return ext_map.get(file_path.suffix.lower()) + + def should_skip(self, file_path: Path) -> bool: + """Check if file should be skipped based on exclude patterns.""" + # First, check if any component of the path is in SKIP_DIRS + for part in file_path.parts: + if part in SKIP_DIRS: + return True + + # Second, check against exclude_patterns + path_str = str(file_path) + for pattern in self.exclude_patterns: + if pattern in path_str: + return True + return False + + def extract_imports_from_db(self, rel_path: str) -> list[str]: + """Extract import statements from the database where indexer already stored them. + + Args: + rel_path: Relative path as stored in the database (e.g., "backend/src/app.ts") + + Returns: + List of import targets + """ + import sqlite3 + + # Query the refs table for imports + db_file = self.project_root / ".pf" / "repo_index.db" + if not db_file.exists(): + print(f"Warning: Database not found at {db_file}") + return [] + + try: + conn = sqlite3.connect(db_file) + cursor = conn.cursor() + + # Get all imports for this file from refs table + # The indexer stores imports with kind like 'import', 'require', etc. + cursor.execute( + "SELECT value FROM refs WHERE src = ? AND kind IN ('import', 'require', 'from', 'import_type', 'export')", + (rel_path,) + ) + + imports = [row[0] for row in cursor.fetchall()] + conn.close() + + return imports + + except sqlite3.Error as e: + print(f"Warning: Failed to read imports from database: {e}") + return [] + + def extract_imports(self, file_path: Path, lang: str) -> list[str]: + """Extract import statements from the database where indexer already stored them. + + The indexer has already extracted all imports and stored them in the refs table. + We should read from there instead of re-parsing files. + """ + import sqlite3 + + # Get relative path for database lookup + try: + rel_path = file_path.relative_to(self.project_root) + except ValueError: + # If file_path is already relative or from a different root + rel_path = file_path + + # Normalize path separators for database lookup + db_path = str(rel_path).replace("\\", "/") + + # Query the refs table for imports + db_file = self.project_root / ".pf" / "repo_index.db" + if not db_file.exists(): + print(f"Warning: Database not found at {db_file}") + return [] + + try: + conn = sqlite3.connect(db_file) + cursor = conn.cursor() + + # Get all imports for this file from refs table + # The indexer stores imports with kind like 'import', 'require', etc. + cursor.execute( + "SELECT value FROM refs WHERE src = ? 
AND kind IN ('import', 'require', 'from', 'import_type', 'export')", + (db_path,) + ) + + imports = [row[0] for row in cursor.fetchall()] + conn.close() + + return imports + + except sqlite3.Error as e: + print(f"Warning: Failed to read imports from database: {e}") + return [] + + def extract_exports_from_db(self, rel_path: str) -> list[str]: + """Extract exported symbols from the database where indexer already stored them. + + Args: + rel_path: Relative path as stored in the database + + Returns: + List of exported symbol names + """ + import sqlite3 + + db_file = self.project_root / ".pf" / "repo_index.db" + if not db_file.exists(): + return [] + + try: + conn = sqlite3.connect(db_file) + cursor = conn.cursor() + + # Get exported functions/classes from symbols table + # The indexer stores these as 'function' and 'class' types + cursor.execute( + "SELECT name FROM symbols WHERE path = ? AND type IN ('function', 'class')", + (rel_path,) + ) + + exports = [row[0] for row in cursor.fetchall()] + conn.close() + + return exports + + except sqlite3.Error: + return [] + + def extract_exports(self, file_path: Path, lang: str) -> list[str]: + """Extract exported symbols from a file using AST parser with regex fallback.""" + # Try AST parser first for supported languages + if self.ast_parser.supports_language(lang): + try: + # Check persistent cache first for JS/TS files + tree = None + if lang in ["javascript", "typescript"]: + # Compute file hash for cache lookup + import hashlib + with open(file_path, 'rb') as f: + file_hash = hashlib.sha256(f.read()).hexdigest() + + # Check cache + cache_dir = self.project_root / ".pf" / "ast_cache" + cache_file = cache_dir / f"{file_hash}.json" + if cache_file.exists(): + try: + import json + with open(cache_file, 'r', encoding='utf-8') as f: + tree = json.load(f) + except (json.JSONDecodeError, OSError): + pass # Cache read failed, parse fresh + + # Parse file if not in cache + if not tree: + tree = self.ast_parser.parse_file(file_path, lang) + # REMOVED: Cache write logic - only indexer.py should write to cache + + if tree and tree.get("type") != "regex_fallback": + # Extract exports using AST + export_dicts = self.ast_parser.extract_exports(tree, lang) + # Convert to list of export names + exports = [] + for exp in export_dicts: + name = exp.get('name') + if name and name != 'unknown': + exports.append(name) + if exports: # If we got results, return them + return exports + except Exception as e: + # Fall through to regex fallback + pass + + # Fallback to regex-based extraction + return self._extract_exports_regex(file_path, lang) + + def extract_calls_from_db(self, rel_path: str) -> list[tuple[str, str | None]]: + """Extract function calls from the database where indexer already stored them. + + Args: + rel_path: Relative path as stored in the database + + Returns: + List of (function_name, None) tuples for calls + """ + import sqlite3 + + db_file = self.project_root / ".pf" / "repo_index.db" + if not db_file.exists(): + return [] + + try: + conn = sqlite3.connect(db_file) + cursor = conn.cursor() + + # Get function calls from symbols table + # The indexer stores these as 'call' type + cursor.execute( + "SELECT name FROM symbols WHERE path = ? 
AND type = 'call'", + (rel_path,) + ) + + # Return as tuples with None for second element (no parent info) + calls = [(row[0], None) for row in cursor.fetchall()] + conn.close() + + return calls + + except sqlite3.Error: + return [] + + def extract_calls(self, file_path: Path, lang: str) -> list[tuple[str, str | None]]: + """Extract function/method calls from a file using AST parser with regex fallback.""" + # Try AST parser first for supported languages + if self.ast_parser.supports_language(lang): + try: + # Check persistent cache first for JS/TS files + tree = None + if lang in ["javascript", "typescript"]: + # Compute file hash for cache lookup + import hashlib + with open(file_path, 'rb') as f: + file_hash = hashlib.sha256(f.read()).hexdigest() + + # Check cache + cache_dir = self.project_root / ".pf" / "ast_cache" + cache_file = cache_dir / f"{file_hash}.json" + if cache_file.exists(): + try: + import json + with open(cache_file, 'r', encoding='utf-8') as f: + tree = json.load(f) + except (json.JSONDecodeError, OSError): + pass # Cache read failed, parse fresh + + # Parse file if not in cache + if not tree: + tree = self.ast_parser.parse_file(file_path, lang) + # REMOVED: Cache write logic - only indexer.py should write to cache + + if tree and tree.get("type") != "regex_fallback": + # Extract calls using AST + call_dicts = self.ast_parser.extract_calls(tree, lang) + # Convert to list of (function, method) tuples + calls = [] + for call in call_dicts: + name = call.get('name', '') + # Check if it's a method call (contains dot) + if '.' in name: + parts = name.rsplit('.', 1) + if len(parts) == 2: + calls.append((parts[0], parts[1])) + else: + calls.append((name, None)) + else: + calls.append((name, None)) + if calls: # If we got results, return them + return calls + except Exception as e: + # Fall through to regex fallback + pass + + # Fallback to regex-based extraction + return self._extract_calls_regex(file_path, lang) + + def resolve_import_path(self, import_str: str, source_file: Path, lang: str) -> str: + """Resolve import string to a normalized module path that matches actual files in the graph.""" + import sqlite3 + + # Clean up the import string (remove quotes, semicolons, etc.) + import_str = import_str.strip().strip('"\'`;') + + # Language-specific resolution + if lang == "python": + # Convert Python module path to file path + parts = import_str.split(".") + return "/".join(parts) + elif lang in ["javascript", "typescript"]: + # Get source file directory for relative imports + source_dir = source_file.parent + # Handle case where source_file might already be relative or might be from manifest + try: + source_rel = str(source_file.relative_to(self.project_root)).replace("\\", "/") + except ValueError: + # If source_file is already relative or from a different root, use it as is + source_rel = str(source_file).replace("\\", "/") + + # Handle different import patterns + resolved_path = None + + # 1. 
Handle TypeScript path aliases using ModuleResolver (database-driven) + if import_str.startswith("@"): + # Determine context from source file location + try: + source_rel = str(source_file.relative_to(self.project_root)).replace("\\", "/") + except ValueError: + source_rel = str(source_file).replace("\\", "/") + + # Determine which tsconfig context applies + if "backend/" in source_rel: + context = "backend" + elif "frontend/" in source_rel: + context = "frontend" + else: + context = "root" + + # Use ModuleResolver's context-aware resolution + resolved = self.module_resolver.resolve_with_context(import_str, str(source_file), context) + + # Check if resolution succeeded + if resolved != import_str: + # Resolution worked, now verify file exists in database + db_file = self.project_root / ".pf" / "repo_index.db" + if db_file.exists(): + try: + conn = sqlite3.connect(db_file) + cursor = conn.cursor() + + # Try with common extensions if no extension + test_paths = [resolved] + if not Path(resolved).suffix: + for ext in [".ts", ".tsx", ".js", ".jsx"]: + test_paths.append(resolved + ext) + test_paths.append(resolved + "/index.ts") + test_paths.append(resolved + "/index.js") + + for test_path in test_paths: + cursor.execute("SELECT 1 FROM files WHERE path = ? LIMIT 1", (test_path,)) + if cursor.fetchone(): + conn.close() + return test_path + + conn.close() + except sqlite3.Error: + pass + + # Return resolved even if file check failed + return resolved + + # 2. Handle relative imports (./foo, ../bar/baz) + elif import_str.startswith("."): + # Resolve relative to source file + try: + # Remove leading dots and slashes + rel_import = import_str.lstrip("./") + + # Go up directories for ../ + up_count = import_str.count("../") + current_dir = source_dir + for _ in range(up_count): + current_dir = current_dir.parent + + if up_count > 0: + rel_import = import_str.replace("../", "") + + # Build the target path + target_path = current_dir / rel_import + rel_target = str(target_path.relative_to(self.project_root)).replace("\\", "/") + + # Check if this file exists (try with extensions) + db_file = self.project_root / ".pf" / "repo_index.db" + if db_file.exists(): + try: + conn = sqlite3.connect(db_file) + cursor = conn.cursor() + + # Try with common extensions + for ext in ["", ".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"]: + test_path = rel_target + ext + cursor.execute("SELECT 1 FROM files WHERE path = ? LIMIT 1", (test_path,)) + if cursor.fetchone(): + conn.close() + return test_path + + conn.close() + except sqlite3.Error: + pass + + return rel_target + + except (ValueError, OSError): + pass + + # 3. 
Handle node_modules imports (just return as-is, they're external) + else: + # For npm packages, just return the package name + return import_str + + # If nothing worked, return original + return import_str + else: + # Default: return as-is + return import_str + + def get_file_metrics(self, file_path: Path) -> dict[str, Any]: + """Get basic metrics for a file.""" + metrics = {"loc": 0, "churn": None} + + # When working with manifest data, skip file reading + # The manifest already has loc and other metrics + if not file_path.exists(): + # File doesn't exist, we're working with manifest data + # Return default metrics - the caller should use manifest data instead + return metrics + + # Count lines of code + try: + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: + metrics["loc"] = sum(1 for _ in f) + except (IOError, UnicodeDecodeError, OSError) as e: + print(f"Warning: Failed to read {file_path} for metrics: {e}") + # Still return default metrics but LOG the failure + + # Get git churn (commit count) + try: + # Use temp files to avoid buffer overflow + with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stdout.txt', encoding='utf-8') as stdout_fp, \ + tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stderr.txt', encoding='utf-8') as stderr_fp: + + stdout_path = stdout_fp.name + stderr_path = stderr_fp.name + + result = subprocess.run( + ["git", "log", "--oneline", str(file_path)], + stdout=stdout_fp, + stderr=stderr_fp, + text=True, + timeout=5, + cwd=Path.cwd(), + shell=IS_WINDOWS # Windows compatibility fix + ) + + with open(stdout_path, 'r', encoding='utf-8') as f: + result.stdout = f.read() + with open(stderr_path, 'r', encoding='utf-8') as f: + result.stderr = f.read() + + os.unlink(stdout_path) + os.unlink(stderr_path) + if result.returncode == 0: + metrics["churn"] = len(result.stdout.strip().split("\n")) + except (subprocess.TimeoutExpired, OSError, IOError) as e: + print(f"Warning: Failed to get git churn for {file_path}: {e}") + # Still return default metrics but LOG the failure + + return metrics + + def build_import_graph( + self, + root: str = ".", + langs: list[str] | None = None, + file_filter: str | None = None, + file_list: list[dict[str, Any]] | None = None, + ) -> dict[str, Any]: + """Build import/dependency graph for the project.""" + root_path = Path(root).resolve() + nodes = {} + edges = [] + + # Collect all source files + files = [] + manifest_lookup = {} # Map file paths to manifest items for metrics + + if file_list is not None: + # Use provided file list from manifest + # The manifest already contains all the file info we need + for item in file_list: + manifest_path = Path(item['path']) + + # Use the path from manifest directly - we don't need actual files + # The manifest has all the data (path, ext, content, etc.) 
+ file = root_path / manifest_path # Just for consistent path handling + + # Store manifest item for later metric lookup + manifest_lookup[str(file)] = item + + # Detect language from extension in manifest + lang = self.detect_language(manifest_path) # Use manifest path + if lang and (not langs or lang in langs): + files.append((file, lang)) + else: + # Fall back to original os.walk logic for backward compatibility + for dirpath, dirnames, filenames in os.walk(root_path): + # CRITICAL: Prune excluded directories before os.walk descends into them + # This prevents traversal into .venv and other SKIP_DIRS + dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS] + + # Also prune based on exclude_patterns + if self.exclude_patterns: + dirnames[:] = [d for d in dirnames + if not any(pattern in d for pattern in self.exclude_patterns)] + + # Process files in this directory + for filename in filenames: + file = Path(dirpath) / filename + if not self.should_skip(file): + lang = self.detect_language(file) + if lang and (not langs or lang in langs): + files.append((file, lang)) + + # Process files with progress bar + with click.progressbar( + files, + label="Building import graph", + show_pos=True, + show_percent=True, + show_eta=True, + item_show_func=lambda x: str(x[0].name) if x else None, + ) as bar: + for file_path, lang in bar: + # Create node for this file + rel_path = str(file_path.relative_to(root_path)).replace("\\", "/") # Normalize separators + node_id = rel_path # Already normalized + + # Get metrics from manifest if available, otherwise from file + if str(file_path) in manifest_lookup: + # Use manifest data which already has metrics + manifest_item = manifest_lookup[str(file_path)] + loc = manifest_item.get('loc', 0) + churn = None # Manifest doesn't have churn data + else: + # Fall back to reading file metrics + metrics = self.get_file_metrics(file_path) + loc = metrics["loc"] + churn = metrics["churn"] + + node = GraphNode( + id=node_id, + file=rel_path, # Already normalized + lang=lang, + loc=loc, + churn=churn, + type="module", + ) + nodes[node_id] = asdict(node) + + # Extract imports and create edges + # Pass the relative path that matches what's in the database + imports = self.extract_imports_from_db(rel_path) + for imp in imports: + target = self.resolve_import_path(imp, file_path, lang) + edge = GraphEdge( + source=node_id, + target=target, + type="import", + file=rel_path, # Already normalized + ) + edges.append(asdict(edge)) + + return { + "nodes": list(nodes.values()), + "edges": edges, + "metadata": { + "root": str(root_path), + "languages": list(set(n["lang"] for n in nodes.values())), + "total_files": len(nodes), + "total_imports": len(edges), + }, + } + + def build_call_graph( + self, + root: str = ".", + langs: list[str] | None = None, + file_filter: str | None = None, + file_list: list[dict[str, Any]] | None = None, + ) -> dict[str, Any]: + """Build call graph for the project.""" + root_path = Path(root).resolve() + nodes = {} + edges = [] + + # Collect all source files + files = [] + + if file_list is not None: + # Use provided file list from manifest + # The manifest already contains all the file info we need + for item in file_list: + manifest_path = Path(item['path']) + + # Use the path from manifest directly - we don't need actual files + # The manifest has all the data (path, ext, content, etc.) 
+ file = root_path / manifest_path # Just for consistent path handling + + # Detect language from extension in manifest + lang = self.detect_language(manifest_path) # Use manifest path + if lang and (not langs or lang in langs): + files.append((file, lang)) + else: + # Fall back to original os.walk logic for backward compatibility + for dirpath, dirnames, filenames in os.walk(root_path): + # CRITICAL: Prune excluded directories before os.walk descends into them + # This prevents traversal into .venv and other SKIP_DIRS + dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS] + + # Also prune based on exclude_patterns + if self.exclude_patterns: + dirnames[:] = [d for d in dirnames + if not any(pattern in d for pattern in self.exclude_patterns)] + + # Process files in this directory + for filename in filenames: + file = Path(dirpath) / filename + if not self.should_skip(file): + lang = self.detect_language(file) + if lang and (not langs or lang in langs): + files.append((file, lang)) + + # Process files with progress bar to extract functions and calls + with click.progressbar( + files, + label="Building call graph", + show_pos=True, + show_percent=True, + show_eta=True, + item_show_func=lambda x: str(x[0].name) if x else None, + ) as bar: + for file_path, lang in bar: + rel_path = str(file_path.relative_to(root_path)).replace("\\", "/") # Normalize separators + module_id = rel_path # Already normalized + + # Extract exported functions/classes from database + exports = self.extract_exports_from_db(rel_path) + for export in exports: + func_id = f"{module_id}::{export}" + node = GraphNode( + id=func_id, + file=rel_path, # Already normalized + lang=lang, + type="function", + ) + nodes[func_id] = asdict(node) + + # Extract calls from database + calls = self.extract_calls_from_db(rel_path) + for call, method in calls: + # Try to resolve the call target + if method: + # Method call + target_id = f"{call}.{method}" + else: + # Function call + target_id = call + + # Create edge from module to called function + edge = GraphEdge( + source=module_id, + target=target_id, + type="call", + file=rel_path, # Already normalized + ) + edges.append(asdict(edge)) + + return { + "nodes": list(nodes.values()), + "edges": edges, + "metadata": { + "root": str(root_path), + "languages": langs or [], + "total_functions": len(nodes), + "total_calls": len(edges), + }, + } + + def merge_graphs(self, import_graph: dict, call_graph: dict) -> dict[str, Any]: + """Merge import and call graphs into a unified graph.""" + # Combine nodes (dedup by id) + nodes = {} + for node in import_graph["nodes"]: + nodes[node["id"]] = node + for node in call_graph["nodes"]: + nodes[node["id"]] = node + + # Combine edges + edges = import_graph["edges"] + call_graph["edges"] + + return { + "nodes": list(nodes.values()), + "edges": edges, + "metadata": { + "root": import_graph["metadata"]["root"], + "languages": list( + set( + import_graph["metadata"]["languages"] + + call_graph["metadata"].get("languages", []) + ) + ), + "total_nodes": len(nodes), + "total_edges": len(edges), + }, + } + + def _extract_imports_regex(self, file_path: Path, lang: str) -> list[str]: + """Regex-based fallback for extracting imports. + + This method is used when AST parsing fails or is unavailable. 
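+
+        Example (illustrative; assumes IMPORT_PATTERNS maps each language to
+        single-group regexes such as r'^\s*import\s+([\w.]+)'):
+
+            imports = self._extract_imports_regex(Path("app/models.py"), "python")
+            # imports might look like ["os", "django.db"]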
+ """ + if lang not in self.IMPORT_PATTERNS: + return [] + + imports = [] + patterns = [re.compile(p, re.MULTILINE) for p in self.IMPORT_PATTERNS[lang]] + + try: + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: + content = f.read() + + for pattern in patterns: + matches = pattern.findall(content) + imports.extend(matches) + + except (IOError, UnicodeDecodeError, OSError) as e: + print(f"Warning: Failed to extract imports from {file_path}: {e}") + # Return empty list but LOG the failure + + return imports + + def _extract_exports_regex(self, file_path: Path, lang: str) -> list[str]: + """Regex-based fallback for extracting exports. + + This method is used when AST parsing fails or is unavailable. + """ + if lang not in self.EXPORT_PATTERNS: + return [] + + exports = [] + patterns = [re.compile(p, re.MULTILINE) for p in self.EXPORT_PATTERNS[lang]] + + try: + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: + content = f.read() + + for pattern in patterns: + matches = pattern.findall(content) + # Flatten tuples if regex has groups + for match in matches: + if isinstance(match, tuple): + exports.extend([m for m in match if m]) + else: + exports.append(match) + + except (IOError, UnicodeDecodeError, OSError) as e: + print(f"Warning: Failed to extract exports from {file_path}: {e}") + # Return empty list but LOG the failure + + # Filter exports for Go (only capitalized are public) + if lang == "go": + exports = [e for e in exports if e and e[0].isupper()] + + return exports + + def _extract_calls_regex(self, file_path: Path, lang: str) -> list[tuple[str, str | None]]: + """Regex-based fallback for extracting function calls. + + This method is used when AST parsing fails or is unavailable. + """ + if lang not in self.CALL_PATTERNS: + return [] + + calls = [] + patterns = [re.compile(p) for p in self.CALL_PATTERNS[lang]] + + try: + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: + content = f.read() + + for pattern in patterns: + matches = pattern.findall(content) + for match in matches: + if isinstance(match, tuple): + # Method call: (object, method) + calls.append(match) + else: + # Function call + calls.append((match, None)) + + except (IOError, UnicodeDecodeError, OSError) as e: + print(f"Warning: Failed to extract calls from {file_path}: {e}") + # Return empty list but LOG the failure + + return calls \ No newline at end of file diff --git a/theauditor/graph/insights.py b/theauditor/graph/insights.py new file mode 100644 index 0000000..963c874 --- /dev/null +++ b/theauditor/graph/insights.py @@ -0,0 +1,17 @@ +"""Backward compatibility shim for graph insights. + +This file exists to maintain backward compatibility for code that imports +from theauditor.graph.insights directly. All functionality has been moved to +theauditor.insights.graph for better organization. 
+ +This ensures that: + - from theauditor.graph.insights import GraphInsights # STILL WORKS + - from theauditor.graph import insights # STILL WORKS + - import theauditor.graph.insights # STILL WORKS +""" + +# Import everything from the new location +from theauditor.insights.graph import * + +# This shim ensures 100% backward compatibility while the actual +# implementation is now in theauditor/insights/graph.py \ No newline at end of file diff --git a/theauditor/graph/store.py b/theauditor/graph/store.py new file mode 100644 index 0000000..10bb450 --- /dev/null +++ b/theauditor/graph/store.py @@ -0,0 +1,444 @@ +"""Graph store module - persistence and database operations for graphs.""" + +import json +import sqlite3 +from pathlib import Path +from typing import Any + + +class XGraphStore: + """Store and query cross-project graphs in SQLite.""" + + def __init__(self, db_path: str = "./.pf/graphs.db"): + """ + Initialize store with database path. + + Args: + db_path: Path to SQLite database + """ + self.db_path = Path(db_path) + self.db_path.parent.mkdir(parents=True, exist_ok=True) + self._init_schema() + + def _init_schema(self) -> None: + """Initialize database schema.""" + with sqlite3.connect(self.db_path) as conn: + # Nodes table + conn.execute(""" + CREATE TABLE IF NOT EXISTS nodes ( + id TEXT PRIMARY KEY, + file TEXT NOT NULL, + lang TEXT, + loc INTEGER DEFAULT 0, + churn INTEGER, + type TEXT DEFAULT 'module', + graph_type TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) + + # Edges table + conn.execute(""" + CREATE TABLE IF NOT EXISTS edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source TEXT NOT NULL, + target TEXT NOT NULL, + type TEXT DEFAULT 'import', + file TEXT, + line INTEGER, + graph_type TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(source, target, type, graph_type) + ) + """) + + # Analysis results table + conn.execute(""" + CREATE TABLE IF NOT EXISTS analysis_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + analysis_type TEXT NOT NULL, + result_json TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) + + # Create indexes + conn.execute("CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(file)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_nodes_type ON nodes(type)") + + conn.commit() + + def save_import_graph(self, graph: dict[str, Any]) -> None: + """ + Save import graph to database. 
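+
+        Example (illustrative; node and edge keys mirror the columns below):
+
+            store = XGraphStore("./.pf/graphs.db")
+            store.save_import_graph({
+                "nodes": [{"id": "app/main.py", "file": "app/main.py", "lang": "python"}],
+                "edges": [{"source": "app/main.py", "target": "app/utils.py", "type": "import"}],
+            })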
+ + Args: + graph: Import graph with nodes and edges + """ + with sqlite3.connect(self.db_path) as conn: + # Clear existing import graph + conn.execute("DELETE FROM nodes WHERE graph_type = 'import'") + conn.execute("DELETE FROM edges WHERE graph_type = 'import'") + + # Insert nodes + for node in graph.get("nodes", []): + conn.execute( + """ + INSERT OR REPLACE INTO nodes + (id, file, lang, loc, churn, type, graph_type) + VALUES (?, ?, ?, ?, ?, ?, 'import') + """, + ( + node["id"], + node["file"], + node.get("lang"), + node.get("loc", 0), + node.get("churn"), + node.get("type", "module"), + ), + ) + + # Insert edges + for edge in graph.get("edges", []): + conn.execute( + """ + INSERT OR IGNORE INTO edges + (source, target, type, file, line, graph_type) + VALUES (?, ?, ?, ?, ?, 'import') + """, + ( + edge["source"], + edge["target"], + edge.get("type", "import"), + edge.get("file"), + edge.get("line"), + ), + ) + + conn.commit() + + def save_call_graph(self, graph: dict[str, Any]) -> None: + """ + Save call graph to database. + + Args: + graph: Call graph with nodes and edges + """ + with sqlite3.connect(self.db_path) as conn: + # Clear existing call graph + conn.execute("DELETE FROM nodes WHERE graph_type = 'call'") + conn.execute("DELETE FROM edges WHERE graph_type = 'call'") + + # Insert nodes + for node in graph.get("nodes", []): + conn.execute( + """ + INSERT OR REPLACE INTO nodes + (id, file, lang, loc, churn, type, graph_type) + VALUES (?, ?, ?, ?, ?, ?, 'call') + """, + ( + node["id"], + node["file"], + node.get("lang"), + node.get("loc", 0), + node.get("churn"), + node.get("type", "function"), + ), + ) + + # Insert edges + for edge in graph.get("edges", []): + conn.execute( + """ + INSERT OR IGNORE INTO edges + (source, target, type, file, line, graph_type) + VALUES (?, ?, ?, ?, ?, 'call') + """, + ( + edge["source"], + edge["target"], + edge.get("type", "call"), + edge.get("file"), + edge.get("line"), + ), + ) + + conn.commit() + + def load_import_graph(self) -> dict[str, Any]: + """ + Load import graph from database. + + Returns: + Import graph dict + """ + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + + # Load nodes + nodes = [] + for row in conn.execute( + "SELECT * FROM nodes WHERE graph_type = 'import'" + ): + nodes.append({ + "id": row["id"], + "file": row["file"], + "lang": row["lang"], + "loc": row["loc"], + "churn": row["churn"], + "type": row["type"], + }) + + # Load edges + edges = [] + for row in conn.execute( + "SELECT * FROM edges WHERE graph_type = 'import'" + ): + edges.append({ + "source": row["source"], + "target": row["target"], + "type": row["type"], + "file": row["file"], + "line": row["line"], + }) + + return {"nodes": nodes, "edges": edges} + + def load_call_graph(self) -> dict[str, Any]: + """ + Load call graph from database. 
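+
+        Example (illustrative):
+
+            graph = store.load_call_graph()
+            # graph == {"nodes": [...], "edges": [...]}, the same shape
+            # accepted by save_call_graph()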
+ + Returns: + Call graph dict + """ + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + + # Load nodes + nodes = [] + for row in conn.execute( + "SELECT * FROM nodes WHERE graph_type = 'call'" + ): + nodes.append({ + "id": row["id"], + "file": row["file"], + "lang": row["lang"], + "loc": row["loc"], + "churn": row["churn"], + "type": row["type"], + }) + + # Load edges + edges = [] + for row in conn.execute( + "SELECT * FROM edges WHERE graph_type = 'call'" + ): + edges.append({ + "source": row["source"], + "target": row["target"], + "type": row["type"], + "file": row["file"], + "line": row["line"], + }) + + return {"nodes": nodes, "edges": edges} + + def query_dependencies( + self, + node_id: str, + direction: str = "both", + graph_type: str = "import" + ) -> dict[str, list[str]]: + """ + Query dependencies of a node. + + Args: + node_id: Node to query + direction: 'upstream', 'downstream', or 'both' + graph_type: 'import' or 'call' + + Returns: + Dict with upstream and/or downstream dependencies + """ + result = {} + + with sqlite3.connect(self.db_path) as conn: + if direction in ["upstream", "both"]: + # Find who depends on this node + upstream = [] + for row in conn.execute( + "SELECT DISTINCT source FROM edges WHERE target = ? AND graph_type = ?", + (node_id, graph_type) + ): + upstream.append(row[0]) + result["upstream"] = upstream + + if direction in ["downstream", "both"]: + # Find what this node depends on + downstream = [] + for row in conn.execute( + "SELECT DISTINCT target FROM edges WHERE source = ? AND graph_type = ?", + (node_id, graph_type) + ): + downstream.append(row[0]) + result["downstream"] = downstream + + return result + + def query_calls( + self, + node_id: str, + direction: str = "both" + ) -> dict[str, list[str]]: + """ + Query function calls related to a node. + + Args: + node_id: Node to query + direction: 'callers', 'callees', or 'both' + + Returns: + Dict with callers and/or callees + """ + result = {} + + with sqlite3.connect(self.db_path) as conn: + if direction in ["callers", "both"]: + # Find who calls this function + callers = [] + for row in conn.execute( + "SELECT DISTINCT source FROM edges WHERE target = ? AND graph_type = 'call'", + (node_id,) + ): + callers.append(row[0]) + result["callers"] = callers + + if direction in ["callees", "both"]: + # Find what this function calls + callees = [] + for row in conn.execute( + "SELECT DISTINCT target FROM edges WHERE source = ? AND graph_type = 'call'", + (node_id,) + ): + callees.append(row[0]) + result["callees"] = callees + + return result + + def save_analysis_result( + self, + analysis_type: str, + result: dict[str, Any] + ) -> None: + """ + Save analysis result to database. + + Args: + analysis_type: Type of analysis (e.g., 'cycles', 'hotspots') + result: Analysis result dict + """ + with sqlite3.connect(self.db_path) as conn: + conn.execute( + """ + INSERT INTO analysis_results (analysis_type, result_json) + VALUES (?, ?) + """, + (analysis_type, json.dumps(result)) + ) + conn.commit() + + def get_latest_analysis(self, analysis_type: str) -> dict[str, Any] | None: + """ + Get most recent analysis result of given type. + + Args: + analysis_type: Type of analysis + + Returns: + Analysis result dict or None if not found + """ + with sqlite3.connect(self.db_path) as conn: + row = conn.execute( + """ + SELECT result_json FROM analysis_results + WHERE analysis_type = ? 
+ ORDER BY created_at DESC + LIMIT 1 + """, + (analysis_type,) + ).fetchone() + + if row: + return json.loads(row[0]) + return None + + def get_graph_stats(self) -> dict[str, Any]: + """ + Get summary statistics about stored graphs. + + Returns: + Dict with node and edge counts + """ + with sqlite3.connect(self.db_path) as conn: + stats = { + "import_nodes": conn.execute( + "SELECT COUNT(*) FROM nodes WHERE graph_type = 'import'" + ).fetchone()[0], + "import_edges": conn.execute( + "SELECT COUNT(*) FROM edges WHERE graph_type = 'import'" + ).fetchone()[0], + "call_nodes": conn.execute( + "SELECT COUNT(*) FROM nodes WHERE graph_type = 'call'" + ).fetchone()[0], + "call_edges": conn.execute( + "SELECT COUNT(*) FROM edges WHERE graph_type = 'call'" + ).fetchone()[0], + } + + return stats + + def get_high_risk_nodes(self, threshold: float = 0.5, limit: int = 10) -> list[dict[str, Any]]: + """ + Get nodes with high risk based on connectivity and churn. + + Args: + threshold: Risk threshold (0-1) + limit: Maximum number of nodes to return + + Returns: + List of high-risk nodes + """ + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + + # Calculate risk based on in-degree and churn + query = """ + SELECT + n.id, + n.file, + n.churn, + COUNT(DISTINCT e.source) as in_degree, + (COUNT(DISTINCT e.source) * COALESCE(n.churn, 1)) / 100.0 as risk_score + FROM nodes n + LEFT JOIN edges e ON n.id = e.target + WHERE n.graph_type = 'import' + GROUP BY n.id + HAVING risk_score > ? + ORDER BY risk_score DESC + LIMIT ? + """ + + nodes = [] + for row in conn.execute(query, (threshold, limit)): + nodes.append({ + "id": row["id"], + "file": row["file"], + "churn": row["churn"], + "in_degree": row["in_degree"], + "risk_score": row["risk_score"], + }) + + return nodes \ No newline at end of file diff --git a/theauditor/graph/visualizer.py b/theauditor/graph/visualizer.py new file mode 100644 index 0000000..59f0805 --- /dev/null +++ b/theauditor/graph/visualizer.py @@ -0,0 +1,937 @@ +"""Graph visualizer module - rich Graphviz visualization with visual intelligence. + +This module transforms raw graph data and analysis results into actionable +visualizations using Graphviz DOT format with intelligent visual encoding. 
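+
+Typical usage (illustrative):
+
+    visualizer = GraphVisualizer()
+    dot = visualizer.generate_dot(graph, analysis)
+    Path("graph.dot").write_text(dot)  # render with e.g. `dot -Tsvg graph.dot -o graph.svg`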
+ +Visual encoding strategy: +- Node color: Programming language +- Node size: Importance/connectivity (in-degree) +- Edge color: Red for cycles, gray for normal +- Edge style: Import type (solid/dashed/dotted) +- Node shape: Type (box=module, ellipse=function) +""" + +from collections import defaultdict +from pathlib import Path +from typing import Any, Dict, List, Set, Optional + + +class GraphVisualizer: + """Transform graph analysis into actionable visualizations.""" + + # Language colors - high contrast, colorblind-friendly palette + LANGUAGE_COLORS = { + 'python': '#3776AB', # Python blue + 'javascript': '#F7DF1E', # JS yellow + 'typescript': '#3178C6', # TS blue + 'java': '#007396', # Java blue-green + 'go': '#00ADD8', # Go cyan + 'rust': '#CE4E21', # Rust orange + 'c': '#A8B9CC', # C gray-blue + 'c++': '#00599C', # C++ dark blue + 'c#': '#239120', # C# green + 'ruby': '#CC342D', # Ruby red + 'php': '#777BB4', # PHP purple + 'default': '#808080', # Gray for unknown + } + + # Risk level colors for severity encoding + RISK_COLORS = { + 'critical': '#D32F2F', # Deep red + 'high': '#F57C00', # Orange + 'medium': '#FBC02D', # Yellow + 'low': '#689F38', # Green + 'info': '#1976D2', # Blue + } + + def __init__(self): + """Initialize the visualizer.""" + self.cycle_edges = set() # Track edges that are part of cycles + self.node_degrees = {} # Track in/out degrees for sizing + + def generate_dot( + self, + graph: Dict[str, Any], + analysis: Optional[Dict[str, Any]] = None, + options: Optional[Dict[str, Any]] = None, + ) -> str: + """ + Generate DOT format with visual intelligence encoding. + + Args: + graph: Graph dict with 'nodes' and 'edges' + analysis: Optional analysis results with cycles, hotspots, etc. + options: Optional visualization options + + Returns: + DOT format string ready for Graphviz + """ + options = options or {} + analysis = analysis or {} + + # Pre-process analysis data + self._process_analysis(graph, analysis) + + # Start DOT file + dot_lines = ['digraph G {'] + + # Global graph attributes + dot_lines.extend(self._generate_graph_attrs(options)) + + # Generate nodes with visual encoding + dot_lines.extend(self._generate_nodes(graph, analysis, options)) + + # Generate edges with visual encoding + dot_lines.extend(self._generate_edges(graph, analysis, options)) + + # Close graph + dot_lines.append('}') + + return '\n'.join(dot_lines) + + def _process_analysis( + self, + graph: Dict[str, Any], + analysis: Dict[str, Any] + ) -> None: + """Pre-process analysis data for quick lookup.""" + # Calculate node degrees + self.node_degrees.clear() + for edge in graph.get('edges', []): + source = edge.get('source', '') + target = edge.get('target', '') + + # Track out-degree + if source not in self.node_degrees: + self.node_degrees[source] = {'in': 0, 'out': 0} + self.node_degrees[source]['out'] += 1 + + # Track in-degree + if target not in self.node_degrees: + self.node_degrees[target] = {'in': 0, 'out': 0} + self.node_degrees[target]['in'] += 1 + + # Identify edges that are part of cycles + self.cycle_edges.clear() + cycles = analysis.get('cycles', []) + for cycle in cycles: + cycle_nodes = cycle.get('nodes', []) + # Mark edges between consecutive nodes in cycle + for i in range(len(cycle_nodes)): + source = cycle_nodes[i] + target = cycle_nodes[(i + 1) % len(cycle_nodes)] + self.cycle_edges.add((source, target)) + + def _generate_graph_attrs(self, options: Dict[str, Any]) -> List[str]: + """Generate global graph attributes.""" + attrs = [] + attrs.append(' rankdir=LR;') # Left to 
right layout + attrs.append(' bgcolor="white";') + attrs.append(' nodesep=0.5;') + attrs.append(' ranksep=1.0;') + attrs.append(' fontname="Arial";') + + # Default node attributes + attrs.append(' node [fontname="Arial", fontsize=10, style=filled];') + + # Default edge attributes + attrs.append(' edge [fontname="Arial", fontsize=8];') + + # Add title if provided + if options.get('title'): + attrs.append(f' label="{options["title"]}";') + attrs.append(' labelloc=t;') + attrs.append(' fontsize=14;') + + return attrs + + def _generate_nodes( + self, + graph: Dict[str, Any], + analysis: Dict[str, Any], + options: Dict[str, Any] + ) -> List[str]: + """Generate nodes with visual encoding.""" + node_lines = [] + nodes = graph.get('nodes', []) + + # Get hotspots for special highlighting + hotspots = analysis.get('hotspots', []) + hotspot_ids = {h['id']: h for h in hotspots[:10]} # Top 10 hotspots + + # Limit nodes if requested + max_nodes = options.get('max_nodes', 500) + if len(nodes) > max_nodes: + # Sort by importance (in-degree + out-degree) + nodes = sorted( + nodes, + key=lambda n: self.node_degrees.get( + n['id'], {'in': 0, 'out': 0} + )['in'] + self.node_degrees.get( + n['id'], {'in': 0, 'out': 0} + )['out'], + reverse=True + )[:max_nodes] + + for node in nodes: + node_id = node.get('id', '') + node_file = node.get('file', node_id) + node_lang = node.get('lang', 'default') + node_type = node.get('type', 'module') + + # Sanitize node ID for DOT format + safe_id = self._sanitize_id(node_id) + + # Determine node color based on language + color = self.LANGUAGE_COLORS.get(node_lang, self.LANGUAGE_COLORS['default']) + + # Determine node size based on in-degree (hotspot detection) + degrees = self.node_degrees.get(node_id, {'in': 0, 'out': 0}) + in_degree = degrees['in'] + + # Scale size based on in-degree (min 0.5, max 2.0) + if in_degree > 30: + size = 2.0 + elif in_degree > 20: + size = 1.5 + elif in_degree > 10: + size = 1.2 + elif in_degree > 5: + size = 1.0 + else: + size = 0.8 + + # Determine shape based on type + if node_type == 'function': + shape = 'ellipse' + elif node_type == 'class': + shape = 'diamond' + else: # module + shape = 'box' + + # Generate label (shortened for readability) + label = self._generate_node_label(node_id, node_file) + + # Build node attributes + attrs = [] + attrs.append(f'label="{label}"') + attrs.append(f'fillcolor="{color}"') + attrs.append(f'shape={shape}') + attrs.append(f'width={size}') + attrs.append(f'height={size * 0.7}') + + # Special styling for hotspots + if node_id in hotspot_ids: + attrs.append('penwidth=3') + attrs.append('fontsize=12') + attrs.append('fontcolor="black"') + # Add tooltip with hotspot info + hotspot = hotspot_ids[node_id] + tooltip = f"Hotspot: in={hotspot.get('in_degree', 0)}, out={hotspot.get('out_degree', 0)}" + attrs.append(f'tooltip="{tooltip}"') + else: + attrs.append('penwidth=1') + attrs.append('fontcolor="white"') + + # Create node line + node_line = f' {safe_id} [{", ".join(attrs)}];' + node_lines.append(node_line) + + return node_lines + + def _generate_edges( + self, + graph: Dict[str, Any], + analysis: Dict[str, Any], + options: Dict[str, Any] + ) -> List[str]: + """Generate edges with visual encoding.""" + edge_lines = [] + edges = graph.get('edges', []) + + # Get node IDs for filtering + node_ids = {n['id'] for n in graph.get('nodes', [])} + max_nodes = options.get('max_nodes', 500) + if len(node_ids) > max_nodes: + # Keep only edges between displayed nodes + important_nodes = set(list(node_ids)[:max_nodes]) + edges 
= [ + e for e in edges + if e.get('source') in important_nodes and e.get('target') in important_nodes + ] + + for edge in edges: + source = edge.get('source', '') + target = edge.get('target', '') + edge_type = edge.get('type', 'import') + + # Skip self-loops unless in options + if source == target and not options.get('show_self_loops'): + continue + + # Sanitize IDs + safe_source = self._sanitize_id(source) + safe_target = self._sanitize_id(target) + + # Build edge attributes + attrs = [] + + # Color red if part of a cycle + if (source, target) in self.cycle_edges: + attrs.append('color="#D32F2F"') # Red for cycles + attrs.append('penwidth=2') + attrs.append('fontcolor="#D32F2F"') + attrs.append('label="cycle"') + else: + attrs.append('color="#666666"') # Gray for normal + attrs.append('penwidth=1') + + # Style based on edge type + if edge_type == 'call': + attrs.append('style=dashed') + elif edge_type == 'extends' or edge_type == 'implements': + attrs.append('style=bold') + else: # import + attrs.append('style=solid') + + # Arrowhead style + if edge_type == 'extends': + attrs.append('arrowhead=empty') # Inheritance + elif edge_type == 'implements': + attrs.append('arrowhead=odiamond') # Interface + else: + attrs.append('arrowhead=normal') + + # Create edge line + if attrs: + edge_line = f' {safe_source} -> {safe_target} [{", ".join(attrs)}];' + else: + edge_line = f' {safe_source} -> {safe_target};' + + edge_lines.append(edge_line) + + return edge_lines + + def _sanitize_id(self, node_id: str) -> str: + """Sanitize node ID for DOT format.""" + # Replace problematic characters + safe_id = node_id.replace('.', '_') + safe_id = safe_id.replace('/', '_') + safe_id = safe_id.replace('\\', '_') + safe_id = safe_id.replace('-', '_') + safe_id = safe_id.replace(':', '_') + safe_id = safe_id.replace(' ', '_') + safe_id = safe_id.replace('(', '_') + safe_id = safe_id.replace(')', '_') + safe_id = safe_id.replace('[', '_') + safe_id = safe_id.replace(']', '_') + + # Ensure it starts with a letter or underscore + if safe_id and not safe_id[0].isalpha() and safe_id[0] != '_': + safe_id = '_' + safe_id + + # Quote if necessary + if safe_id and not safe_id.replace('_', '').isalnum(): + safe_id = f'"{safe_id}"' + + return safe_id + + def _generate_node_label(self, node_id: str, node_file: str) -> str: + """Generate readable label for a node.""" + # Use filename for modules, full ID for functions + if '::' in node_id: # Function node + # Show module::function + parts = node_id.split('::') + if len(parts) >= 2: + module = Path(parts[0]).stem # Just filename without extension + function = parts[1] + return f"{module}::{function}" + return node_id + else: # Module node + # Show just the filename without path + path = Path(node_file) + if path.parts: + # Show last 2 parts of path for context + if len(path.parts) > 2: + return f".../{path.parts[-2]}/{path.name}" + elif len(path.parts) > 1: + return f"{path.parts[-2]}/{path.name}" + else: + return path.name + return node_id + + def generate_dot_with_layers( + self, + graph: Dict[str, Any], + layers: Dict[int, List[str]], + analysis: Optional[Dict[str, Any]] = None, + options: Optional[Dict[str, Any]] = None, + ) -> str: + """ + Generate DOT format with architectural layers as subgraphs. 
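+
+        Example (illustrative; layer numbers map to lists of node ids):
+
+            layers = {0: ["app/models.py"], 1: ["app/views.py", "app/api.py"]}
+            dot = visualizer.generate_dot_with_layers(graph, layers)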
+ + Args: + graph: Graph dict with 'nodes' and 'edges' + layers: Dict mapping layer number to list of node IDs + analysis: Optional analysis results + options: Optional visualization options + + Returns: + DOT format string with layer subgraphs + """ + options = options or {} + analysis = analysis or {} + + # Pre-process analysis data + self._process_analysis(graph, analysis) + + # Build node lookup for efficiency + node_map = {n['id']: n for n in graph.get('nodes', []) if n.get('id') is not None} + + # Start DOT file + dot_lines = ['digraph G {'] + + # Global graph attributes + dot_lines.extend(self._generate_graph_attrs(options)) + dot_lines.append(' rankdir=TB;') # Top-to-bottom for layers + + # Generate layer subgraphs + # Filter out None keys and ensure all keys are comparable + valid_layer_nums = [k for k in layers.keys() if k is not None] + for layer_num in sorted(valid_layer_nums): + layer_nodes = layers[layer_num] + if not layer_nodes: + continue + + # Create subgraph for this layer + dot_lines.append(f' subgraph cluster_layer{layer_num} {{') + dot_lines.append(f' label="Layer {layer_num}";') + dot_lines.append(f' style=filled;') + dot_lines.append(f' fillcolor="#F0F0F0";') + dot_lines.append(f' color="#CCCCCC";') + dot_lines.append(f' fontsize=12;') + dot_lines.append(f' rank=same;') # Keep nodes at same level + + # Add nodes for this layer + for node_id in layer_nodes: + if node_id not in node_map: + continue + + node = node_map[node_id] + node_lang = node.get('lang', 'default') + node_type = node.get('type', 'module') + + # Sanitize node ID + safe_id = self._sanitize_id(node_id) + + # Determine node color based on language + color = self.LANGUAGE_COLORS.get(node_lang, self.LANGUAGE_COLORS['default']) + + # Determine node size based on in-degree + degrees = self.node_degrees.get(node_id, {'in': 0, 'out': 0}) + in_degree = degrees['in'] + + # Scale size based on in-degree + if in_degree > 30: + size = 2.0 + elif in_degree > 20: + size = 1.5 + elif in_degree > 10: + size = 1.2 + elif in_degree > 5: + size = 1.0 + else: + size = 0.8 + + # Determine shape based on type + if node_type == 'function': + shape = 'ellipse' + elif node_type == 'class': + shape = 'diamond' + else: # module + shape = 'box' + + # Generate label + label = self._generate_node_label(node_id, node.get('file', node_id)) + + # Check if node has churn data for border thickness + churn = node.get('churn', 0) + if churn is None: + churn = 0 + if churn > 100: + penwidth = 4 # Very high churn + elif churn > 50: + penwidth = 3 # High churn + elif churn > 20: + penwidth = 2 # Medium churn + else: + penwidth = 1 # Low/no churn + + # Build node attributes + attrs = [] + attrs.append(f'label="{label}"') + attrs.append(f'fillcolor="{color}"') + attrs.append(f'shape={shape}') + attrs.append(f'width={size}') + attrs.append(f'height={size * 0.7}') + attrs.append(f'penwidth={penwidth}') + attrs.append('fontcolor="white"') + attrs.append('style=filled') + + # Add tooltip with layer info + tooltip = f"Layer {layer_num}: {node_id}" + if churn > 0: + tooltip += f" (churn: {churn})" + attrs.append(f'tooltip="{tooltip}"') + + # Create node line + node_line = f' {safe_id} [{", ".join(attrs)}];' + dot_lines.append(node_line) + + dot_lines.append(' }') # Close subgraph + + # Generate edges (outside of subgraphs) + dot_lines.extend(self._generate_edges(graph, analysis, options)) + + # Close graph + dot_lines.append('}') + + return '\n'.join(dot_lines) + + def generate_impact_visualization( + self, + graph: Dict[str, Any], + impact: 
Dict[str, Any], + options: Optional[Dict[str, Any]] = None, + ) -> str: + """ + Generate DOT highlighting impact analysis results. + + Args: + graph: Graph dict with 'nodes' and 'edges' + impact: Impact analysis with targets, upstream, downstream + options: Optional visualization options + + Returns: + DOT format string with impact highlighting + """ + options = options or {} + + # Extract impact sets + targets = set(impact.get('targets', [])) + upstream = set(impact.get('upstream', [])) + downstream = set(impact.get('downstream', [])) + + # Pre-process analysis data + self._process_analysis(graph, {}) + + # Start DOT file + dot_lines = ['digraph G {'] + + # Global graph attributes + dot_lines.extend(self._generate_graph_attrs(options)) + + # Add legend for impact visualization + dot_lines.append(' subgraph cluster_legend {') + dot_lines.append(' label="Impact Analysis Legend";') + dot_lines.append(' style=filled;') + dot_lines.append(' fillcolor=white;') + dot_lines.append(' node [shape=box, style=filled];') + dot_lines.append(' legend_target [label="Target", fillcolor="#FF0000"];') + dot_lines.append(' legend_upstream [label="Upstream", fillcolor="#FF9800"];') + dot_lines.append(' legend_downstream [label="Downstream", fillcolor="#2196F3"];') + dot_lines.append(' legend_both [label="Both", fillcolor="#9C27B0"];') + dot_lines.append(' legend_unaffected [label="Unaffected", fillcolor="#808080"];') + dot_lines.append(' }') + + # Generate nodes with impact highlighting + node_lines = [] + for node in graph.get('nodes', []): + node_id = node.get('id', '') + node_file = node.get('file', node_id) + node_lang = node.get('lang', 'default') + node_type = node.get('type', 'module') + + # Sanitize node ID + safe_id = self._sanitize_id(node_id) + + # Determine impact color + if node_id in targets: + color = '#FF0000' # Red for target + fontcolor = 'white' + penwidth = 3 + elif node_id in upstream and node_id in downstream: + color = '#9C27B0' # Purple for both upstream and downstream + fontcolor = 'white' + penwidth = 2 + elif node_id in upstream: + color = '#FF9800' # Orange for upstream + fontcolor = 'white' + penwidth = 2 + elif node_id in downstream: + color = '#2196F3' # Blue for downstream + fontcolor = 'white' + penwidth = 2 + else: + color = '#E0E0E0' # Light gray for unaffected + fontcolor = 'black' + penwidth = 1 + + # Determine node size based on impact radius + degrees = self.node_degrees.get(node_id, {'in': 0, 'out': 0}) + if node_id in targets: + size = 1.5 # Targets are emphasized + elif node_id in upstream or node_id in downstream: + size = 1.2 # Affected nodes are slightly larger + else: + size = 0.8 # Unaffected nodes are smaller + + # Determine shape based on type + if node_type == 'function': + shape = 'ellipse' + elif node_type == 'class': + shape = 'diamond' + else: # module + shape = 'box' + + # Generate label + label = self._generate_node_label(node_id, node_file) + + # Build node attributes + attrs = [] + attrs.append(f'label="{label}"') + attrs.append(f'fillcolor="{color}"') + attrs.append(f'shape={shape}') + attrs.append(f'width={size}') + attrs.append(f'height={size * 0.7}') + attrs.append(f'penwidth={penwidth}') + attrs.append(f'fontcolor="{fontcolor}"') + attrs.append('style=filled') + + # Add tooltip with impact info + tooltip_parts = [] + if node_id in targets: + tooltip_parts.append("TARGET") + if node_id in upstream: + tooltip_parts.append("Upstream") + if node_id in downstream: + tooltip_parts.append("Downstream") + if tooltip_parts: + tooltip = f"{node_id}: {', 
'.join(tooltip_parts)}" + else: + tooltip = f"{node_id}: Unaffected" + attrs.append(f'tooltip="{tooltip}"') + + # Create node line + node_line = f' {safe_id} [{", ".join(attrs)}];' + node_lines.append(node_line) + + dot_lines.extend(node_lines) + + # Generate edges with impact highlighting + edge_lines = [] + for edge in graph.get('edges', []): + source = edge.get('source', '') + target = edge.get('target', '') + edge_type = edge.get('type', 'import') + + # Skip self-loops unless in options + if source == target and not options.get('show_self_loops'): + continue + + # Sanitize IDs + safe_source = self._sanitize_id(source) + safe_target = self._sanitize_id(target) + + # Build edge attributes + attrs = [] + + # Color edges based on impact path + if source in targets and target in downstream: + attrs.append('color="#FF0000"') # Red for direct impact + attrs.append('penwidth=3') + elif source in upstream and target in targets: + attrs.append('color="#FF9800"') # Orange for upstream to target + attrs.append('penwidth=2') + elif (source in targets or source in upstream or source in downstream) and \ + (target in targets or target in upstream or target in downstream): + attrs.append('color="#666666"') # Gray for affected connections + attrs.append('penwidth=1.5') + else: + attrs.append('color="#E0E0E0"') # Light gray for unaffected + attrs.append('penwidth=0.5') + attrs.append('style=dashed') + + # Arrowhead style + attrs.append('arrowhead=normal') + + # Create edge line + if attrs: + edge_line = f' {safe_source} -> {safe_target} [{", ".join(attrs)}];' + else: + edge_line = f' {safe_source} -> {safe_target};' + + edge_lines.append(edge_line) + + dot_lines.extend(edge_lines) + + # Close graph + dot_lines.append('}') + + return '\n'.join(dot_lines) + + def generate_cycles_only_view( + self, + graph: Dict[str, Any], + cycles: List[Dict[str, Any]], + options: Optional[Dict[str, Any]] = None, + ) -> str: + """ + Generate DOT format showing only nodes and edges involved in cycles. 
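+
+        Example (illustrative; each cycle is a dict carrying a 'nodes' list):
+
+            cycles = [{"nodes": ["a.py", "b.py", "c.py"]}]
+            dot = visualizer.generate_cycles_only_view(graph, cycles)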
+ + Args: + graph: Graph dict with 'nodes' and 'edges' + cycles: List of cycle dicts with 'nodes' lists + options: Optional visualization options + + Returns: + DOT format string with only cycle-related elements + """ + options = options or {} + + # Collect all nodes involved in cycles + cycle_nodes = set() + cycle_edges = set() + + for cycle in cycles: + nodes = cycle.get('nodes', []) + cycle_nodes.update(nodes) + + # Mark edges between consecutive nodes in cycle + for i in range(len(nodes)): + source = nodes[i] + target = nodes[(i + 1) % len(nodes)] + cycle_edges.add((source, target)) + + if not cycle_nodes: + # No cycles found + return 'digraph G {\n label="No cycles detected";\n}' + + # Filter graph to only cycle-related elements + filtered_graph = { + 'nodes': [n for n in graph.get('nodes', []) if n['id'] in cycle_nodes], + 'edges': [e for e in graph.get('edges', []) + if (e['source'], e['target']) in cycle_edges] + } + + # Pre-process for visualization + self.cycle_edges = cycle_edges # Mark for red highlighting + self._process_analysis(filtered_graph, {}) + + # Start DOT file + dot_lines = ['digraph G {'] + + # Global graph attributes + dot_lines.append(' label="Dependency Cycles Visualization";') + dot_lines.append(' labelloc=t;') + dot_lines.append(' fontsize=14;') + dot_lines.append(' bgcolor="white";') + dot_lines.append(' rankdir=LR;') + dot_lines.append(' node [fontname="Arial", fontsize=10, style=filled];') + dot_lines.append(' edge [fontname="Arial", fontsize=8];') + + # Group nodes by cycle for better visualization + for idx, cycle in enumerate(cycles): + cycle_node_set = set(cycle.get('nodes', [])) + + dot_lines.append(f' subgraph cluster_cycle{idx} {{') + dot_lines.append(f' label="Cycle {idx + 1} (size: {len(cycle_node_set)})";') + dot_lines.append(' style=filled;') + dot_lines.append(' fillcolor="#FFE0E0";') # Light red background + dot_lines.append(' color="#D32F2F";') # Red border + + # Add nodes for this cycle + for node in filtered_graph['nodes']: + if node['id'] not in cycle_node_set: + continue + + node_id = node['id'] + safe_id = self._sanitize_id(node_id) + label = self._generate_node_label(node_id, node.get('file', node_id)) + + # Node styling + attrs = [] + attrs.append(f'label="{label}"') + attrs.append('fillcolor="#FF5252"') # Red for cycle nodes + attrs.append('fontcolor="white"') + attrs.append('shape=box') + attrs.append('penwidth=2') + + node_line = f' {safe_id} [{", ".join(attrs)}];' + dot_lines.append(node_line) + + dot_lines.append(' }') + + # Add edges + for edge in filtered_graph['edges']: + source = edge['source'] + target = edge['target'] + + safe_source = self._sanitize_id(source) + safe_target = self._sanitize_id(target) + + attrs = [] + attrs.append('color="#D32F2F"') # Red for cycle edges + attrs.append('penwidth=2') + attrs.append('arrowhead=normal') + + edge_line = f' {safe_source} -> {safe_target} [{", ".join(attrs)}];' + dot_lines.append(edge_line) + + dot_lines.append('}') + + return '\n'.join(dot_lines) + + def generate_hotspots_only_view( + self, + graph: Dict[str, Any], + hotspots: List[Dict[str, Any]], + options: Optional[Dict[str, Any]] = None, + top_n: int = 10, + ) -> str: + """ + Generate DOT format showing only hotspot nodes and their connections. 
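+
+        Example (illustrative; hotspot entries carry 'id' plus degree counts):
+
+            hotspots = [{"id": "app/db.py", "in_degree": 42, "out_degree": 7}]
+            dot = visualizer.generate_hotspots_only_view(graph, hotspots, top_n=5)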
+ + Args: + graph: Graph dict with 'nodes' and 'edges' + hotspots: List of hotspot dicts with 'id' and metrics + options: Optional visualization options + top_n: Number of top hotspots to show (default: 10) + + Returns: + DOT format string with only hotspot-related elements + """ + options = options or {} + + # Get top N hotspots + top_hotspots = hotspots[:top_n] + hotspot_ids = {h['id'] for h in top_hotspots} + + if not hotspot_ids: + return 'digraph G {\n label="No hotspots detected";\n}' + + # Collect nodes connected to hotspots (1 degree of separation) + connected_nodes = set(hotspot_ids) + for edge in graph.get('edges', []): + if edge['source'] in hotspot_ids: + connected_nodes.add(edge['target']) + if edge['target'] in hotspot_ids: + connected_nodes.add(edge['source']) + + # Filter graph + filtered_graph = { + 'nodes': [n for n in graph.get('nodes', []) if n['id'] in connected_nodes], + 'edges': [e for e in graph.get('edges', []) + if e['source'] in connected_nodes and e['target'] in connected_nodes] + } + + # Pre-process + self._process_analysis(filtered_graph, {}) + + # Start DOT file + dot_lines = ['digraph G {'] + + # Global graph attributes + dot_lines.append(f' label="Top {top_n} Hotspots Visualization";') + dot_lines.append(' labelloc=t;') + dot_lines.append(' fontsize=14;') + dot_lines.append(' bgcolor="white";') + dot_lines.append(' rankdir=LR;') + dot_lines.append(' node [fontname="Arial", fontsize=10, style=filled];') + dot_lines.append(' edge [fontname="Arial", fontsize=8];') + + # Create hotspot lookup + hotspot_map = {h['id']: h for h in top_hotspots} + + # Generate nodes + for node in filtered_graph['nodes']: + node_id = node['id'] + safe_id = self._sanitize_id(node_id) + label = self._generate_node_label(node_id, node.get('file', node_id)) + + # Determine styling based on whether it's a hotspot + if node_id in hotspot_ids: + hotspot = hotspot_map[node_id] + in_degree = hotspot.get('in_degree', 0) + out_degree = hotspot.get('out_degree', 0) + + # Size based on total connections + total = in_degree + out_degree + if total > 50: + size = 2.5 + elif total > 30: + size = 2.0 + elif total > 20: + size = 1.5 + else: + size = 1.2 + + # Color intensity based on ranking + rank = list(hotspot_ids).index(node_id) + if rank == 0: + color = '#D32F2F' # Darkest red for #1 + elif rank < 3: + color = '#F44336' # Red for top 3 + elif rank < 5: + color = '#FF5722' # Deep orange for top 5 + else: + color = '#FF9800' # Orange for rest + + attrs = [] + attrs.append(f'label="{label}\\n[in:{in_degree} out:{out_degree}]"') + attrs.append(f'fillcolor="{color}"') + attrs.append('fontcolor="white"') + attrs.append('shape=box') + attrs.append(f'width={size}') + attrs.append(f'height={size * 0.7}') + attrs.append('penwidth=3') + + # Tooltip + tooltip = f"Hotspot #{rank+1}: in={in_degree}, out={out_degree}" + attrs.append(f'tooltip="{tooltip}"') + else: + # Connected node (not a hotspot) + attrs = [] + attrs.append(f'label="{label}"') + attrs.append('fillcolor="#E0E0E0"') + attrs.append('fontcolor="black"') + attrs.append('shape=box') + attrs.append('width=0.8') + attrs.append('height=0.6') + attrs.append('penwidth=1') + + node_line = f' {safe_id} [{", ".join(attrs)}];' + dot_lines.append(node_line) + + # Generate edges + for edge in filtered_graph['edges']: + source = edge['source'] + target = edge['target'] + + safe_source = self._sanitize_id(source) + safe_target = self._sanitize_id(target) + + # Highlight edges connected to hotspots + if source in hotspot_ids or target in hotspot_ids: + attrs = 
['color="#666666"', 'penwidth=1.5'] + else: + attrs = ['color="#CCCCCC"', 'penwidth=0.5'] + + attrs.append('arrowhead=normal') + + edge_line = f' {safe_source} -> {safe_target} [{", ".join(attrs)}];' + dot_lines.append(edge_line) + + dot_lines.append('}') + + return '\n'.join(dot_lines) \ No newline at end of file diff --git a/theauditor/impact_analyzer.py b/theauditor/impact_analyzer.py new file mode 100644 index 0000000..4ecd5f2 --- /dev/null +++ b/theauditor/impact_analyzer.py @@ -0,0 +1,683 @@ +"""Impact analysis engine for tracing code dependencies and change blast radius.""" + +import sqlite3 +from pathlib import Path +from typing import Dict, List, Optional, Any, Set, Tuple + + +def analyze_impact( + db_path: str, + target_file: str, + target_line: int, + trace_to_backend: bool = False +) -> Dict[str, Any]: + """ + Analyze the impact of changing code at a specific file and line. + + Traces both upstream dependencies (who calls this) and downstream + dependencies (what this calls) to understand the blast radius of changes. + + Args: + db_path: Path to the SQLite database + target_file: Path to the file containing the target code + target_line: Line number of the target code + + Returns: + Dictionary containing: + - target_symbol: Name and type of the symbol at target location + - upstream: List of symbols that call the target (callers) + - downstream: List of symbols called by the target (callees) + - impact_summary: Statistics about the blast radius + """ + # Connect to database + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + try: + # Normalize the target file path to match database format + target_file = Path(target_file).as_posix() + if target_file.startswith("./"): + target_file = target_file[2:] + + # Check if cross-stack analysis is requested + if trace_to_backend and target_file.endswith(('.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs')): + # Attempt cross-stack tracing + cross_stack_trace = trace_frontend_to_backend(cursor, target_file, target_line) + + if cross_stack_trace: + # Found a backend endpoint - analyze its downstream impact + backend_file = cross_stack_trace["backend"]["file"] + backend_line = cross_stack_trace["backend"]["line"] + + # Find the backend function/class at the traced location + cursor.execute(""" + SELECT name, type, line, col + FROM symbols + WHERE path = ? + AND type IN ('function', 'class') + AND line <= ? 
+ ORDER BY line DESC + LIMIT 1 + """, (backend_file, backend_line)) + + backend_result = cursor.fetchone() + + if backend_result: + backend_name, backend_type, backend_def_line, backend_col = backend_result + + # Only get downstream dependencies from backend (not upstream) + downstream = find_downstream_dependencies(cursor, backend_file, backend_def_line, backend_name) + downstream_transitive = calculate_transitive_impact(cursor, downstream, "downstream") + + # Build cross-stack response + return { + "cross_stack_trace": cross_stack_trace, + "target_symbol": { + "name": f"API Call to {cross_stack_trace['frontend']['url']}", + "type": "api_call", + "file": target_file, + "line": target_line, + "column": 0 + }, + "backend_symbol": { + "name": backend_name, + "type": backend_type, + "file": backend_file, + "line": backend_def_line, + "column": backend_col + }, + "upstream": [], # Frontend has no upstream in this context + "upstream_transitive": [], + "downstream": downstream, + "downstream_transitive": downstream_transitive, + "impact_summary": { + "direct_upstream": 0, + "direct_downstream": len(downstream), + "total_upstream": 0, + "total_downstream": len(downstream) + len(downstream_transitive), + "total_impact": len(downstream) + len(downstream_transitive), + "affected_files": len(set( + [d["file"] for d in downstream] + + [d["file"] for d in downstream_transitive] + )), + "cross_stack": True + } + } + + # Step 1: Find the target symbol at the specified location + # Look for function or class definition at or near the target line + cursor.execute(""" + SELECT name, type, line, col + FROM symbols + WHERE path = ? + AND type IN ('function', 'class') + AND line <= ? + ORDER BY line DESC + LIMIT 1 + """, (target_file, target_line)) + + target_result = cursor.fetchone() + + if not target_result: + # No function/class found, return empty analysis + return { + "target_symbol": None, + "error": f"No function or class found at {target_file}:{target_line}", + "upstream": [], + "downstream": [], + "impact_summary": { + "total_upstream": 0, + "total_downstream": 0, + "total_impact": 0 + } + } + + target_name, target_type, target_def_line, target_col = target_result + + # Step 2: Find upstream dependencies (who calls this symbol) + upstream = find_upstream_dependencies(cursor, target_file, target_name, target_type) + + # Step 3: Find downstream dependencies (what this symbol calls) + downstream = find_downstream_dependencies(cursor, target_file, target_def_line, target_name) + + # Step 4: Calculate transitive impact (recursive dependencies) + upstream_transitive = calculate_transitive_impact(cursor, upstream, "upstream") + downstream_transitive = calculate_transitive_impact(cursor, downstream, "downstream") + + # Build response + return { + "target_symbol": { + "name": target_name, + "type": target_type, + "file": target_file, + "line": target_def_line, + "column": target_col + }, + "upstream": upstream, + "upstream_transitive": upstream_transitive, + "downstream": downstream, + "downstream_transitive": downstream_transitive, + "impact_summary": { + "direct_upstream": len(upstream), + "direct_downstream": len(downstream), + "total_upstream": len(upstream) + len(upstream_transitive), + "total_downstream": len(downstream) + len(downstream_transitive), + "total_impact": len(upstream) + len(downstream) + len(upstream_transitive) + len(downstream_transitive), + "affected_files": len(set( + [u["file"] for u in upstream] + + [d["file"] for d in downstream] + + [u["file"] for u in upstream_transitive] + + 
[d["file"] for d in downstream_transitive] + )) + } + } + + finally: + conn.close() + + +def find_upstream_dependencies( + cursor: sqlite3.Cursor, + target_file: str, + target_name: str, + target_type: str +) -> List[Dict[str, Any]]: + """ + Find all symbols that call the target symbol (upstream dependencies). + + Args: + cursor: Database cursor + target_file: File containing the target symbol + target_name: Name of the target symbol + target_type: Type of the target symbol (function/class) + + Returns: + List of upstream dependency dictionaries + """ + upstream = [] + + # Find all calls to this symbol + # Match by name (simple matching, could be enhanced with qualified names) + cursor.execute(""" + SELECT DISTINCT s1.path, s1.name, s1.type, s1.line, s1.col + FROM symbols s1 + WHERE s1.type = 'call' + AND s1.name = ? + AND EXISTS ( + SELECT 1 FROM symbols s2 + WHERE s2.path = s1.path + AND s2.type IN ('function', 'class') + AND s2.line <= s1.line + AND s2.name != ? + ) + ORDER BY s1.path, s1.line + """, (target_name, target_name)) + + for row in cursor.fetchall(): + call_file, call_name, call_type, call_line, call_col = row + + # Find the containing function/class for this call + cursor.execute(""" + SELECT name, type, line + FROM symbols + WHERE path = ? + AND type IN ('function', 'class') + AND line <= ? + ORDER BY line DESC + LIMIT 1 + """, (call_file, call_line)) + + container = cursor.fetchone() + if container: + container_name, container_type, container_line = container + upstream.append({ + "file": call_file, + "symbol": container_name, + "type": container_type, + "line": container_line, + "call_line": call_line, + "calls": target_name + }) + + # Deduplicate by file+symbol combination + seen = set() + unique_upstream = [] + for dep in upstream: + key = (dep["file"], dep["symbol"]) + if key not in seen: + seen.add(key) + unique_upstream.append(dep) + + return unique_upstream + + +def find_downstream_dependencies( + cursor: sqlite3.Cursor, + target_file: str, + target_line: int, + target_name: str +) -> List[Dict[str, Any]]: + """ + Find all symbols called by the target symbol (downstream dependencies). + + Args: + cursor: Database cursor + target_file: File containing the target symbol + target_line: Line where target symbol is defined + target_name: Name of the target symbol + + Returns: + List of downstream dependency dictionaries + """ + downstream = [] + + # Find the end line of the target function/class + # Look for the next function/class definition in the same file + cursor.execute(""" + SELECT line + FROM symbols + WHERE path = ? + AND type IN ('function', 'class') + AND line > ? + ORDER BY line + LIMIT 1 + """, (target_file, target_line)) + + next_symbol = cursor.fetchone() + end_line = next_symbol[0] if next_symbol else 999999 + + # Find all calls within the target function/class body + cursor.execute(""" + SELECT DISTINCT name, line, col + FROM symbols + WHERE path = ? + AND type = 'call' + AND line > ? + AND line < ? + ORDER BY line + """, (target_file, target_line, end_line)) + + for row in cursor.fetchall(): + called_name, call_line, call_col = row + + # Skip recursive calls + if called_name == target_name: + continue + + # Try to find the definition of the called symbol + cursor.execute(""" + SELECT path, type, line + FROM symbols + WHERE name = ? 
+ AND type IN ('function', 'class') + LIMIT 1 + """, (called_name,)) + + definition = cursor.fetchone() + if definition: + def_file, def_type, def_line = definition + downstream.append({ + "file": def_file, + "symbol": called_name, + "type": def_type, + "line": def_line, + "called_from_line": call_line, + "called_by": target_name + }) + else: + # External or built-in function + downstream.append({ + "file": "external", + "symbol": called_name, + "type": "unknown", + "line": 0, + "called_from_line": call_line, + "called_by": target_name + }) + + # Deduplicate by symbol name + seen = set() + unique_downstream = [] + for dep in downstream: + if dep["symbol"] not in seen: + seen.add(dep["symbol"]) + unique_downstream.append(dep) + + return unique_downstream + + +def calculate_transitive_impact( + cursor: sqlite3.Cursor, + direct_deps: List[Dict[str, Any]], + direction: str, + max_depth: int = 2, + visited: Optional[Set[Tuple[str, str]]] = None +) -> List[Dict[str, Any]]: + """ + Calculate transitive dependencies up to max_depth. + + Args: + cursor: Database cursor + direct_deps: Direct dependencies to expand + direction: "upstream" or "downstream" + max_depth: Maximum recursion depth + visited: Set of already visited (file, symbol) pairs + + Returns: + List of transitive dependencies + """ + if max_depth <= 0 or not direct_deps: + return [] + + if visited is None: + visited = set() + + transitive = [] + + for dep in direct_deps: + # Skip external dependencies + if dep["file"] == "external": + continue + + dep_key = (dep["file"], dep["symbol"]) + if dep_key in visited: + continue + visited.add(dep_key) + + if direction == "upstream": + # Find who calls this dependency + next_level = find_upstream_dependencies( + cursor, dep["file"], dep["symbol"], dep["type"] + ) + else: + # Find what this dependency calls + next_level = find_downstream_dependencies( + cursor, dep["file"], dep["line"], dep["symbol"] + ) + + # Add current level + for next_dep in next_level: + next_dep["depth"] = max_depth + transitive.append(next_dep) + + # Recurse + recursive_deps = calculate_transitive_impact( + cursor, next_level, direction, max_depth - 1, visited + ) + transitive.extend(recursive_deps) + + return transitive + + +def trace_frontend_to_backend( + cursor: sqlite3.Cursor, + target_file: str, + target_line: int +) -> Optional[Dict[str, Any]]: + """ + Trace a frontend API call to its corresponding backend endpoint. 
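+
+    Example (illustrative; assumes the indexer has already populated the
+    api_endpoints table):
+
+        trace = trace_frontend_to_backend(cursor, "src/api/client.ts", 42)
+        # trace["backend"]["pattern"] might be "/api/users/:id"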
+
+    Args:
+        cursor: Database cursor
+        target_file: Frontend file containing API call
+        target_line: Line number of the API call
+
+    Returns:
+        Dictionary with cross-stack trace information or None if not found
+    """
+    import re
+    from pathlib import Path
+
+    # Read the target file to extract API call details
+    try:
+        file_path = Path(target_file)
+        if not file_path.exists():
+            # Try relative path
+            file_path = Path(".") / target_file
+            if not file_path.exists():
+                return None
+
+        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+            lines = f.readlines()
+
+        # Get context around the target line (5 lines before and after)
+        start_idx = max(0, target_line - 6)  # -6 because line numbers are 1-based
+        end_idx = min(len(lines), target_line + 5)
+        context_lines = lines[start_idx:end_idx]
+        context = ''.join(context_lines)
+
+        # Extract API call patterns
+        # Common patterns: axios.get('/api/users'), fetch('/api/users'), http.post('/api/items')
+        api_patterns = [
+            # axios patterns
+            r'axios\.(get|post|put|patch|delete)\s*\(\s*[\'"`]([^\'"`]+)[\'"`]',
+            # fetch patterns
+            r'fetch\s*\(\s*[\'"`]([^\'"`]+)[\'"`].*method:\s*[\'"`](GET|POST|PUT|PATCH|DELETE)[\'"`]',
+            # fetch with default GET
+            r'fetch\s*\(\s*[\'"`]([^\'"`]+)[\'"`]',
+            # http/request patterns
+            r'(http|request)\.(get|post|put|patch|delete)\s*\(\s*[\'"`]([^\'"`]+)[\'"`]',
+            # jQuery ajax
+            r'\$\.(ajax|get|post)\s*\(\s*\{[^}]*url:\s*[\'"`]([^\'"`]+)[\'"`]',
+        ]
+
+        method = None
+        url_path = None
+
+        for pattern in api_patterns:
+            match = re.search(pattern, context, re.IGNORECASE | re.MULTILINE)
+            if match:
+                groups = match.groups()
+                if 'fetch' in pattern and len(groups) == 2:
+                    # fetch with explicit method
+                    url_path = groups[0]
+                    method = groups[1].upper()
+                elif 'fetch' in pattern and len(groups) == 1:
+                    # fetch defaults to GET
+                    url_path = groups[0]
+                    method = 'GET'
+                elif len(groups) >= 2:
+                    # axios, http, request patterns
+                    if pattern.startswith(r'axios'):
+                        method = groups[0].upper()
+                        url_path = groups[1]
+                    elif pattern.startswith(r'(http|request)'):
+                        method = groups[1].upper()
+                        url_path = groups[2]
+                    elif pattern.startswith(r'\$'):
+                        # jQuery
+                        url_path = groups[1]
+                        if groups[0] == 'ajax':
+                            # Look for method in context
+                            method_match = re.search(r'type:\s*[\'"`](GET|POST|PUT|PATCH|DELETE)[\'"`]', context)
+                            method = method_match.group(1).upper() if method_match else 'GET'
+                        elif groups[0] == 'get':
+                            method = 'GET'
+                        elif groups[0] == 'post':
+                            method = 'POST'
+                break
+
+        if not url_path or not method:
+            return None
+
+        # Clean up the URL path
+        # Remove query parameters and fragments
+        url_path = url_path.split('?')[0].split('#')[0]
+        # Remove any template literals (${...})
+        url_path = re.sub(r'\$\{[^}]+\}', '*', url_path)
+
+        # Query the api_endpoints table to find matching backend endpoint
+        # Try exact match first
+        cursor.execute("""
+            SELECT file, method, pattern, controls
+            FROM api_endpoints
+            WHERE pattern = ? AND method = ?
+            LIMIT 1
+        """, (url_path, method))
+
+        backend_match = cursor.fetchone()
+
+        if not backend_match:
+            # Try pattern matching (e.g., /api/users/* matches /api/users/:id)
+            # Convert URL to SQL LIKE pattern
+            like_pattern = url_path.replace('*', '%')
+
+            cursor.execute("""
+                SELECT file, method, pattern, controls
+                FROM api_endpoints
+                WHERE ? LIKE REPLACE(REPLACE(pattern, ':id', '%'), ':{param}', '%')
+                AND method = ?
+ LIMIT 1 + """, (url_path, method)) + + backend_match = cursor.fetchone() + + if not backend_match: + # No matching backend endpoint found + return None + + backend_file, backend_method, backend_pattern, backend_controls = backend_match + + # Find the exact line number of the backend endpoint + cursor.execute(""" + SELECT line + FROM symbols + WHERE path = ? AND type = 'function' + ORDER BY line + LIMIT 1 + """, (backend_file,)) + + line_result = cursor.fetchone() + backend_line = line_result[0] if line_result else 1 + + return { + "frontend": { + "file": target_file, + "line": target_line, + "method": method, + "url": url_path + }, + "backend": { + "file": backend_file, + "line": backend_line, + "method": backend_method, + "pattern": backend_pattern, + "controls": backend_controls + } + } + + except Exception as e: + # Error reading file or parsing + return None + + +def format_impact_report(impact_data: Dict[str, Any]) -> str: + """ + Format impact analysis results into a human-readable report. + + Args: + impact_data: Results from analyze_impact + + Returns: + Formatted string report + """ + lines = [] + + # Header + lines.append("=" * 60) + lines.append("IMPACT ANALYSIS REPORT") + lines.append("=" * 60) + + # Target symbol + if impact_data.get("error"): + lines.append(f"\nError: {impact_data['error']}") + return "\n".join(lines) + + # Check for cross-stack trace + if impact_data.get("cross_stack_trace"): + trace = impact_data["cross_stack_trace"] + lines.append(f"\n{'─' * 40}") + lines.append("FRONTEND TO BACKEND TRACE") + lines.append(f"{'─' * 40}") + lines.append(f"Frontend API Call:") + lines.append(f" File: {trace['frontend']['file']}:{trace['frontend']['line']}") + lines.append(f" Method: {trace['frontend']['method']}") + lines.append(f" URL: {trace['frontend']['url']}") + lines.append(f"\nBackend Endpoint:") + lines.append(f" File: {trace['backend']['file']}:{trace['backend']['line']}") + lines.append(f" Method: {trace['backend']['method']}") + lines.append(f" Pattern: {trace['backend']['pattern']}") + if trace['backend'].get('controls') and trace['backend']['controls'] != '[]': + lines.append(f" Security Controls: {trace['backend']['controls']}") + + # Show backend symbol as the primary target + if impact_data.get("backend_symbol"): + backend = impact_data["backend_symbol"] + lines.append(f"\nBackend Function: {backend['name']} ({backend['type']})") + lines.append(f"Location: {backend['file']}:{backend['line']}") + else: + target = impact_data["target_symbol"] + lines.append(f"\nTarget Symbol: {target['name']} ({target['type']})") + lines.append(f"Location: {target['file']}:{target['line']}") + + # Impact summary + summary = impact_data["impact_summary"] + lines.append(f"\n{'─' * 40}") + lines.append("IMPACT SUMMARY") + lines.append(f"{'─' * 40}") + lines.append(f"Direct Upstream Dependencies: {summary['direct_upstream']}") + lines.append(f"Direct Downstream Dependencies: {summary['direct_downstream']}") + lines.append(f"Total Upstream (including transitive): {summary['total_upstream']}") + lines.append(f"Total Downstream (including transitive): {summary['total_downstream']}") + lines.append(f"Total Impact Radius: {summary['total_impact']} symbols") + lines.append(f"Affected Files: {summary['affected_files']}") + + # Upstream dependencies + if impact_data["upstream"]: + lines.append(f"\n{'─' * 40}") + lines.append("UPSTREAM DEPENDENCIES (Who calls this)") + lines.append(f"{'─' * 40}") + for dep in impact_data["upstream"][:10]: # Limit to first 10 + lines.append(f" • 
{dep['symbol']} ({dep['type']}) in {dep['file']}:{dep['line']}") + if len(impact_data["upstream"]) > 10: + lines.append(f" ... and {len(impact_data['upstream']) - 10} more") + + # Downstream dependencies + if impact_data["downstream"]: + lines.append(f"\n{'─' * 40}") + lines.append("DOWNSTREAM DEPENDENCIES (What this calls)") + lines.append(f"{'─' * 40}") + for dep in impact_data["downstream"][:10]: # Limit to first 10 + if dep["file"] != "external": + lines.append(f" • {dep['symbol']} ({dep['type']}) in {dep['file']}:{dep['line']}") + else: + lines.append(f" • {dep['symbol']} (external/built-in)") + if len(impact_data["downstream"]) > 10: + lines.append(f" ... and {len(impact_data['downstream']) - 10} more") + + # Risk assessment + lines.append(f"\n{'─' * 40}") + lines.append("RISK ASSESSMENT") + lines.append(f"{'─' * 40}") + + risk_level = "LOW" + if summary["total_impact"] > 20: + risk_level = "HIGH" + elif summary["total_impact"] > 10: + risk_level = "MEDIUM" + + lines.append(f"Change Risk Level: {risk_level}") + + if risk_level == "HIGH": + lines.append("⚠ WARNING: This change has a large blast radius!") + lines.append(" Consider:") + lines.append(" - Breaking the change into smaller, incremental steps") + lines.append(" - Adding comprehensive tests before refactoring") + lines.append(" - Reviewing all upstream dependencies for compatibility") + elif risk_level == "MEDIUM": + lines.append("⚠ CAUTION: This change affects multiple components") + lines.append(" Ensure all callers are updated if the interface changes") + + lines.append("=" * 60) + + return "\n".join(lines) \ No newline at end of file diff --git a/theauditor/indexer/__init__.py b/theauditor/indexer/__init__.py new file mode 100644 index 0000000..7c16f96 --- /dev/null +++ b/theauditor/indexer/__init__.py @@ -0,0 +1,393 @@ +"""TheAuditor Indexer Package. + +This package provides modular, extensible code indexing functionality. +It includes: +- FileWalker for directory traversal with monorepo support +- DatabaseManager for SQLite operations +- Pluggable language extractors +- AST caching for performance +""" + +import os +import sys +import json +import logging +from pathlib import Path +from typing import Dict, Any, List, Optional, Tuple + +from theauditor.config_runtime import load_runtime_config +from theauditor.ast_parser import ASTParser + +from .config import ( + DEFAULT_BATCH_SIZE, JS_BATCH_SIZE, + SUPPORTED_AST_EXTENSIONS, SQL_EXTENSIONS, + DOCKERFILE_PATTERNS +) +from .core import FileWalker, ASTCache +from .database import DatabaseManager +from .extractors import ExtractorRegistry +from .extractors.docker import DockerExtractor +from .extractors.generic import GenericExtractor + +logger = logging.getLogger(__name__) + + +class IndexerOrchestrator: + """Orchestrates the indexing process, coordinating all components.""" + + def __init__(self, root_path: Path, db_path: str, + batch_size: int = DEFAULT_BATCH_SIZE, + follow_symlinks: bool = False, + exclude_patterns: Optional[List[str]] = None): + """Initialize the indexer orchestrator. 
+ + Args: + root_path: Project root path + db_path: Path to SQLite database + batch_size: Batch size for database operations + follow_symlinks: Whether to follow symbolic links + exclude_patterns: Patterns to exclude from indexing + """ + self.root_path = root_path + self.config = load_runtime_config(str(root_path)) + + # Initialize components + self.ast_parser = ASTParser() + self.ast_cache = ASTCache(root_path) + self.db_manager = DatabaseManager(db_path, batch_size) + self.file_walker = FileWalker( + root_path, self.config, follow_symlinks, exclude_patterns + ) + self.extractor_registry = ExtractorRegistry(root_path, self.ast_parser) + + # Special extractors that don't follow standard extension mapping + self.docker_extractor = DockerExtractor(root_path, self.ast_parser) + self.generic_extractor = GenericExtractor(root_path, self.ast_parser) + + # Stats tracking + self.counts = { + "files": 0, + "refs": 0, + "routes": 0, + "sql": 0, + "sql_queries": 0, + "symbols": 0, + "docker": 0, + "orm": 0 + } + + def index(self) -> Tuple[Dict[str, int], Dict[str, Any]]: + """Run the complete indexing process. + + Returns: + Tuple of (counts, stats) dictionaries + """ + # Walk directory and collect files + files, stats = self.file_walker.walk() + + if not files: + print("[Indexer] No files found to index.") + return self.counts, stats + + print(f"[Indexer] Processing {len(files)} files...") + + # Separate JS/TS files for batch processing + js_ts_files = [] + js_ts_cache = {} + + for file_info in files: + if file_info['ext'] in ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs']: + file_path = self.root_path / file_info['path'] + js_ts_files.append(file_path) + + # Batch process JS/TS files if there are any + if js_ts_files: + print(f"[Indexer] Batch processing {len(js_ts_files)} JavaScript/TypeScript files...") + try: + # Process in batches for memory efficiency + for i in range(0, len(js_ts_files), JS_BATCH_SIZE): + batch = js_ts_files[i:i+JS_BATCH_SIZE] + batch_trees = self.ast_parser.parse_files_batch( + batch, root_path=str(self.root_path) + ) + + # Cache the results + for file_path in batch: + file_str = str(file_path).replace("\\", "/") # Normalize + if file_str in batch_trees: + js_ts_cache[file_str] = batch_trees[file_str] + + print(f"[Indexer] Successfully batch processed {len(js_ts_cache)} JS/TS files") + except Exception as e: + print(f"[Indexer] Batch processing failed, falling back to individual processing: {e}") + js_ts_cache = {} + + # Process all files + for idx, file_info in enumerate(files): + # Debug progress + if os.environ.get("THEAUDITOR_DEBUG") and idx % 50 == 0: + print(f"[INDEXER_DEBUG] Processing file {idx+1}/{len(files)}: {file_info['path']}", + file=sys.stderr) + + # Process the file + self._process_file(file_info, js_ts_cache) + + # Execute batch inserts periodically + if (idx + 1) % self.db_manager.batch_size == 0 or idx == len(files) - 1: + self.db_manager.flush_batch() + + # Final commit + self.db_manager.commit() + + # Report results with database location + print(f"[Indexer] Indexed {self.counts['files']} files, " + f"{self.counts['symbols']} symbols, {self.counts['refs']} imports, " + f"{self.counts['routes']} routes") + print(f"[Indexer] Database updated: {self.db_manager.db_path}") + + return self.counts, stats + + def _process_file(self, file_info: Dict[str, Any], js_ts_cache: Dict[str, Any]): + """Process a single file. 
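A minimal sketch of driving the orchestrator end to end; the .pf/repo_index.db location and the explicit schema call are assumptions for illustration rather than the pipeline's canonical wiring:

from pathlib import Path

from theauditor.indexer import IndexerOrchestrator

root = Path(".").resolve()
(root / ".pf").mkdir(exist_ok=True)                  # assumed output directory

orchestrator = IndexerOrchestrator(
    root_path=root,
    db_path=str(root / ".pf" / "repo_index.db"),     # assumed database location
    batch_size=200,
    exclude_patterns=["*.md"],
)
orchestrator.db_manager.create_schema()              # ensure tables exist before indexing
counts, stats = orchestrator.index()
print(counts["files"], counts["symbols"], stats["skipped_dirs"])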
+ + Args: + file_info: File metadata + js_ts_cache: Cache of pre-parsed JS/TS ASTs + """ + # Insert file record + self.db_manager.add_file( + file_info['path'], file_info['sha256'], file_info['ext'], + file_info['bytes'], file_info['loc'] + ) + self.counts['files'] += 1 + + # Read file content (cap at 256KB) + file_path = self.root_path / file_info['path'] + try: + with open(file_path, encoding="utf-8", errors="ignore") as f: + content = f.read(256 * 1024) + except Exception as e: + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"Debug: Cannot read {file_path}: {e}") + return + + # Store configuration files for ModuleResolver + if file_info['path'].endswith('tsconfig.json'): + # Determine context from path + context_dir = None + if 'backend/' in file_info['path']: + context_dir = 'backend' + elif 'frontend/' in file_info['path']: + context_dir = 'frontend' + + self.db_manager.add_config_file( + file_info['path'], + content, + 'tsconfig', + context_dir + ) + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"[DEBUG] Cached tsconfig: {file_info['path']} (context: {context_dir})") + + # Get or parse AST + tree = self._get_or_parse_ast(file_info, file_path, js_ts_cache) + + # Select appropriate extractor + extractor = self._select_extractor(file_info['path'], file_info['ext']) + if not extractor: + return # No extractor for this file type + + # Extract all information + try: + extracted = extractor.extract(file_info, content, tree) + except Exception as e: + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"Debug: Extraction failed for {file_path}: {e}") + return + + # Store extracted data in database + self._store_extracted_data(file_info['path'], extracted) + + def _get_or_parse_ast(self, file_info: Dict[str, Any], + file_path: Path, js_ts_cache: Dict[str, Any]) -> Optional[Dict]: + """Get AST from cache or parse the file. + + Args: + file_info: File metadata + file_path: Path to the file + js_ts_cache: Cache of pre-parsed JS/TS ASTs + + Returns: + Parsed AST tree or None + """ + if file_info['ext'] not in SUPPORTED_AST_EXTENSIONS: + return None + + # Check JS/TS batch cache + file_str = str(file_path).replace("\\", "/") + if file_str in js_ts_cache: + return js_ts_cache[file_str] + + # Check persistent AST cache + cached_tree = self.ast_cache.get(file_info['sha256']) + if cached_tree: + return cached_tree + + # Parse the file + tree = self.ast_parser.parse_file(file_path, root_path=str(self.root_path)) + + # Cache the result if it's JSON-serializable + if tree and isinstance(tree, dict): + self.ast_cache.set(file_info['sha256'], tree) + + return tree + + def _select_extractor(self, file_path: str, file_ext: str): + """Select the appropriate extractor for a file. + + Args: + file_path: Path to the file + file_ext: File extension + + Returns: + Appropriate extractor instance or None + """ + # Check special extractors first (by filename pattern) + if self.docker_extractor.should_extract(file_path): + return self.docker_extractor + if self.generic_extractor.should_extract(file_path): + return self.generic_extractor + + # Use registry for standard extension-based extraction + return self.extractor_registry.get_extractor(file_ext) + + def _store_extracted_data(self, file_path: str, extracted: Dict[str, Any]): + """Store extracted data in the database. 
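Based on how this method unpacks its input, the `extracted` dictionary produced by an extractor looks roughly like the following; every concrete value here is hypothetical:

extracted = {
    "imports": [("import", "express"), ("require", "./db")],       # (kind, value) tuples
    "resolved_imports": {"./db": "src/db.ts"},                     # raw specifier -> resolved path
    "routes": [("GET", "/api/users", ["requireAuth"])],            # (method, pattern, controls)
    "sql_objects": [("table", "users")],                           # (kind, name)
    "sql_queries": [{"line": 42, "query_text": "SELECT ...", "command": "SELECT", "tables": ["users"]}],
    "symbols": [{"name": "getUsers", "type": "function", "line": 10, "col": 0}],
    "orm_queries": [{"line": 12, "query_type": "findAll", "includes": None,
                     "has_limit": False, "has_transaction": False}],
    "docker_info": None,        # populated only by the Docker extractor
    "docker_issues": [],
    "assignments": [],          # data-flow rows consumed by taint analysis
    "function_calls": [],
    "returns": [],
}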
+ + Args: + file_path: Path to the source file + extracted: Dictionary of extracted data + """ + # Store imports/references + if 'imports' in extracted: + for kind, value in extracted['imports']: + # Check for resolved import + resolved = extracted.get('resolved_imports', {}).get(value, value) + self.db_manager.add_ref(file_path, kind, resolved) + self.counts['refs'] += 1 + + # Store routes + if 'routes' in extracted: + for method, pattern, controls in extracted['routes']: + self.db_manager.add_endpoint(file_path, method, pattern, controls) + self.counts['routes'] += 1 + + # Store SQL objects + if 'sql_objects' in extracted: + for kind, name in extracted['sql_objects']: + self.db_manager.add_sql_object(file_path, kind, name) + self.counts['sql'] += 1 + + # Store SQL queries + if 'sql_queries' in extracted: + for query in extracted['sql_queries']: + self.db_manager.add_sql_query( + file_path, query['line'], query['query_text'], + query['command'], query['tables'] + ) + self.counts['sql_queries'] += 1 + + # Store symbols + if 'symbols' in extracted: + for symbol in extracted['symbols']: + self.db_manager.add_symbol( + file_path, symbol['name'], symbol['type'], + symbol['line'], symbol['col'] + ) + self.counts['symbols'] += 1 + + # Store ORM queries + if 'orm_queries' in extracted: + for query in extracted['orm_queries']: + self.db_manager.add_orm_query( + file_path, query['line'], query['query_type'], + query.get('includes'), query.get('has_limit', False), + query.get('has_transaction', False) + ) + self.counts['orm'] += 1 + + # Store Docker information + if 'docker_info' in extracted and extracted['docker_info']: + info = extracted['docker_info'] + self.db_manager.add_docker_image( + file_path, info.get('base_image'), info.get('exposed_ports', []), + info.get('env_vars', {}), info.get('build_args', {}), + info.get('user'), info.get('has_healthcheck', False) + ) + self.counts['docker'] += 1 + + # Store Docker security issues + if 'docker_issues' in extracted: + for issue in extracted['docker_issues']: + self.db_manager.add_docker_issue( + file_path, issue['line'], issue['issue_type'], issue['severity'] + ) + + # Store data flow information for taint analysis + if 'assignments' in extracted: + if extracted['assignments']: + logger.info(f"[DEBUG] Found {len(extracted['assignments'])} assignments in {file_path}") + # Log first assignment for debugging + if extracted['assignments']: + first = extracted['assignments'][0] + logger.info(f"[DEBUG] First assignment: line {first.get('line')}, {first.get('target_var')} = {first.get('source_expr', '')[:50]}") + for assignment in extracted['assignments']: + self.db_manager.add_assignment( + file_path, assignment['line'], assignment['target_var'], + assignment['source_expr'], assignment['source_vars'], + assignment['in_function'] + ) + + if 'function_calls' in extracted: + for call in extracted['function_calls']: + self.db_manager.add_function_call_arg( + file_path, call['line'], call['caller_function'], + call['callee_function'], call['argument_index'], + call['argument_expr'], call['param_name'] + ) + + if 'returns' in extracted: + for ret in extracted['returns']: + self.db_manager.add_function_return( + file_path, ret['line'], ret['function_name'], + ret['return_expr'], ret['return_vars'] + ) + + +# Import backward compatibility functions from the compat module +from ..indexer_compat import ( + build_index, + walk_directory, + populate_database, + extract_imports, + extract_routes, + extract_sql_objects, + extract_sql_queries +) + +# Backward 
compatibility exports +__all__ = [ + 'IndexerOrchestrator', + 'FileWalker', + 'DatabaseManager', + 'ASTCache', + 'ExtractorRegistry', + # Backward compat functions + 'build_index', + 'walk_directory', + 'populate_database', + 'extract_imports', + 'extract_routes', + 'extract_sql_objects', + 'extract_sql_queries' +] \ No newline at end of file diff --git a/theauditor/indexer/config.py b/theauditor/indexer/config.py new file mode 100644 index 0000000..bb30a72 --- /dev/null +++ b/theauditor/indexer/config.py @@ -0,0 +1,165 @@ +"""Centralized configuration for the indexer. + +All constants, patterns, and configuration values used across the indexer +package are defined here. +""" + +import re + +# Directories to skip (always ignored) +SKIP_DIRS = { + ".git", + ".hg", + ".svn", + "node_modules", + "dist", + "build", + "out", + ".venv", + ".auditor_venv", # TheAuditor's isolated virtual environment + ".venv_wsl", # WSL virtual environments + "venv", + "__pycache__", + ".pytest_cache", + ".mypy_cache", + ".ruff_cache", + "target", # Rust + ".next", # Next.js + ".nuxt", # Nuxt + "coverage", + ".coverage", + "htmlcov", + ".tox", + ".egg-info", + "__pycache__", + "*.egg-info", + ".pf", # TheAuditor's own output directory (contains all artifacts now) + ".claude", # Claude integration directory +} + +# Compiled regex patterns for extraction +IMPORT_PATTERNS = [ + # JavaScript/TypeScript + re.compile(r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]"), + re.compile(r"import\s*\(['\"]([^'\"]+)['\"]\)"), + re.compile(r"require\s*\(['\"]([^'\"]+)['\"]\)"), + # Python + re.compile(r"from\s+([^\s]+)\s+import"), + re.compile(r"import\s+([^\s,]+)"), + # Go + re.compile(r'import\s+"([^"]+)"'), + re.compile(r"import\s+\(\s*[\"']([^\"']+)[\"']"), + # Java + re.compile(r"import\s+([^\s;]+);"), + re.compile(r"package\s+([^\s;]+);"), + # Ruby + re.compile(r"require\s+['\"]([^'\"]+)['\"]"), + re.compile(r"require_relative\s+['\"]([^'\"]+)['\"]"), +] + +ROUTE_PATTERNS = [ + # Express/Fastify style + re.compile(r"(?:app|router)\.(get|post|put|patch|delete|all)\s*\(['\"`]([^'\"`]+)['\"`]"), + # Decorator style (Python Flask, Java Spring, etc) + re.compile(r"@(Get|Post|Put|Patch|Delete|RequestMapping)\s*\(['\"`]([^'\"`]+)['\"`]\)"), + re.compile(r"@(GET|POST|PUT|PATCH|DELETE)\s*\(['\"`]([^'\"`]+)['\"`]\)"), +] + +SQL_PATTERNS = [ + re.compile(r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)", re.IGNORECASE), + re.compile(r"CREATE\s+INDEX\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)", re.IGNORECASE), + re.compile(r"CREATE\s+VIEW\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)", re.IGNORECASE), + re.compile(r"CREATE\s+(?:OR\s+REPLACE\s+)?FUNCTION\s+(\w+)", re.IGNORECASE), + re.compile(r"CREATE\s+POLICY\s+(\w+)", re.IGNORECASE), + re.compile(r"CONSTRAINT\s+(\w+)", re.IGNORECASE), +] + +# Patterns to find SQL query strings in code +SQL_QUERY_PATTERNS = [ + # Multi-line SQL strings (Python, JS, etc.) 
+ re.compile(r'"""([^"]*(?:SELECT|INSERT|UPDATE|DELETE|MERGE|WITH)[^"]*)"""', re.IGNORECASE | re.DOTALL), + re.compile(r"'''([^']*(?:SELECT|INSERT|UPDATE|DELETE|MERGE|WITH)[^']*)'''", re.IGNORECASE | re.DOTALL), + re.compile(r'`([^`]*(?:SELECT|INSERT|UPDATE|DELETE|MERGE|WITH)[^`]*)`', re.IGNORECASE | re.DOTALL), + # Single-line SQL strings + re.compile(r'"([^"]*(?:SELECT|INSERT|UPDATE|DELETE|MERGE|WITH)[^"]*)"', re.IGNORECASE), + re.compile(r"'([^']*(?:SELECT|INSERT|UPDATE|DELETE|MERGE|WITH)[^']*)'", re.IGNORECASE), + # Common ORM/query builder patterns + re.compile(r'\.query\s*\(\s*["\']([^"\']+)["\']', re.IGNORECASE), + re.compile(r'\.execute\s*\(\s*["\']([^"\']+)["\']', re.IGNORECASE), + re.compile(r'\.raw\s*\(\s*["\']([^"\']+)["\']', re.IGNORECASE), +] + +# Default batch size for database operations +DEFAULT_BATCH_SIZE = 200 +MAX_BATCH_SIZE = 1000 + +# File processing batch size for JavaScript/TypeScript +JS_BATCH_SIZE = 20 + +# Standard monorepo structures to check +STANDARD_MONOREPO_PATHS = [ + ("backend", "src"), # backend/src + ("frontend", "src"), # frontend/src + ("mobile", "src"), # mobile/src + ("server", "src"), # server/src + ("client", "src"), # client/src + ("web", "src"), # web/src + ("api", "src"), # api/src + ("packages", None), # packages/* (for lerna/yarn workspaces) + ("apps", None), # apps/* (for nx/turborepo) +] + +# Common root-level entry files in monorepos +MONOREPO_ENTRY_FILES = ["app.ts", "app.js", "index.ts", "index.js", "server.ts", "server.js"] + +# File extensions supported for AST parsing +SUPPORTED_AST_EXTENSIONS = [".py", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"] + +# SQL file extensions +SQL_EXTENSIONS = [".sql", ".psql", ".ddl"] + +# Dockerfile name patterns +DOCKERFILE_PATTERNS = ['dockerfile', 'dockerfile.dev', 'dockerfile.prod', 'dockerfile.test'] + +# Docker Compose file patterns +COMPOSE_PATTERNS = [ + 'docker-compose.yml', 'docker-compose.yaml', + 'docker-compose.override.yml', 'docker-compose.override.yaml', + 'compose.yml', 'compose.yaml' +] + +# Nginx config file patterns +NGINX_PATTERNS = ['nginx.conf', 'default.conf', 'site.conf'] + +# Sensitive ports for Docker security analysis +SENSITIVE_PORTS = ['22', '23', '135', '139', '445', '3389'] # SSH, Telnet, SMB, RDP + +# Sensitive keywords for Docker ENV security analysis +SENSITIVE_ENV_KEYWORDS = ['SECRET', 'TOKEN', 'PASSWORD', 'API_KEY', 'PRIVATE_KEY', 'ACCESS_KEY'] + +# ORM method patterns to detect +SEQUELIZE_METHODS = { + 'findAll', 'findOne', 'findByPk', 'findOrCreate', + 'create', 'update', 'destroy', 'bulkCreate', 'bulkUpdate', + 'count', 'max', 'min', 'sum', 'findAndCountAll' +} + +PRISMA_METHODS = { + 'findMany', 'findFirst', 'findUnique', 'findUniqueOrThrow', + 'create', 'createMany', 'update', 'updateMany', 'upsert', + 'delete', 'deleteMany', 'count', 'aggregate', 'groupBy' +} + +TYPEORM_REPOSITORY_METHODS = { + 'find', 'findOne', 'findOneBy', 'findOneOrFail', 'findBy', + 'findAndCount', 'findAndCountBy', 'save', 'remove', 'delete', + 'update', 'insert', 'create', 'merge', 'preload', 'count', + 'increment', 'decrement', 'restore', 'softRemove' +} + +TYPEORM_QB_METHODS = { + 'createQueryBuilder', 'select', 'addSelect', 'where', 'andWhere', + 'orWhere', 'having', 'orderBy', 'groupBy', 'limit', 'take', + 'skip', 'offset', 'getMany', 'getOne', 'getRawMany', 'getRawOne', + 'getManyAndCount', 'getCount', 'execute', 'delete', 'update', 'insert' +} \ No newline at end of file diff --git a/theauditor/indexer/core.py b/theauditor/indexer/core.py new file mode 100644 index 0000000..954455c 
--- /dev/null +++ b/theauditor/indexer/core.py @@ -0,0 +1,409 @@ +"""Core functionality for file system operations and AST caching. + +This module contains the FileWalker class for directory traversal with monorepo +detection, and the ASTCache class for persistent AST caching. +""" + +import os +import json +import sqlite3 +import fnmatch +from pathlib import Path +from typing import Tuple, List, Dict, Any, Optional, Set + +from theauditor.utils import compute_file_hash, count_lines_in_file +from theauditor.config_runtime import load_runtime_config +from .config import ( + SKIP_DIRS, STANDARD_MONOREPO_PATHS, MONOREPO_ENTRY_FILES +) + + +class ASTCache: + """Manages persistent AST caching for improved performance.""" + + def __init__(self, root_path: Path): + """Initialize the AST cache. + + Args: + root_path: Project root path for cache directory + """ + self.cache_dir = root_path / ".pf" / "ast_cache" + self.cache_dir.mkdir(parents=True, exist_ok=True) + + def get(self, file_hash: str) -> Optional[Dict]: + """Get cached AST for a file by its hash. + + Args: + file_hash: SHA256 hash of the file content + + Returns: + Cached AST tree or None if not found + """ + cache_file = self.cache_dir / f"{file_hash}.json" + if cache_file.exists(): + try: + with open(cache_file, 'r', encoding='utf-8') as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + # Cache corrupted, return None + return None + return None + + def set(self, file_hash: str, tree: Dict) -> None: + """Store an AST tree in the cache. + + Args: + file_hash: SHA256 hash of the file content + tree: AST tree to cache (must be JSON serializable) + """ + cache_file = self.cache_dir / f"{file_hash}.json" + try: + # Only cache if tree is JSON serializable (dict), not a Tree object + if isinstance(tree, dict): + with open(cache_file, 'w', encoding='utf-8') as f: + json.dump(tree, f) + except (OSError, PermissionError, TypeError): + # Cache write failed, non-critical + pass + + def invalidate(self, file_hash: str) -> None: + """Invalidate cache entry for a specific file. + + Args: + file_hash: SHA256 hash of the file content + """ + cache_file = self.cache_dir / f"{file_hash}.json" + if cache_file.exists(): + try: + cache_file.unlink() + except (OSError, PermissionError): + pass + + +def is_text_file(file_path: Path) -> bool: + """Check if file is text (not binary). + + Args: + file_path: Path to the file to check + + Returns: + True if file is text, False if binary + """ + try: + with open(file_path, "rb") as f: + chunk = f.read(8192) + if b"\0" in chunk: + return False + # Try to decode as UTF-8 + try: + chunk.decode("utf-8") + return True + except UnicodeDecodeError: + return False + except (FileNotFoundError, PermissionError, UnicodeDecodeError): + return False + + +def get_first_lines(file_path: Path, n: int = 2) -> List[str]: + """Get first n lines of a text file. + + Args: + file_path: Path to the file + n: Number of lines to read + + Returns: + List of first n lines from the file + """ + lines = [] + try: + with open(file_path, encoding="utf-8", errors="ignore") as f: + for i, line in enumerate(f): + if i >= n: + break + # Strip \r and truncate at 200 chars + line = line.replace("\r", "").rstrip("\n")[:200] + lines.append(line) + except (FileNotFoundError, PermissionError, UnicodeDecodeError): + # Gracefully skip unreadable files + pass + return lines + + +def load_gitignore_patterns(root_path: Path) -> Set[str]: + """Load patterns from .gitignore if it exists. 
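The cache above is keyed by content hash alone, so an unchanged file keeps its cached AST across runs and renames. A rough sketch of that lookup pattern; the file name, the hashing shortcut, and the stand-in tree are illustrative assumptions:

import hashlib
from pathlib import Path

from theauditor.indexer.core import ASTCache

cache = ASTCache(Path("."))                      # creates .pf/ast_cache if needed

source = Path("app.py")                          # hypothetical file
digest = hashlib.sha256(source.read_bytes()).hexdigest()

tree = cache.get(digest)
if tree is None:
    tree = {"type": "module", "children": []}    # stand-in for a real JSON-serializable AST
    cache.set(digest, tree)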
+ + Args: + root_path: Project root path + + Returns: + Set of directory patterns to ignore + """ + gitignore_path = root_path / ".gitignore" + patterns = set() + + if gitignore_path.exists(): + try: + with open(gitignore_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + # Skip comments and empty lines + if line and not line.startswith('#'): + # Convert gitignore patterns to simple dir names + # This is a simplified approach - just extract directory names + pattern = line.rstrip('/') + if '/' not in pattern and '*' not in pattern: + patterns.add(pattern) + except Exception: + pass # Ignore errors reading .gitignore + + return patterns + + +class FileWalker: + """Handles directory walking with monorepo detection and filtering.""" + + def __init__(self, root_path: Path, config: Dict[str, Any], + follow_symlinks: bool = False, exclude_patterns: Optional[List[str]] = None): + """Initialize the file walker. + + Args: + root_path: Root directory to walk + config: Runtime configuration + follow_symlinks: Whether to follow symbolic links + exclude_patterns: Additional patterns to exclude + """ + self.root_path = root_path + self.config = config + self.follow_symlinks = follow_symlinks + self.exclude_patterns = exclude_patterns or [] + + # Load gitignore patterns and combine with default skip dirs + gitignore_patterns = load_gitignore_patterns(root_path) + self.skip_dirs = SKIP_DIRS | gitignore_patterns + + # Stats tracking + self.stats = { + "total_files": 0, + "text_files": 0, + "binary_files": 0, + "large_files": 0, + "skipped_dirs": 0, + } + + def detect_monorepo(self) -> Tuple[bool, List[Path], List[Path]]: + """Detect if project is a monorepo and return source directories. + + Returns: + Tuple of (is_monorepo, src_directories, root_entry_files) + """ + monorepo_dirs = [] + monorepo_detected = False + + # Check which monorepo directories exist + for base_dir, src_dir in STANDARD_MONOREPO_PATHS: + base_path = self.root_path / base_dir + if base_path.exists() and base_path.is_dir(): + if src_dir: + # Check if src subdirectory exists + src_path = base_path / src_dir + if src_path.exists() and src_path.is_dir(): + monorepo_dirs.append(src_path) + monorepo_detected = True + else: + # For packages/apps directories, add all subdirectories with src folders + for subdir in base_path.iterdir(): + if subdir.is_dir() and not subdir.name.startswith('.'): + src_path = subdir / "src" + if src_path.exists() and src_path.is_dir(): + monorepo_dirs.append(src_path) + monorepo_detected = True + + # Check for root-level entry files in monorepo + root_entry_files = [] + if monorepo_detected: + for entry_file in MONOREPO_ENTRY_FILES: + entry_path = self.root_path / entry_file + if entry_path.exists() and entry_path.is_file(): + root_entry_files.append(entry_path) + + return monorepo_detected, monorepo_dirs, root_entry_files + + def process_file(self, file: Path, exclude_file_patterns: List[str]) -> Optional[Dict[str, Any]]: + """Process a single file and return its info. 
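The .gitignore handling above is deliberately shallow: comments, glob patterns, and nested paths are ignored, and only bare directory names are merged into the skip set. A self-contained mirror of that filtering, with hypothetical input:

def simplified_gitignore_dirs(lines):
    """Mirror of the filtering above: keep only bare directory names."""
    dirs = set()
    for line in lines:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        pattern = line.rstrip("/")
        if "/" not in pattern and "*" not in pattern:
            dirs.add(pattern)
    return dirs

print(simplified_gitignore_dirs(["build/", "logs", "*.log", "docs/generated", "# comment"]))
# {'build', 'logs'} (order may vary)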
+ + Args: + file: Path to the file to process + exclude_file_patterns: Patterns for files to exclude + + Returns: + File info dictionary or None if file should be skipped + """ + # Check if file matches any exclude pattern + if exclude_file_patterns: + filename = file.name + relative_path = file.relative_to(self.root_path).as_posix() + for pattern in exclude_file_patterns: + # Check both the filename and the full relative path + if fnmatch.fnmatch(filename, pattern) or fnmatch.fnmatch(relative_path, pattern): + return None + + # Skip symlinks if not following + try: + if not self.follow_symlinks and file.is_symlink(): + return None + except (OSError, PermissionError): + # On Windows, is_symlink() can fail on certain paths + return None + + try: + file_size = file.stat().st_size + + # Skip large files + if file_size >= self.config["limits"]["max_file_size"]: + self.stats["large_files"] += 1 + return None + + # Check if text file + if not is_text_file(file): + self.stats["binary_files"] += 1 + return None + + self.stats["text_files"] += 1 + + # Compute metadata + relative_path = file.relative_to(self.root_path) + posix_path = relative_path.as_posix() + + file_info = { + "path": posix_path, + "sha256": compute_file_hash(file), + "ext": file.suffix, + "bytes": file_size, + "loc": count_lines_in_file(file), + "first_lines": get_first_lines(file), + } + + return file_info + + except (FileNotFoundError, PermissionError, UnicodeDecodeError, sqlite3.Error, OSError): + # Skip files we can't read + return None + + def walk(self) -> Tuple[List[Dict], Dict[str, Any]]: + """Walk directory and collect file information. + + Returns: + Tuple of (files_list, statistics) + """ + files = [] + + # Separate file and directory patterns from exclude_patterns + exclude_file_patterns = [] + if self.exclude_patterns: + for pattern in self.exclude_patterns: + # Directory patterns + if pattern.endswith('/**'): + # Pattern like "theauditor/**" means skip the directory + self.skip_dirs.add(pattern.rstrip('/**')) + elif pattern.endswith('/'): + self.skip_dirs.add(pattern.rstrip('/')) + elif '/' in pattern and '*' not in pattern: + # Add the first directory component + self.skip_dirs.add(pattern.split('/')[0]) + else: + # File pattern (e.g., "*.md", "pyproject.toml") + exclude_file_patterns.append(pattern) + + # Detect if this is a monorepo + monorepo_detected, monorepo_dirs, root_entry_files = self.detect_monorepo() + + if monorepo_detected: + print(f"[Indexer] Monorepo detected. 
Using whitelist for {len(monorepo_dirs)} src directories") + + # Process whitelisted directories only + for src_dir in monorepo_dirs: + for dirpath, dirnames, filenames in os.walk(src_dir, followlinks=self.follow_symlinks): + # Still apply skip_dirs within the whitelisted paths + skipped_count = len([d for d in dirnames if d in self.skip_dirs]) + self.stats["skipped_dirs"] += skipped_count + dirnames[:] = [d for d in dirnames if d not in self.skip_dirs] + + # Process files in this directory + for filename in filenames: + self.stats["total_files"] += 1 + file = Path(dirpath) / filename + + file_info = self.process_file(file, exclude_file_patterns) + if file_info: + files.append(file_info) + + # CRITICAL: Also collect config files from monorepo directories + # These are outside src/ but essential for module resolution + config_patterns = ['tsconfig.json', 'tsconfig.*.json', 'package.json', + 'webpack.config.js', 'vite.config.ts', '.babelrc*'] + + for base_dir, _ in STANDARD_MONOREPO_PATHS: + base_path = self.root_path / base_dir + if base_path.exists() and base_path.is_dir(): + # Look for config files in the base directory (not just src) + for pattern in config_patterns: + for config_file in base_path.glob(pattern): + if config_file.is_file(): + self.stats["total_files"] += 1 + file_info = self.process_file(config_file, []) + if file_info: + files.append(file_info) + + # Also check root directory for configs + for pattern in config_patterns: + for config_file in self.root_path.glob(pattern): + if config_file.is_file() and config_file not in [f for f in files]: + self.stats["total_files"] += 1 + file_info = self.process_file(config_file, []) + if file_info: + files.append(file_info) + + # Also process root-level entry files + for entry_file in root_entry_files: + self.stats["total_files"] += 1 + file_info = self.process_file(entry_file, []) + if file_info: + files.append(file_info) + + else: + # Not a monorepo, use traditional approach + print("[Indexer] Standard project structure detected. Using traditional scanning.") + + for dirpath, dirnames, filenames in os.walk(self.root_path, followlinks=self.follow_symlinks): + # Count directories that will be skipped + skipped_count = len([d for d in dirnames if d in self.skip_dirs]) + self.stats["skipped_dirs"] += skipped_count + + # Skip ignored directories + dirnames[:] = [d for d in dirnames if d not in self.skip_dirs] + + # On Windows, skip problematic symlink directories in venv + current_path = Path(dirpath) + try: + if not os.access(dirpath, os.R_OK): + continue + # Skip known problematic symlinks in virtual environments + if any(part in [".venv", "venv", "virtualenv"] for part in current_path.parts): + if current_path.name in ["lib64", "bin64", "include64"]: + dirnames.clear() + continue + except (OSError, PermissionError): + continue + + for filename in filenames: + self.stats["total_files"] += 1 + file = Path(dirpath) / filename + + file_info = self.process_file(file, exclude_file_patterns) + if file_info: + files.append(file_info) + + # Sort by path for deterministic output + files.sort(key=lambda x: x["path"]) + + return files, self.stats \ No newline at end of file diff --git a/theauditor/indexer/database.py b/theauditor/indexer/database.py new file mode 100644 index 0000000..e9b5d59 --- /dev/null +++ b/theauditor/indexer/database.py @@ -0,0 +1,607 @@ +"""Database operations for the indexer. 
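A minimal sketch of the add, flush, commit cycle this module implements; the in-memory database and the record values are purely illustrative:

from theauditor.indexer.database import DatabaseManager

db = DatabaseManager(":memory:", batch_size=200)
db.create_schema()

db.add_file("src/app.py", "deadbeef" * 8, ".py", 1024, 40)   # hypothetical record
db.add_symbol("src/app.py", "main", "function", 1, 0)

db.flush_batch()   # runs executemany() over every pending batch list
db.commit()
db.close()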
+ +This module contains the DatabaseManager class which handles all database +operations including schema creation, batch inserts, and transaction management. +""" + +import sqlite3 +import json +from typing import Any, List, Dict, Optional +from pathlib import Path + +from .config import DEFAULT_BATCH_SIZE, MAX_BATCH_SIZE + + +class DatabaseManager: + """Manages database operations with batching and transactions.""" + + def __init__(self, db_path: str, batch_size: int = DEFAULT_BATCH_SIZE): + """Initialize the database manager. + + Args: + db_path: Path to the SQLite database file + batch_size: Size of batches for insert operations + """ + self.db_path = db_path + self.conn = sqlite3.connect(db_path) + + # Validate and set batch size + if batch_size <= 0: + self.batch_size = DEFAULT_BATCH_SIZE + elif batch_size > MAX_BATCH_SIZE: + self.batch_size = MAX_BATCH_SIZE + else: + self.batch_size = batch_size + + # Initialize batch lists + self.files_batch = [] + self.refs_batch = [] + self.endpoints_batch = [] + self.sql_objects_batch = [] + self.sql_queries_batch = [] + self.symbols_batch = [] + self.orm_queries_batch = [] + self.docker_images_batch = [] + self.docker_issues_batch = [] + self.assignments_batch = [] + self.function_call_args_batch = [] + self.function_returns_batch = [] + self.prisma_batch = [] + self.compose_batch = [] + self.nginx_batch = [] + + def begin_transaction(self): + """Start a new transaction.""" + self.conn.execute("BEGIN IMMEDIATE") + + def commit(self): + """Commit the current transaction.""" + try: + self.conn.commit() + except sqlite3.Error as e: + self.conn.rollback() + raise RuntimeError(f"Failed to commit database changes: {e}") + + def rollback(self): + """Rollback the current transaction.""" + self.conn.rollback() + + def close(self): + """Close the database connection.""" + self.conn.close() + + def create_schema(self): + """Create all database tables and indexes.""" + cursor = self.conn.cursor() + + # Create tables + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS files( + path TEXT PRIMARY KEY, + sha256 TEXT NOT NULL, + ext TEXT NOT NULL, + bytes INTEGER NOT NULL, + loc INTEGER NOT NULL + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS config_files( + path TEXT PRIMARY KEY, + content TEXT NOT NULL, + type TEXT NOT NULL, + context_dir TEXT, + FOREIGN KEY(path) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS refs( + src TEXT NOT NULL, + kind TEXT NOT NULL, + value TEXT NOT NULL, + FOREIGN KEY(src) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS api_endpoints( + file TEXT NOT NULL, + method TEXT NOT NULL, + pattern TEXT NOT NULL, + controls TEXT, + FOREIGN KEY(file) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS sql_objects( + file TEXT NOT NULL, + kind TEXT NOT NULL, + name TEXT NOT NULL, + FOREIGN KEY(file) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS symbols( + path TEXT NOT NULL, + name TEXT NOT NULL, + type TEXT NOT NULL, + line INTEGER NOT NULL, + col INTEGER NOT NULL, + FOREIGN KEY(path) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS sql_queries( + file_path TEXT NOT NULL, + line_number INTEGER NOT NULL, + query_text TEXT NOT NULL, + command TEXT NOT NULL, + tables TEXT, + FOREIGN KEY(file_path) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS docker_images( + 
file_path TEXT PRIMARY KEY, + base_image TEXT, + exposed_ports TEXT, + env_vars TEXT, + build_args TEXT, + user TEXT, + has_healthcheck BOOLEAN DEFAULT 0, + FOREIGN KEY(file_path) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS docker_issues( + file TEXT NOT NULL, + line INTEGER NOT NULL, + issue_type TEXT NOT NULL, + severity TEXT NOT NULL, + FOREIGN KEY(file) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS orm_queries( + file TEXT NOT NULL, + line INTEGER NOT NULL, + query_type TEXT NOT NULL, + includes TEXT, + has_limit BOOLEAN DEFAULT 0, + has_transaction BOOLEAN DEFAULT 0, + FOREIGN KEY(file) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS prisma_models( + model_name TEXT NOT NULL, + field_name TEXT NOT NULL, + field_type TEXT NOT NULL, + is_indexed BOOLEAN DEFAULT 0, + is_unique BOOLEAN DEFAULT 0, + is_relation BOOLEAN DEFAULT 0, + PRIMARY KEY (model_name, field_name) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS compose_services( + file_path TEXT NOT NULL, + service_name TEXT NOT NULL, + image TEXT, + ports TEXT, + volumes TEXT, + environment TEXT, + is_privileged BOOLEAN DEFAULT 0, + network_mode TEXT, + PRIMARY KEY (file_path, service_name), + FOREIGN KEY(file_path) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS nginx_configs( + file_path TEXT NOT NULL, + block_type TEXT NOT NULL, + block_context TEXT, + directives TEXT, + level INTEGER DEFAULT 0, + PRIMARY KEY (file_path, block_type, block_context), + FOREIGN KEY(file_path) REFERENCES files(path) + ) + """ + ) + + # Data flow analysis tables for taint tracking + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS assignments ( + file TEXT NOT NULL, + line INTEGER NOT NULL, + target_var TEXT NOT NULL, + source_expr TEXT NOT NULL, + source_vars TEXT, + in_function TEXT NOT NULL, + FOREIGN KEY(file) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS function_call_args ( + file TEXT NOT NULL, + line INTEGER NOT NULL, + caller_function TEXT NOT NULL, + callee_function TEXT NOT NULL, + argument_index INTEGER NOT NULL, + argument_expr TEXT NOT NULL, + param_name TEXT NOT NULL, + FOREIGN KEY(file) REFERENCES files(path) + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS function_returns ( + file TEXT NOT NULL, + line INTEGER NOT NULL, + function_name TEXT NOT NULL, + return_expr TEXT NOT NULL, + return_vars TEXT, + FOREIGN KEY(file) REFERENCES files(path) + ) + """ + ) + + # Create indexes + cursor.execute("CREATE INDEX IF NOT EXISTS idx_refs_src ON refs(src)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_api_endpoints_file ON api_endpoints(file)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_sql_file ON sql_objects(file)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_path ON symbols(path)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_type ON symbols(type)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_sql_queries_file ON sql_queries(file_path)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_sql_queries_command ON sql_queries(command)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_docker_images_base ON docker_images(base_image)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_docker_issues_file ON docker_issues(file)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_docker_issues_severity ON docker_issues(severity)") + 
cursor.execute("CREATE INDEX IF NOT EXISTS idx_orm_queries_file ON orm_queries(file)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_orm_queries_type ON orm_queries(query_type)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_prisma_models_indexed ON prisma_models(is_indexed)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_compose_services_file ON compose_services(file_path)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_compose_services_privileged ON compose_services(is_privileged)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_nginx_configs_file ON nginx_configs(file_path)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_nginx_configs_type ON nginx_configs(block_type)") + + # Indexes for data flow tables + cursor.execute("CREATE INDEX IF NOT EXISTS idx_assignments_file ON assignments(file)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_assignments_function ON assignments(in_function)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_function_call_args_file ON function_call_args(file)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_function_call_args_caller ON function_call_args(caller_function)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_function_call_args_callee ON function_call_args(callee_function)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_function_returns_file ON function_returns(file)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_function_returns_function ON function_returns(function_name)") + + self.conn.commit() + + def clear_tables(self): + """Clear all existing data from tables.""" + cursor = self.conn.cursor() + + try: + cursor.execute("DELETE FROM files") + cursor.execute("DELETE FROM refs") + cursor.execute("DELETE FROM api_endpoints") + cursor.execute("DELETE FROM sql_objects") + cursor.execute("DELETE FROM symbols") + cursor.execute("DELETE FROM sql_queries") + cursor.execute("DELETE FROM docker_images") + cursor.execute("DELETE FROM docker_issues") + cursor.execute("DELETE FROM orm_queries") + cursor.execute("DELETE FROM prisma_models") + cursor.execute("DELETE FROM compose_services") + cursor.execute("DELETE FROM nginx_configs") + cursor.execute("DELETE FROM assignments") + cursor.execute("DELETE FROM function_call_args") + cursor.execute("DELETE FROM function_returns") + except sqlite3.Error as e: + self.conn.rollback() + raise RuntimeError(f"Failed to clear existing data: {e}") + + def add_file(self, path: str, sha256: str, ext: str, bytes_size: int, loc: int): + """Add a file record to the batch.""" + self.files_batch.append((path, sha256, ext, bytes_size, loc)) + + def add_ref(self, src: str, kind: str, value: str): + """Add a reference record to the batch.""" + self.refs_batch.append((src, kind, value)) + + def add_endpoint(self, file_path: str, method: str, pattern: str, controls: List[str]): + """Add an API endpoint record to the batch.""" + controls_json = json.dumps(controls) if controls else "[]" + self.endpoints_batch.append((file_path, method, pattern, controls_json)) + + def add_sql_object(self, file_path: str, kind: str, name: str): + """Add a SQL object record to the batch.""" + self.sql_objects_batch.append((file_path, kind, name)) + + def add_sql_query(self, file_path: str, line: int, query_text: str, command: str, tables: List[str]): + """Add a SQL query record to the batch.""" + tables_json = json.dumps(tables) if tables else "[]" + self.sql_queries_batch.append((file_path, line, query_text, command, tables_json)) + + def add_symbol(self, path: str, name: str, symbol_type: str, line: int, col: int): + 
"""Add a symbol record to the batch.""" + self.symbols_batch.append((path, name, symbol_type, line, col)) + + def add_orm_query(self, file_path: str, line: int, query_type: str, includes: Optional[str], + has_limit: bool, has_transaction: bool): + """Add an ORM query record to the batch.""" + self.orm_queries_batch.append((file_path, line, query_type, includes, has_limit, has_transaction)) + + def add_docker_image(self, file_path: str, base_image: Optional[str], exposed_ports: List[str], + env_vars: Dict, build_args: Dict, user: Optional[str], has_healthcheck: bool): + """Add a Docker image record to the batch.""" + ports_json = json.dumps(exposed_ports) + env_json = json.dumps(env_vars) + args_json = json.dumps(build_args) + self.docker_images_batch.append((file_path, base_image, ports_json, env_json, + args_json, user, has_healthcheck)) + + def add_docker_issue(self, file_path: str, line: int, issue_type: str, severity: str): + """Add a Docker security issue to the batch.""" + self.docker_issues_batch.append((file_path, line, issue_type, severity)) + + def add_assignment(self, file_path: str, line: int, target_var: str, source_expr: str, + source_vars: List[str], in_function: str): + """Add a variable assignment record to the batch.""" + source_vars_json = json.dumps(source_vars) + self.assignments_batch.append((file_path, line, target_var, source_expr, + source_vars_json, in_function)) + + def add_function_call_arg(self, file_path: str, line: int, caller_function: str, + callee_function: str, arg_index: int, arg_expr: str, param_name: str): + """Add a function call argument record to the batch.""" + self.function_call_args_batch.append((file_path, line, caller_function, callee_function, + arg_index, arg_expr, param_name)) + + def add_function_return(self, file_path: str, line: int, function_name: str, + return_expr: str, return_vars: List[str]): + """Add a function return statement record to the batch.""" + return_vars_json = json.dumps(return_vars) + self.function_returns_batch.append((file_path, line, function_name, + return_expr, return_vars_json)) + + def add_config_file(self, path: str, content: str, file_type: str, context: Optional[str] = None): + """Add a configuration file content to the batch.""" + if not hasattr(self, 'config_files_batch'): + self.config_files_batch = [] + self.config_files_batch.append((path, content, file_type, context)) + + def add_prisma_model(self, model_name: str, field_name: str, field_type: str, + is_indexed: bool, is_unique: bool, is_relation: bool): + """Add a Prisma model field record to the batch.""" + self.prisma_batch.append((model_name, field_name, field_type, + is_indexed, is_unique, is_relation)) + + def add_compose_service(self, file_path: str, service_name: str, image: Optional[str], + ports: List[str], volumes: List[str], environment: Dict, + is_privileged: bool, network_mode: str): + """Add a Docker Compose service record to the batch.""" + ports_json = json.dumps(ports) + volumes_json = json.dumps(volumes) + env_json = json.dumps(environment) + self.compose_batch.append((file_path, service_name, image, ports_json, + volumes_json, env_json, is_privileged, network_mode)) + + def add_nginx_config(self, file_path: str, block_type: str, block_context: str, + directives: Dict, level: int): + """Add an Nginx configuration block to the batch.""" + directives_json = json.dumps(directives) + # Use a default context if empty to avoid primary key issues + block_context = block_context or 'default' + + # Check for duplicates before adding + batch_key 
= (file_path, block_type, block_context) + if not any(b[:3] == batch_key for b in self.nginx_batch): + self.nginx_batch.append((file_path, block_type, block_context, + directives_json, level)) + + def flush_batch(self, batch_idx: Optional[int] = None): + """Execute all pending batch inserts.""" + cursor = self.conn.cursor() + + try: + if self.files_batch: + cursor.executemany( + "INSERT INTO files (path, sha256, ext, bytes, loc) VALUES (?, ?, ?, ?, ?)", + self.files_batch + ) + self.files_batch = [] + + if self.refs_batch: + cursor.executemany( + "INSERT INTO refs (src, kind, value) VALUES (?, ?, ?)", + self.refs_batch + ) + self.refs_batch = [] + + if self.endpoints_batch: + cursor.executemany( + "INSERT INTO api_endpoints (file, method, pattern, controls) VALUES (?, ?, ?, ?)", + self.endpoints_batch + ) + self.endpoints_batch = [] + + if self.sql_objects_batch: + cursor.executemany( + "INSERT INTO sql_objects (file, kind, name) VALUES (?, ?, ?)", + self.sql_objects_batch + ) + self.sql_objects_batch = [] + + if self.sql_queries_batch: + cursor.executemany( + "INSERT INTO sql_queries (file_path, line_number, query_text, command, tables) VALUES (?, ?, ?, ?, ?)", + self.sql_queries_batch + ) + self.sql_queries_batch = [] + + if self.symbols_batch: + cursor.executemany( + "INSERT INTO symbols (path, name, type, line, col) VALUES (?, ?, ?, ?, ?)", + self.symbols_batch + ) + self.symbols_batch = [] + + if self.orm_queries_batch: + cursor.executemany( + "INSERT INTO orm_queries (file, line, query_type, includes, has_limit, has_transaction) VALUES (?, ?, ?, ?, ?, ?)", + self.orm_queries_batch + ) + self.orm_queries_batch = [] + + if self.docker_images_batch: + cursor.executemany( + "INSERT INTO docker_images (file_path, base_image, exposed_ports, env_vars, build_args, user, has_healthcheck) VALUES (?, ?, ?, ?, ?, ?, ?)", + self.docker_images_batch + ) + self.docker_images_batch = [] + + if self.docker_issues_batch: + cursor.executemany( + "INSERT INTO docker_issues (file, line, issue_type, severity) VALUES (?, ?, ?, ?)", + self.docker_issues_batch + ) + self.docker_issues_batch = [] + + if self.assignments_batch: + cursor.executemany( + "INSERT INTO assignments (file, line, target_var, source_expr, source_vars, in_function) VALUES (?, ?, ?, ?, ?, ?)", + self.assignments_batch + ) + self.assignments_batch = [] + + if self.function_call_args_batch: + cursor.executemany( + "INSERT INTO function_call_args (file, line, caller_function, callee_function, argument_index, argument_expr, param_name) VALUES (?, ?, ?, ?, ?, ?, ?)", + self.function_call_args_batch + ) + self.function_call_args_batch = [] + + if self.function_returns_batch: + cursor.executemany( + "INSERT INTO function_returns (file, line, function_name, return_expr, return_vars) VALUES (?, ?, ?, ?, ?)", + self.function_returns_batch + ) + self.function_returns_batch = [] + + if self.prisma_batch: + cursor.executemany( + """INSERT INTO prisma_models + (model_name, field_name, field_type, is_indexed, is_unique, is_relation) + VALUES (?, ?, ?, ?, ?, ?)""", + self.prisma_batch + ) + self.prisma_batch = [] + + if self.compose_batch: + cursor.executemany( + """INSERT INTO compose_services + (file_path, service_name, image, ports, volumes, environment, + is_privileged, network_mode) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)""", + self.compose_batch + ) + self.compose_batch = [] + + if self.nginx_batch: + cursor.executemany( + """INSERT INTO nginx_configs + (file_path, block_type, block_context, directives, level) + VALUES (?, ?, ?, ?, ?)""", + 
self.nginx_batch + ) + self.nginx_batch = [] + + if hasattr(self, 'config_files_batch') and self.config_files_batch: + cursor.executemany( + "INSERT OR REPLACE INTO config_files (path, content, type, context_dir) VALUES (?, ?, ?, ?)", + self.config_files_batch + ) + self.config_files_batch = [] + + except sqlite3.Error as e: + if batch_idx is not None: + raise RuntimeError(f"Batch insert failed at file index {batch_idx}: {e}") + else: + raise RuntimeError(f"Batch insert failed: {e}") + + +# Standalone function for backward compatibility +def create_database_schema(conn: sqlite3.Connection) -> None: + """Create SQLite database schema - backward compatibility wrapper. + + Args: + conn: SQLite connection (remains open after schema creation) + """ + # Use the existing connection to create schema + manager = DatabaseManager.__new__(DatabaseManager) + manager.conn = conn + manager.cursor = conn.cursor() + manager.batch_size = 200 + + # Initialize batch lists + manager.files_batch = [] + manager.refs_batch = [] + manager.endpoints_batch = [] + manager.sql_objects_batch = [] + manager.sql_queries_batch = [] + manager.symbols_batch = [] + manager.orm_queries_batch = [] + manager.docker_images_batch = [] + manager.docker_issues_batch = [] + manager.assignments_batch = [] + manager.function_calls_batch = [] + manager.returns_batch = [] + manager.prisma_batch = [] + manager.compose_batch = [] + manager.nginx_batch = [] + + # Create the schema using the existing connection + manager.create_schema() + # Don't close - let caller handle connection lifecycle \ No newline at end of file diff --git a/theauditor/indexer/extractors/__init__.py b/theauditor/indexer/extractors/__init__.py new file mode 100644 index 0000000..77d4368 --- /dev/null +++ b/theauditor/indexer/extractors/__init__.py @@ -0,0 +1,287 @@ +"""Extractor framework for the indexer. + +This module defines the BaseExtractor abstract class and the ExtractorRegistry +for dynamic discovery and registration of language-specific extractors. +""" + +import os +import re +import importlib +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Dict, Any, List, Optional, Tuple + +from ..config import ( + IMPORT_PATTERNS, ROUTE_PATTERNS, SQL_PATTERNS, SQL_QUERY_PATTERNS +) + +# Optional SQL parsing support +try: + import sqlparse + HAS_SQLPARSE = True +except ImportError: + HAS_SQLPARSE = False + + +class BaseExtractor(ABC): + """Abstract base class for all language extractors.""" + + def __init__(self, root_path: Path, ast_parser: Optional[Any] = None): + """Initialize the extractor. + + Args: + root_path: Project root path + ast_parser: Optional AST parser instance + """ + self.root_path = root_path + self.ast_parser = ast_parser + + @abstractmethod + def supported_extensions(self) -> List[str]: + """Return list of file extensions this extractor supports. + + Returns: + List of file extensions (e.g., ['.py', '.pyx']) + """ + pass + + @abstractmethod + def extract(self, file_info: Dict[str, Any], content: str, + tree: Optional[Any] = None) -> Dict[str, Any]: + """Extract all relevant information from a file. + + Args: + file_info: File metadata dictionary + content: File content + tree: Optional pre-parsed AST tree + + Returns: + Dictionary containing all extracted data + """ + pass + + def extract_imports(self, content: str, file_ext: str) -> List[Tuple[str, str]]: + """Extract import statements from file content. 
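Because the ExtractorRegistry defined later in this module discovers extractors by scanning the package for BaseExtractor subclasses, supporting a new language is a matter of dropping in one module. A hypothetical minimal extractor; the module name, extension, and returned data are illustrative and not part of the patch:

# theauditor/indexer/extractors/ruby.py  (hypothetical module)
from typing import Any, Dict, List, Optional

from . import BaseExtractor


class RubyExtractor(BaseExtractor):
    """Toy extractor that reuses the shared regex helpers from BaseExtractor."""

    def supported_extensions(self) -> List[str]:
        return [".rb"]

    def extract(self, file_info: Dict[str, Any], content: str,
                tree: Optional[Any] = None) -> Dict[str, Any]:
        return {
            "imports": self.extract_imports(content, file_info["ext"]),
            "routes": [],    # route extraction omitted in this sketch
            "symbols": [],
        }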
+ + Args: + content: File content + file_ext: File extension + + Returns: + List of (kind, value) tuples for imports + """ + imports = [] + for pattern in IMPORT_PATTERNS: + for match in pattern.finditer(content): + value = match.group(1) if match.lastindex else match.group(0) + # Determine kind based on pattern + if "require" in pattern.pattern: + kind = "require" + elif "from" in pattern.pattern and "import" in pattern.pattern: + kind = "from" + elif "package" in pattern.pattern: + kind = "package" + else: + kind = "import" + imports.append((kind, value)) + return imports + + def extract_routes(self, content: str) -> List[Tuple[str, str]]: + """Extract route definitions from file content. + + Args: + content: File content + + Returns: + List of (method, path) tuples + """ + routes = [] + for pattern in ROUTE_PATTERNS: + for match in pattern.finditer(content): + if match.lastindex == 2: + method = match.group(1).upper() + path = match.group(2) + else: + method = "ANY" + path = match.group(1) if match.lastindex else match.group(0) + routes.append((method, path)) + return routes + + def extract_sql_objects(self, content: str) -> List[Tuple[str, str]]: + """Extract SQL object definitions from file content. + + Args: + content: File content + + Returns: + List of (kind, name) tuples + """ + objects = [] + for pattern in SQL_PATTERNS: + for match in pattern.finditer(content): + name = match.group(1) + # Determine kind from pattern + pattern_text = pattern.pattern.lower() + if "table" in pattern_text: + kind = "table" + elif "index" in pattern_text: + kind = "index" + elif "view" in pattern_text: + kind = "view" + elif "function" in pattern_text: + kind = "function" + elif "policy" in pattern_text: + kind = "policy" + elif "constraint" in pattern_text: + kind = "constraint" + else: + kind = "unknown" + objects.append((kind, name)) + return objects + + def extract_sql_queries(self, content: str) -> List[Dict]: + """Extract and parse SQL queries from code. + + Args: + content: File content + + Returns: + List of query dictionaries + """ + if not HAS_SQLPARSE: + return [] + + queries = [] + + # Find all potential SQL query strings + for pattern in SQL_QUERY_PATTERNS: + for match in pattern.finditer(content): + query_text = match.group(1) if match.lastindex else match.group(0) + + # Calculate line number + line = content[:match.start()].count('\n') + 1 + + # Clean up the query text + query_text = query_text.strip() + if not query_text: + continue + + try: + # Parse the SQL query + parsed = sqlparse.parse(query_text) + if not parsed: + continue + + for statement in parsed: + # Extract command type + command = statement.get_type() + if not command: + # Try to extract manually from first token + tokens = statement.tokens + for token in tokens: + if not token.is_whitespace: + command = str(token).upper() + break + + # Extract table names + tables = [] + tokens = list(statement.flatten()) + for i, token in enumerate(tokens): + if token.ttype is None and token.value.upper() in ['FROM', 'INTO', 'UPDATE', 'TABLE', 'JOIN']: + # Look for the next non-whitespace token + for j in range(i + 1, len(tokens)): + next_token = tokens[j] + if not next_token.is_whitespace: + if next_token.ttype in [None, sqlparse.tokens.Name]: + table_name = next_token.value + # Clean up table name + table_name = table_name.strip('"\'`') + if '.' 
in table_name: + table_name = table_name.split('.')[-1] + if table_name and not table_name.upper() in ['SELECT', 'WHERE', 'SET', 'VALUES']: + tables.append(table_name) + break + + queries.append({ + 'line': line, + 'query_text': query_text[:1000], # Limit length + 'command': command or 'UNKNOWN', + 'tables': tables + }) + except Exception: + # Skip queries that can't be parsed + continue + + return queries + + +class ExtractorRegistry: + """Registry for dynamic discovery and management of extractors.""" + + def __init__(self, root_path: Path, ast_parser: Optional[Any] = None): + """Initialize the registry and discover extractors. + + Args: + root_path: Project root path + ast_parser: Optional AST parser instance + """ + self.root_path = root_path + self.ast_parser = ast_parser + self.extractors = {} + self._discover() + + def _discover(self): + """Auto-discover and register all extractor modules.""" + extractor_dir = Path(__file__).parent + + # Find all Python files in the extractors directory + for file_path in extractor_dir.glob("*.py"): + if file_path.name.startswith('_'): + continue # Skip __init__.py and private modules + + module_name = file_path.stem + + try: + # Import the module + module = importlib.import_module(f'.{module_name}', package='theauditor.indexer.extractors') + + # Find extractor class (looking for subclasses of BaseExtractor) + for attr_name in dir(module): + attr = getattr(module, attr_name) + if (isinstance(attr, type) and + issubclass(attr, BaseExtractor) and + attr != BaseExtractor): + + # Instantiate the extractor + extractor = attr(self.root_path, self.ast_parser) + + # Register for all supported extensions + for ext in extractor.supported_extensions(): + self.extractors[ext] = extractor + + break # One extractor per module + + except (ImportError, AttributeError) as e: + # Skip modules that can't be imported or don't have extractors + if os.environ.get("THEAUDITOR_DEBUG"): + print(f"Debug: Failed to load extractor {module_name}: {e}") + continue + + def get_extractor(self, file_extension: str) -> Optional[BaseExtractor]: + """Get the appropriate extractor for a file extension. + + Args: + file_extension: File extension (e.g., '.py') + + Returns: + Extractor instance or None if not supported + """ + return self.extractors.get(file_extension) + + def supported_extensions(self) -> List[str]: + """Get list of all supported file extensions. + + Returns: + List of supported extensions + """ + return list(self.extractors.keys()) \ No newline at end of file diff --git a/theauditor/indexer/extractors/docker.py b/theauditor/indexer/extractors/docker.py new file mode 100644 index 0000000..01a8188 --- /dev/null +++ b/theauditor/indexer/extractors/docker.py @@ -0,0 +1,279 @@ +"""Docker file extractor. + +Handles extraction of Docker-specific elements including: +- Base images and build stages +- Environment variables and build arguments +- Security issues (running as root, unpinned images, etc.) +""" + +import json +from pathlib import Path +from typing import Dict, Any, List, Optional + +from . 
import BaseExtractor +from ..config import SENSITIVE_PORTS, SENSITIVE_ENV_KEYWORDS + +# Check for optional Docker parsing libraries +try: + from dockerfile_parse import DockerfileParser as DFParser + HAS_DOCKERFILE_PARSE = True +except ImportError: + HAS_DOCKERFILE_PARSE = False + +try: + from theauditor.parsers.dockerfile_parser import DockerfileParser + HAS_CUSTOM_PARSERS = True +except ImportError: + HAS_CUSTOM_PARSERS = False + + +class DockerExtractor(BaseExtractor): + """Extractor for Docker files.""" + + def supported_extensions(self) -> List[str]: + """Return list of file extensions this extractor supports. + + Note: Dockerfiles don't have extensions, we match by filename. + """ + return [] # We handle this specially in should_extract + + def should_extract(self, file_path: str) -> bool: + """Check if this extractor should handle the file. + + Args: + file_path: Path to the file + + Returns: + True if this is a Dockerfile + """ + file_name_lower = Path(file_path).name.lower() + dockerfile_patterns = [ + 'dockerfile', 'dockerfile.dev', 'dockerfile.prod', + 'dockerfile.test', 'dockerfile.staging' + ] + return (file_name_lower in dockerfile_patterns or + file_name_lower.startswith('dockerfile.')) + + def extract(self, file_info: Dict[str, Any], content: str, + tree: Optional[Any] = None) -> Dict[str, Any]: + """Extract all relevant information from a Dockerfile. + + Args: + file_info: File metadata dictionary + content: File content + tree: Optional pre-parsed AST tree (not used for Docker) + + Returns: + Dictionary containing all extracted data + """ + result = { + 'docker_info': {}, + 'docker_issues': [] + } + + # Extract basic Docker info if dockerfile_parse available + if HAS_DOCKERFILE_PARSE: + result['docker_info'] = self._extract_docker_info(content) + + # Analyze for security issues if custom parser available + if HAS_CUSTOM_PARSERS: + file_path = self.root_path / file_info['path'] + result['docker_issues'] = self._analyze_security(file_path, content) + + return result + + def _extract_docker_info(self, content: str) -> Dict[str, Any]: + """Extract structured information from Dockerfile content. 
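+
+        For a minimal Dockerfile such as "FROM python:3.11" followed by
+        "EXPOSE 8000", the result would typically look like
+        {'base_image': 'python:3.11', 'exposed_ports': ['8000'],
+        'env_vars': {}, 'build_args': {}, 'user': None,
+        'has_healthcheck': False} (illustrative values).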
+ + Args: + content: Dockerfile content + + Returns: + Dict with Docker information + """ + info = { + 'base_image': None, + 'exposed_ports': [], + 'env_vars': {}, + 'build_args': {}, + 'user': None, + 'has_healthcheck': False + } + + try: + # Parse the Dockerfile + parser = DFParser() + parser.content = content + + # Extract base image + if parser.baseimage: + info['base_image'] = parser.baseimage + + # Extract exposed ports + for instruction in parser.structure: + if instruction['instruction'] == 'EXPOSE': + # Parse ports from the value + ports_str = instruction['value'] + ports = ports_str.split() + info['exposed_ports'].extend(ports) + + # Extract environment variables + elif instruction['instruction'] == 'ENV': + # Parse ENV key=value or ENV key value + env_str = instruction['value'] + # Handle both formats: KEY=value and KEY value + if '=' in env_str: + # Format: KEY=value KEY2=value2 + parts = env_str.split() + for part in parts: + if '=' in part: + key, value = part.split('=', 1) + info['env_vars'][key] = value + else: + # Format: KEY value + parts = env_str.split(None, 1) + if len(parts) == 2: + info['env_vars'][parts[0]] = parts[1] + + # Extract build arguments + elif instruction['instruction'] == 'ARG': + arg_str = instruction['value'] + # Handle ARG key=value or ARG key + if '=' in arg_str: + key, value = arg_str.split('=', 1) + info['build_args'][key] = value + else: + info['build_args'][arg_str] = None + + # Check for USER and HEALTHCHECK + elif instruction['instruction'] == 'USER': + info['user'] = instruction['value'] + elif instruction['instruction'] == 'HEALTHCHECK': + info['has_healthcheck'] = True + elif instruction['instruction'] == 'WORKDIR': + info['env_vars']['_DOCKER_WORKDIR'] = instruction['value'] + + except Exception: + # If parsing fails, return empty info + return {} + + return info + + def _analyze_security(self, file_path: Path, content: str) -> List[Dict]: + """Analyze Dockerfile for security issues. 
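+
+        Each issue is a dict with 'line', 'issue_type' and 'severity' keys;
+        the issue types emitted below include ROOT_USER, UNPINNED_IMAGE,
+        SECRET_IN_ENV, MISSING_HEALTHCHECK, DANGEROUS_COPY,
+        APT_UPGRADE_IN_PROD and SENSITIVE_PORT_EXPOSED.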
+ + Args: + file_path: Path to the Dockerfile + content: Dockerfile content + + Returns: + List of security issue dictionaries + """ + issues = [] + + try: + parser = DockerfileParser() + parsed_data = parser.parse_file(file_path) + + if 'instructions' not in parsed_data: + return issues + + instructions = parsed_data['instructions'] + + # Security Rule 1: Check for running as root + has_user = False + runs_as_root = False + for inst in instructions: + if inst['instruction'] == 'USER': + has_user = True + if inst['value'].strip().lower() == 'root': + runs_as_root = True + issues.append({ + 'line': inst['line'], + 'issue_type': 'ROOT_USER', + 'severity': 'critical' + }) + + if not has_user: + # No USER instruction means runs as root by default + issues.append({ + 'line': 1, + 'issue_type': 'ROOT_USER', + 'severity': 'critical' + }) + + # Security Rule 2: Check for unpinned images + for inst in instructions: + if inst['instruction'] == 'FROM': + image = inst['value'].strip() + # Check for :latest or no tag + if ':latest' in image or (':' not in image and ' as ' not in image.lower()): + issues.append({ + 'line': inst['line'], + 'issue_type': 'UNPINNED_IMAGE', + 'severity': 'high' + }) + + # Security Rule 3: Check for secrets in ENV + for inst in instructions: + if inst['instruction'] == 'ENV': + env_value = inst['value'].upper() + for keyword in SENSITIVE_ENV_KEYWORDS: + if keyword in env_value: + issues.append({ + 'line': inst['line'], + 'issue_type': 'SECRET_IN_ENV', + 'severity': 'critical' + }) + break + + # Security Rule 4: Check for missing healthcheck + has_healthcheck = any(inst['instruction'] == 'HEALTHCHECK' for inst in instructions) + if not has_healthcheck: + issues.append({ + 'line': 1, + 'issue_type': 'MISSING_HEALTHCHECK', + 'severity': 'medium' + }) + + # Security Rule 5: Check for dangerous COPY commands + for inst in instructions: + if inst['instruction'] == 'COPY': + copy_value = inst['value'].strip() + # Check for copying entire directory including potential secrets + if copy_value.startswith('. ') or copy_value == '.' or '.env' in copy_value: + issues.append({ + 'line': inst['line'], + 'issue_type': 'DANGEROUS_COPY', + 'severity': 'high' + }) + + # Security Rule 6: Check for apt-get upgrade in production + for inst in instructions: + if inst['instruction'] == 'RUN': + run_value = inst['value'].lower() + if 'apt-get upgrade' in run_value or 'apt upgrade' in run_value: + issues.append({ + 'line': inst['line'], + 'issue_type': 'APT_UPGRADE_IN_PROD', + 'severity': 'medium' + }) + + # Security Rule 7: Check for exposed sensitive ports + for inst in instructions: + if inst['instruction'] == 'EXPOSE': + ports = inst['value'].split() + for port in ports: + port_num = port.split('/')[0] # Handle "8080/tcp" format + if port_num in SENSITIVE_PORTS: + issues.append({ + 'line': inst['line'], + 'issue_type': 'SENSITIVE_PORT_EXPOSED', + 'severity': 'high' + }) + + except Exception: + # Silently fail security analysis + pass + + return issues \ No newline at end of file diff --git a/theauditor/indexer/extractors/generic.py b/theauditor/indexer/extractors/generic.py new file mode 100644 index 0000000..f6b4faa --- /dev/null +++ b/theauditor/indexer/extractors/generic.py @@ -0,0 +1,121 @@ +"""Generic file extractor. + +Handles extraction for files that don't have specialized extractors: +- Webpack configurations +- Docker Compose files +- Nginx configurations +- Other configuration files +""" + +import json +from pathlib import Path +from typing import Dict, Any, List, Optional + +from . 
import BaseExtractor +from ..config import COMPOSE_PATTERNS, NGINX_PATTERNS + +# Check for optional custom parsers +try: + from theauditor.parsers.webpack_config_parser import WebpackConfigParser + from theauditor.parsers.compose_parser import ComposeParser + from theauditor.parsers.nginx_parser import NginxParser + HAS_CUSTOM_PARSERS = True +except ImportError: + HAS_CUSTOM_PARSERS = False + + +class GenericExtractor(BaseExtractor): + """Generic extractor for configuration and other files.""" + + def supported_extensions(self) -> List[str]: + """Return list of file extensions this extractor supports.""" + # This extractor handles files by name pattern, not extension + return [] + + def should_extract(self, file_path: str) -> bool: + """Check if this extractor should handle the file. + + Args: + file_path: Path to the file + + Returns: + True if this extractor should handle the file + """ + file_name = Path(file_path).name.lower() + + # Check for specific file patterns + if file_name.endswith('webpack.config.js'): + return True + if file_name in COMPOSE_PATTERNS: + return True + if file_name in NGINX_PATTERNS or file_name.endswith('.conf'): + return True + + return False + + def extract(self, file_info: Dict[str, Any], content: str, + tree: Optional[Any] = None) -> Dict[str, Any]: + """Extract information from generic configuration files. + + Args: + file_info: File metadata dictionary + content: File content + tree: Optional pre-parsed AST tree + + Returns: + Dictionary containing all extracted data + """ + result = { + 'config_data': {}, + 'imports': [], + 'routes': [], + 'sql_queries': [] + } + + file_path = self.root_path / file_info['path'] + file_name = file_path.name.lower() + + # Handle webpack configuration + if HAS_CUSTOM_PARSERS and file_name.endswith('webpack.config.js'): + try: + parser = WebpackConfigParser() + webpack_data = parser.parse_file(file_path) + if webpack_data and not webpack_data.get('error'): + result['config_data']['webpack'] = webpack_data + except Exception: + pass + + # Handle Docker Compose files + if HAS_CUSTOM_PARSERS and file_name in COMPOSE_PATTERNS: + try: + parser = ComposeParser() + compose_data = parser.parse_file(file_path) + if compose_data and not compose_data.get('error'): + result['config_data']['docker_compose'] = compose_data + except Exception: + pass + + # Handle Nginx configuration + if HAS_CUSTOM_PARSERS and (file_name in NGINX_PATTERNS or file_name.endswith('.conf')): + try: + parser = NginxParser() + nginx_data = parser.parse_file(file_path) + if nginx_data and not nginx_data.get('error'): + result['config_data']['nginx'] = nginx_data + # Extract routes from nginx location blocks + if 'locations' in nginx_data: + for location in nginx_data['locations']: + result['routes'].append(( + 'ANY', # Nginx handles all methods by default + location.get('path', '/'), + [] # No middleware concept in nginx + )) + except Exception: + pass + + # For all files, try to extract common patterns + result['imports'].extend(self.extract_imports(content, file_info['ext'])) + result['routes'].extend([(m, p, []) for m, p in self.extract_routes(content)]) + result['sql_queries'].extend(self.extract_sql_queries(content)) + + return result \ No newline at end of file diff --git a/theauditor/indexer/extractors/javascript.py b/theauditor/indexer/extractors/javascript.py new file mode 100644 index 0000000..b4023d5 --- /dev/null +++ b/theauditor/indexer/extractors/javascript.py @@ -0,0 +1,345 @@ +"""JavaScript/TypeScript file extractor. 
+ +Handles extraction of JavaScript and TypeScript specific elements including: +- ES6/CommonJS imports and requires +- Express/Fastify routes with middleware +- ORM queries (Sequelize, Prisma, TypeORM) +- Property accesses for taint analysis +""" + +import re +import json +from pathlib import Path +from typing import Dict, Any, List, Optional + +from . import BaseExtractor +from ..config import ( + SEQUELIZE_METHODS, PRISMA_METHODS, + TYPEORM_REPOSITORY_METHODS, TYPEORM_QB_METHODS +) + + +class JavaScriptExtractor(BaseExtractor): + """Extractor for JavaScript and TypeScript files.""" + + def supported_extensions(self) -> List[str]: + """Return list of file extensions this extractor supports.""" + return ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'] + + def extract(self, file_info: Dict[str, Any], content: str, + tree: Optional[Any] = None) -> Dict[str, Any]: + """Extract all relevant information from a JavaScript/TypeScript file. + + Args: + file_info: File metadata dictionary + content: File content + tree: Optional pre-parsed AST tree + + Returns: + Dictionary containing all extracted data + """ + result = { + 'imports': [], + 'resolved_imports': {}, + 'routes': [], + 'symbols': [], + 'assignments': [], + 'function_calls': [], + 'returns': [], + 'orm_queries': [] + } + + # Extract imports using regex patterns + result['imports'] = self.extract_imports(content, file_info['ext']) + + # Resolve imports if we have js_semantic_parser + if tree and tree.get('success'): + try: + from theauditor.js_semantic_parser import JSSemanticParser + js_parser = JSSemanticParser(project_root=str(self.root_path)) + result['resolved_imports'] = js_parser.resolve_imports( + tree, file_info['path'] + ) + except Exception: + # Resolution failed, keep unresolved imports + pass + + # Extract routes + if tree: + result['routes'] = self._extract_routes_ast(tree, content) + else: + result['routes'] = [(method, path, []) + for method, path in self.extract_routes(content)] + + # Extract symbols from AST if available + if tree and self.ast_parser: + # Functions + functions = self.ast_parser.extract_functions(tree) + for func in functions: + line = func.get('line', 0) + # Validate line numbers are reasonable + if line < 1 or line > 100000: + continue # Skip invalid symbols + + result['symbols'].append({ + 'name': func.get('name', ''), + 'type': 'function', + 'line': line, + 'col': func.get('col', 0) + }) + + # Classes + classes = self.ast_parser.extract_classes(tree) + for cls in classes: + line = cls.get('line', 0) + # Validate line numbers are reasonable + if line < 1 or line > 100000: + continue # Skip invalid symbols + + result['symbols'].append({ + 'name': cls.get('name', ''), + 'type': 'class', + 'line': line, + 'col': cls.get('col', 0) + }) + + # Calls and other symbols + symbols = self.ast_parser.extract_calls(tree) + for symbol in symbols: + line = symbol.get('line', 0) + # Validate line numbers are reasonable + if line < 1 or line > 100000: + continue # Skip invalid symbols + + result['symbols'].append({ + 'name': symbol.get('name', ''), + 'type': symbol.get('type', 'call'), + 'line': line, + 'col': symbol.get('col', symbol.get('column', 0)) + }) + + # CRITICAL: Extract property accesses for taint analysis + # This is needed to find patterns like req.body, req.query, etc. 
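+            # For example, for source like "const id = req.query.id" the
+            # parser would typically report a property access named
+            # 'req.query', stored below as a 'property' symbol so the taint
+            # stage can match it against known source patterns (illustrative;
+            # the exact name format depends on the AST parser).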
+ properties = self.ast_parser.extract_properties(tree) + for prop in properties: + line = prop.get('line', 0) + # Validate line numbers are reasonable + if line < 1 or line > 100000: + continue # Skip invalid symbols + + result['symbols'].append({ + 'name': prop.get('name', ''), + 'type': 'property', + 'line': line, + 'col': prop.get('col', prop.get('column', 0)) + }) + + # Extract data flow information + assignments = self.ast_parser.extract_assignments(tree) + for assignment in assignments: + result['assignments'].append({ + 'line': assignment.get('line', 0), + 'target_var': assignment.get('target_var', ''), + 'source_expr': assignment.get('source_expr', ''), + 'source_vars': assignment.get('source_vars', []), + 'in_function': assignment.get('in_function', 'global') + }) + + # Extract function calls with arguments + calls_with_args = self.ast_parser.extract_function_calls_with_args(tree) + for call in calls_with_args: + result['function_calls'].append({ + 'line': call.get('line', 0), + 'caller_function': call.get('caller_function', 'global'), + 'callee_function': call.get('callee_function', ''), + 'argument_index': call.get('argument_index', 0), + 'argument_expr': call.get('argument_expr', ''), + 'param_name': call.get('param_name', '') + }) + + # Extract return statements + return_statements = self.ast_parser.extract_returns(tree) + for ret in return_statements: + result['returns'].append({ + 'line': ret.get('line', 0), + 'function_name': ret.get('function_name', 'global'), + 'return_expr': ret.get('return_expr', ''), + 'return_vars': ret.get('return_vars', []) + }) + + # Extract ORM queries + result['orm_queries'] = self._extract_orm_queries(tree, content) + + # Extract SQL queries embedded in JavaScript code + result['sql_queries'] = self.extract_sql_queries(content) + + return result + + def _extract_routes_ast(self, tree: Dict[str, Any], content: str) -> List[tuple]: + """Extract Express/Fastify routes with middleware. + + Args: + tree: Parsed AST tree + content: File content for fallback extraction + + Returns: + List of (method, pattern, controls) tuples + """ + routes = [] + + # Enhanced regex to capture middleware + # Pattern: router.METHOD('/path', [middleware1, middleware2,] handler) + pattern = re.compile( + r'(?:app|router)\.(get|post|put|patch|delete|all)\s*\(\s*[\'\"\`]([^\'\"\`]+)[\'\"\`]\s*,\s*([^)]+)\)', + re.MULTILINE | re.DOTALL + ) + + for match in pattern.finditer(content): + method = match.group(1).upper() + path = match.group(2) + middleware_str = match.group(3) + + # Extract middleware function names + middleware = [] + # Look for function names before the final handler + middleware_pattern = re.compile(r'(\w+)(?:\s*,|\s*\))') + for m in middleware_pattern.finditer(middleware_str): + name = m.group(1) + # Filter out common non-middleware terms + if name not in ['req', 'res', 'next', 'async', 'function', 'err']: + middleware.append(name) + + # Remove the last item as it's likely the handler, not middleware + if len(middleware) > 1: + middleware = middleware[:-1] + + routes.append((method, path, middleware)) + + # If no routes found with enhanced regex, fallback to basic extraction + if not routes: + routes = [(method, path, []) + for method, path in self.extract_routes(content)] + + return routes + + def _extract_orm_queries(self, tree: Dict[str, Any], content: str) -> List[Dict]: + """Extract ORM query calls from JavaScript/TypeScript code. 
+ + Args: + tree: AST tree from ast_parser + content: File content for line extraction + + Returns: + List of ORM query dictionaries + """ + queries = [] + + if not tree or not self.ast_parser: + return queries + + # Handle wrapped tree format + if not isinstance(tree, dict) or tree.get("type") != "tree_sitter": + return queries + + try: + # Extract all function calls from the tree + calls = self.ast_parser.extract_calls(tree) + lines = content.split('\n') + + # All ORM methods to check + all_orm_methods = ( + SEQUELIZE_METHODS | PRISMA_METHODS | + TYPEORM_REPOSITORY_METHODS | TYPEORM_QB_METHODS + ) + + # Process each call + for call in calls: + method_name = call.get('name', '') + + # Check for ORM method patterns + if '.' in method_name: + parts = method_name.split('.') + method = parts[-1] + + if method in all_orm_methods: + line_num = call.get('line', 0) + + # Determine ORM type and extract context + orm_type = self._determine_orm_type(method, parts) + + # Try to extract options from context + has_include = False + has_limit = False + has_transaction = False + includes_json = None + + if 0 < line_num <= len(lines): + # Get context for multi-line calls + start_line = max(0, line_num - 1) + end_line = min(len(lines), line_num + 10) + context = '\n'.join(lines[start_line:end_line]) + + # Check for includes/relations (eager loading) + if 'include:' in context or 'include :' in context or 'relations:' in context: + has_include = True + # Check for death query pattern in Sequelize + if 'all: true' in context and 'nested: true' in context: + includes_json = json.dumps({"all": True, "nested": True}) + else: + # Try to extract include/relations specification + include_match = re.search( + r'(?:include|relations):\s*(\[.*?\]|\{.*?\})', + context, re.DOTALL + ) + if include_match: + includes_json = json.dumps({"raw": include_match.group(1)[:200]}) + + # Check for limit/take + if 'limit:' in context or 'limit :' in context or 'take:' in context: + has_limit = True + + # Check for transaction + if 'transaction:' in context or '.$transaction' in context: + has_transaction = True + + # Format query type with model name for Prisma + if orm_type == 'prisma' and len(parts) >= 3: + query_type = f'{parts[-2]}.{method}' # model.method + else: + query_type = method + + queries.append({ + 'line': line_num, + 'query_type': query_type, + 'includes': includes_json, + 'has_limit': has_limit, + 'has_transaction': has_transaction + }) + + except Exception: + # Silently fail ORM extraction + pass + + return queries + + def _determine_orm_type(self, method: str, parts: List[str]) -> str: + """Determine which ORM is being used based on method and call pattern. + + Args: + method: The method name + parts: The split call parts (e.g., ['prisma', 'user', 'findMany']) + + Returns: + ORM type string: 'sequelize', 'prisma', 'typeorm', or 'unknown' + """ + if method in SEQUELIZE_METHODS: + return 'sequelize' + elif method in PRISMA_METHODS: + # Prisma typically uses prisma.modelName.method pattern + if len(parts) >= 3 and parts[-3] in ['prisma', 'db', 'client']: + return 'prisma' + elif method in TYPEORM_REPOSITORY_METHODS: + return 'typeorm_repository' + elif method in TYPEORM_QB_METHODS: + return 'typeorm_qb' + return 'unknown' \ No newline at end of file diff --git a/theauditor/indexer/extractors/python.py b/theauditor/indexer/extractors/python.py new file mode 100644 index 0000000..87739d0 --- /dev/null +++ b/theauditor/indexer/extractors/python.py @@ -0,0 +1,189 @@ +"""Python file extractor. 
+ +Handles extraction of Python-specific elements including: +- Python imports (import/from statements) +- Flask/FastAPI route decorators with middleware +- AST-based symbol extraction +""" + +import ast +import json +from pathlib import Path +from typing import Dict, Any, List, Optional + +from . import BaseExtractor + + +class PythonExtractor(BaseExtractor): + """Extractor for Python files.""" + + def supported_extensions(self) -> List[str]: + """Return list of file extensions this extractor supports.""" + return ['.py', '.pyx'] + + def extract(self, file_info: Dict[str, Any], content: str, + tree: Optional[Any] = None) -> Dict[str, Any]: + """Extract all relevant information from a Python file. + + Args: + file_info: File metadata dictionary + content: File content + tree: Optional pre-parsed AST tree + + Returns: + Dictionary containing all extracted data + """ + result = { + 'imports': [], + 'routes': [], + 'symbols': [], + 'assignments': [], + 'function_calls': [], + 'returns': [] + } + + # Extract imports using regex patterns (for all types) + result['imports'] = self.extract_imports(content, file_info['ext']) + + # If we have an AST tree, extract Python-specific information + if tree and isinstance(tree, dict): + # Extract routes with decorators using AST + result['routes'] = self._extract_routes_ast(tree, file_info['path']) + + # Extract symbols from AST parser results + if self.ast_parser: + # Functions + functions = self.ast_parser.extract_functions(tree) + for func in functions: + result['symbols'].append({ + 'name': func.get('name', ''), + 'type': 'function', + 'line': func.get('line', 0), + 'col': func.get('col', 0) + }) + + # Classes + classes = self.ast_parser.extract_classes(tree) + for cls in classes: + result['symbols'].append({ + 'name': cls.get('name', ''), + 'type': 'class', + 'line': cls.get('line', 0), + 'col': cls.get('col', 0) + }) + + # Calls and other symbols + symbols = self.ast_parser.extract_calls(tree) + for symbol in symbols: + result['symbols'].append({ + 'name': symbol.get('name', ''), + 'type': symbol.get('type', 'call'), + 'line': symbol.get('line', 0), + 'col': symbol.get('col', symbol.get('column', 0)) + }) + + # Extract data flow information for taint analysis + assignments = self.ast_parser.extract_assignments(tree) + for assignment in assignments: + result['assignments'].append({ + 'line': assignment.get('line', 0), + 'target_var': assignment.get('target_var', ''), + 'source_expr': assignment.get('source_expr', ''), + 'source_vars': assignment.get('source_vars', []), + 'in_function': assignment.get('in_function', 'global') + }) + + # Extract function calls with arguments + calls_with_args = self.ast_parser.extract_function_calls_with_args(tree) + for call in calls_with_args: + result['function_calls'].append({ + 'line': call.get('line', 0), + 'caller_function': call.get('caller_function', 'global'), + 'callee_function': call.get('callee_function', ''), + 'argument_index': call.get('argument_index', 0), + 'argument_expr': call.get('argument_expr', ''), + 'param_name': call.get('param_name', '') + }) + + # Extract return statements + return_statements = self.ast_parser.extract_returns(tree) + for ret in return_statements: + result['returns'].append({ + 'line': ret.get('line', 0), + 'function_name': ret.get('function_name', 'global'), + 'return_expr': ret.get('return_expr', ''), + 'return_vars': ret.get('return_vars', []) + }) + else: + # Fallback to regex extraction for routes if no AST + result['routes'] = [(method, path, []) + for method, path in 
self.extract_routes(content)] + + # Extract SQL queries embedded in Python code + result['sql_queries'] = self.extract_sql_queries(content) + + return result + + def _extract_routes_ast(self, tree: Dict[str, Any], file_path: str) -> List[tuple]: + """Extract Flask/FastAPI routes using Python AST. + + Args: + tree: Parsed AST tree + file_path: Path to file being analyzed + + Returns: + List of (method, pattern, controls) tuples + """ + routes = [] + + # Check if we have a Python AST tree + if not isinstance(tree.get("tree"), ast.Module): + return routes + + # Walk the AST to find decorated functions + for node in ast.walk(tree["tree"]): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + decorators = [] + route_info = None + + # Extract all decorator names + for decorator in node.decorator_list: + dec_name = None + if isinstance(decorator, ast.Name): + dec_name = decorator.id + elif isinstance(decorator, ast.Attribute): + dec_name = decorator.attr + elif isinstance(decorator, ast.Call): + if isinstance(decorator.func, ast.Attribute): + # Handle @app.route('/path') or @router.get('/path') + method_name = decorator.func.attr + if method_name in ['route', 'get', 'post', 'put', 'patch', 'delete']: + # Extract path from first argument + if decorator.args and isinstance(decorator.args[0], ast.Constant): + path = decorator.args[0].value + # Determine HTTP method + if method_name == 'route': + # Check for methods argument + method = 'GET' # Default + for keyword in decorator.keywords: + if keyword.arg == 'methods': + if isinstance(keyword.value, ast.List): + if keyword.value.elts: + if isinstance(keyword.value.elts[0], ast.Constant): + method = keyword.value.elts[0].value.upper() + else: + method = method_name.upper() + route_info = (method, path) + dec_name = method_name + elif isinstance(decorator.func, ast.Name): + dec_name = decorator.func.id + + # Collect non-route decorators as potential middleware/controls + if dec_name and dec_name not in ['route', 'get', 'post', 'put', 'patch', 'delete']: + decorators.append(dec_name) + + # If we found a route, add it with its security decorators + if route_info: + routes.append((route_info[0], route_info[1], decorators)) + + return routes \ No newline at end of file diff --git a/theauditor/indexer/extractors/sql.py b/theauditor/indexer/extractors/sql.py new file mode 100644 index 0000000..358d90a --- /dev/null +++ b/theauditor/indexer/extractors/sql.py @@ -0,0 +1,44 @@ +"""SQL file extractor. + +Handles extraction of SQL-specific elements including: +- SQL object definitions (tables, indexes, views, functions) +- SQL queries and their structure +""" + +from pathlib import Path +from typing import Dict, Any, List, Optional + +from . import BaseExtractor + + +class SQLExtractor(BaseExtractor): + """Extractor for SQL files.""" + + def supported_extensions(self) -> List[str]: + """Return list of file extensions this extractor supports.""" + return ['.sql', '.psql', '.ddl'] + + def extract(self, file_info: Dict[str, Any], content: str, + tree: Optional[Any] = None) -> Dict[str, Any]: + """Extract all relevant information from a SQL file. 
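+
+        For instance, a migration containing "CREATE TABLE users (...)" would
+        typically contribute ('table', 'users') to sql_objects and a parsed
+        entry for the statement to sql_queries (both depend on the configured
+        SQL patterns, and query parsing requires sqlparse to be installed).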
+ + Args: + file_info: File metadata dictionary + content: File content + tree: Optional pre-parsed AST tree (not used for SQL) + + Returns: + Dictionary containing all extracted data + """ + result = { + 'sql_objects': [], + 'sql_queries': [] + } + + # Extract SQL objects (CREATE statements) + result['sql_objects'] = self.extract_sql_objects(content) + + # Extract and parse SQL queries + result['sql_queries'] = self.extract_sql_queries(content) + + return result \ No newline at end of file diff --git a/theauditor/indexer_compat.py b/theauditor/indexer_compat.py new file mode 100644 index 0000000..288457f --- /dev/null +++ b/theauditor/indexer_compat.py @@ -0,0 +1,321 @@ +"""Repository indexer - Backward Compatibility Shim. + +This module provides backward compatibility for code that imports from indexer.py. +All functionality has been refactored into the theauditor.indexer package. + +IMPORTANT: New code should import from theauditor.indexer package directly: + from theauditor.indexer import IndexerOrchestrator +""" + +import json +import sqlite3 +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +# Import from the new package structure +from theauditor.indexer import IndexerOrchestrator +from theauditor.indexer.config import ( + SKIP_DIRS, IMPORT_PATTERNS, ROUTE_PATTERNS, SQL_PATTERNS, + SQL_QUERY_PATTERNS, DEFAULT_BATCH_SIZE +) +from theauditor.indexer.core import ( + FileWalker, is_text_file, get_first_lines, load_gitignore_patterns +) +from theauditor.indexer.database import create_database_schema +from theauditor.config_runtime import load_runtime_config + +# Re-export commonly used items for backward compatibility +__all__ = [ + 'build_index', + 'walk_directory', + 'populate_database', + 'create_database_schema', + 'SKIP_DIRS', + 'extract_imports', + 'extract_routes', + 'extract_sql_objects', + 'extract_sql_queries' +] + + +def extract_imports(content: str, file_ext: str) -> List[tuple]: + """Extract import statements - backward compatibility wrapper.""" + imports = [] + for pattern in IMPORT_PATTERNS: + for match in pattern.finditer(content): + value = match.group(1) if match.lastindex else match.group(0) + # Determine kind based on pattern + if "require" in pattern.pattern: + kind = "require" + elif "from" in pattern.pattern and "import" in pattern.pattern: + kind = "from" + elif "package" in pattern.pattern: + kind = "package" + else: + kind = "import" + imports.append((kind, value)) + return imports + + +def extract_routes(content: str) -> List[tuple]: + """Extract route definitions - backward compatibility wrapper.""" + routes = [] + for pattern in ROUTE_PATTERNS: + for match in pattern.finditer(content): + if match.lastindex == 2: + method = match.group(1).upper() + path = match.group(2) + else: + method = "ANY" + path = match.group(1) if match.lastindex else match.group(0) + routes.append((method, path)) + return routes + + +def extract_sql_objects(content: str) -> List[tuple]: + """Extract SQL object definitions - backward compatibility wrapper.""" + objects = [] + for pattern in SQL_PATTERNS: + for match in pattern.finditer(content): + name = match.group(1) + # Determine kind from pattern + pattern_text = pattern.pattern.lower() + if "table" in pattern_text: + kind = "table" + elif "index" in pattern_text: + kind = "index" + elif "view" in pattern_text: + kind = "view" + elif "function" in pattern_text: + kind = "function" + elif "policy" in pattern_text: + kind = "policy" + elif "constraint" in pattern_text: + kind = "constraint" + else: 
+ kind = "unknown" + objects.append((kind, name)) + return objects + + +def extract_sql_queries(content: str) -> List[dict]: + """Extract SQL queries - backward compatibility wrapper. + + Note: This requires sqlparse to be installed for full functionality. + """ + try: + import sqlparse + except ImportError: + return [] + + queries = [] + for pattern in SQL_QUERY_PATTERNS: + for match in pattern.finditer(content): + query_text = match.group(1) if match.lastindex else match.group(0) + + # Calculate line number + line = content[:match.start()].count('\n') + 1 + + # Clean up the query text + query_text = query_text.strip() + if not query_text: + continue + + try: + # Parse the SQL query + parsed = sqlparse.parse(query_text) + if not parsed: + continue + + for statement in parsed: + # Extract command type + command = statement.get_type() + if not command: + # Try to extract manually from first token + tokens = statement.tokens + for token in tokens: + if not token.is_whitespace: + command = str(token).upper() + break + + # Extract table names + tables = [] + tokens = list(statement.flatten()) + for i, token in enumerate(tokens): + if token.ttype is None and token.value.upper() in ['FROM', 'INTO', 'UPDATE', 'TABLE', 'JOIN']: + # Look for the next non-whitespace token + for j in range(i + 1, len(tokens)): + next_token = tokens[j] + if not next_token.is_whitespace: + if next_token.ttype in [None, sqlparse.tokens.Name]: + table_name = next_token.value + # Clean up table name + table_name = table_name.strip('"\'`') + if '.' in table_name: + table_name = table_name.split('.')[-1] + if table_name and not table_name.upper() in ['SELECT', 'WHERE', 'SET', 'VALUES']: + tables.append(table_name) + break + + queries.append({ + 'line': line, + 'query_text': query_text[:1000], # Limit length + 'command': command or 'UNKNOWN', + 'tables': tables + }) + except Exception: + # Skip queries that can't be parsed + continue + + return queries + + +def walk_directory( + root_path: Path, + follow_symlinks: bool = False, + exclude_patterns: Optional[List[str]] = None +) -> tuple[List[dict], Dict[str, Any]]: + """Walk directory and collect file information - backward compatibility wrapper. + + Args: + root_path: Root directory to walk + follow_symlinks: Whether to follow symbolic links + exclude_patterns: Additional patterns to exclude + + Returns: + Tuple of (files_list, statistics) + """ + config = load_runtime_config(str(root_path)) + walker = FileWalker(root_path, config, follow_symlinks, exclude_patterns) + return walker.walk() + + +def populate_database( + conn: sqlite3.Connection, + files: List[dict], + root_path: Path, + batch_size: int = DEFAULT_BATCH_SIZE +) -> Dict[str, int]: + """Populate SQLite database - backward compatibility wrapper. 
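+
+    Note: the passed connection is only used to discover the database path;
+    it is closed here and the IndexerOrchestrator opens its own connection to
+    the same file, so callers should not reuse conn afterwards.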
+ + Args: + conn: SQLite connection + files: List of file dictionaries + root_path: Project root path + batch_size: Batch size for database operations + + Returns: + Dictionary of extraction counts + """ + # Create orchestrator with the existing connection's path + db_path = conn.execute("PRAGMA database_list").fetchone()[2] + orchestrator = IndexerOrchestrator(root_path, db_path, batch_size) + + # Close the passed connection as orchestrator creates its own + conn.close() + + # Run the indexing + counts, _ = orchestrator.index() + return counts + + +def build_index( + root_path: str = ".", + manifest_path: str = "manifest.json", + db_path: str = "repo_index.db", + print_stats: bool = False, + dry_run: bool = False, + follow_symlinks: bool = False, + exclude_patterns: Optional[List[str]] = None, +) -> Dict[str, Any]: + """Build repository index - main entry point for backward compatibility. + + Args: + root_path: Root directory to index + manifest_path: Path to write manifest JSON + db_path: Path to SQLite database + print_stats: Whether to print statistics + dry_run: If True, only scan files without creating database + follow_symlinks: Whether to follow symbolic links + exclude_patterns: Patterns to exclude from indexing + + Returns: + Dictionary with success status and statistics + """ + start_time = time.time() + root = Path(root_path).resolve() + + if not root.exists(): + return {"error": f"Root path does not exist: {root_path}"} + + # Walk directory and collect files + config = load_runtime_config(str(root)) + walker = FileWalker(root, config, follow_symlinks, exclude_patterns) + files, walk_stats = walker.walk() + + if dry_run: + if print_stats: + elapsed_ms = int((time.time() - start_time) * 1000) + print(f"Files scanned: {walk_stats['total_files']}") + print(f"Text files indexed: {walk_stats['text_files']}") + print(f"Binary files skipped: {walk_stats['binary_files']}") + print(f"Large files skipped: {walk_stats['large_files']}") + print(f"Elapsed: {elapsed_ms}ms") + return {"success": True, "dry_run": True, "stats": walk_stats} + + # Write manifest + try: + with open(manifest_path, "w", encoding="utf-8") as f: + json.dump(files, f, indent=2, sort_keys=True) + except Exception as e: + return {"error": f"Failed to write manifest: {e}"} + + # Create and populate database + try: + # Check if database already exists + db_exists = Path(db_path).exists() + + # Create database schema + conn = sqlite3.connect(db_path) + conn.execute("BEGIN IMMEDIATE") + create_database_schema(conn) + conn.commit() + conn.close() + + # Report database creation if new + if not db_exists: + print(f"[Indexer] Created database: {db_path}") + + # Use orchestrator to populate the database + orchestrator = IndexerOrchestrator(root, db_path, DEFAULT_BATCH_SIZE, + follow_symlinks, exclude_patterns) + + # Clear existing data to avoid UNIQUE constraint errors + orchestrator.db_manager.clear_tables() + + extract_counts, _ = orchestrator.index() + + except Exception as e: + return {"error": f"Failed to create database: {e}"} + + if print_stats: + elapsed_ms = int((time.time() - start_time) * 1000) + print(f"Files scanned: {walk_stats['total_files']}") + print(f"Text files indexed: {walk_stats['text_files']}") + print(f"Binary files skipped: {walk_stats['binary_files']}") + print(f"Large files skipped: {walk_stats['large_files']}") + print(f"Refs extracted: {extract_counts['refs']}") + print(f"Routes extracted: {extract_counts['routes']}") + print(f"SQL objects extracted: {extract_counts['sql']}") + print(f"SQL queries 
extracted: {extract_counts['sql_queries']}") + print(f"Docker images analyzed: {extract_counts['docker']}") + print(f"Symbols extracted: {extract_counts['symbols']}") + print(f"Elapsed: {elapsed_ms}ms") + + return { + "success": True, + "stats": walk_stats, + "extract_counts": extract_counts, + "elapsed_ms": int((time.time() - start_time) * 1000), + } \ No newline at end of file diff --git a/theauditor/init.py b/theauditor/init.py new file mode 100644 index 0000000..7e2b0aa --- /dev/null +++ b/theauditor/init.py @@ -0,0 +1,182 @@ +"""Initialization module for TheAuditor - handles project setup and initialization.""" + +from pathlib import Path +from typing import Dict, Any +from theauditor.security import sanitize_config_path, SecurityError + + +def initialize_project( + offline: bool = False, + skip_docs: bool = False, + skip_deps: bool = False +) -> Dict[str, Any]: + """ + Initialize TheAuditor for first-time use by running all setup steps. + + This function handles the sequence of operations: + 1. Index repository + 2. Create workset + 3. Check dependencies (unless skipped/offline) + 4. Fetch documentation (unless skipped/offline) + + Args: + offline: Skip network operations (deps check, docs fetch) + skip_docs: Skip documentation fetching + skip_deps: Skip dependency checking + + Returns: + Dict containing: + - stats: Statistics for each step + - success: Overall success status + - has_failures: Whether any steps failed + - next_steps: List of recommended next commands + """ + from theauditor.indexer import build_index + from theauditor.workset import compute_workset + from theauditor.deps import parse_dependencies, check_latest_versions + from theauditor.docs_fetch import fetch_docs + from theauditor.docs_summarize import summarize_docs + from theauditor.config_runtime import load_runtime_config + + # Load configuration + config = load_runtime_config(".") + stats = {} + + # 1. Index + try: + # Sanitize paths from config before use + manifest_path = str(sanitize_config_path(config["paths"]["manifest"], "paths", "manifest", ".")) + db_path = str(sanitize_config_path(config["paths"]["db"], "paths", "db", ".")) + + result = build_index( + root_path=".", + manifest_path=manifest_path, + db_path=db_path, + print_stats=False, + dry_run=False, + follow_symlinks=False + ) + if result.get("error"): + raise Exception(result["error"]) + # Extract stats from nested structure + index_stats = result.get("stats", {}) + stats["index"] = { + "files": index_stats.get("total_files", 0), + "text_files": index_stats.get("text_files", 0), + "success": True + } + except SecurityError as e: + stats["index"] = {"success": False, "error": f"Security violation: {str(e)}"} + except Exception as e: + stats["index"] = {"success": False, "error": str(e)} + + # 2. 
Workset + try: + # Skip if indexing failed or found no files + if not stats.get("index", {}).get("success"): + raise Exception("Skipping - indexing failed") + if stats.get("index", {}).get("text_files", 0) == 0: + stats["workset"] = {"success": False, "files": 0} + else: + # Sanitize paths from config before use + db_path = str(sanitize_config_path(config["paths"]["db"], "paths", "db", ".")) + manifest_path = str(sanitize_config_path(config["paths"]["manifest"], "paths", "manifest", ".")) + output_path = str(sanitize_config_path(config["paths"]["workset"], "paths", "workset", ".")) + + result = compute_workset( + all_files=True, + root_path=".", + db_path=db_path, + manifest_path=manifest_path, + output_path=output_path, + max_depth=2, + print_stats=False + ) + stats["workset"] = { + "files": result.get("expanded_count", 0), + "coverage": result.get("coverage", 0), + "success": True + } + except SecurityError as e: + stats["workset"] = {"success": False, "error": f"Security violation: {str(e)}"} + except Exception as e: + stats["workset"] = {"success": False, "error": str(e)} + + # 3. Dependencies + if not skip_deps and not offline: + try: + deps_list = parse_dependencies(root_path=".") + + if deps_list: + latest_info = check_latest_versions(deps_list, allow_net=True, offline=False) + outdated = sum(1 for info in latest_info.values() if info["is_outdated"]) + stats["deps"] = { + "total": len(deps_list), + "outdated": outdated, + "success": True + } + else: + stats["deps"] = {"total": 0, "success": True} + except Exception as e: + stats["deps"] = {"success": False, "error": str(e)} + else: + stats["deps"] = {"skipped": True} + + # 4. Documentation + if not skip_docs and not offline: + try: + deps_list = parse_dependencies(root_path=".") + + if deps_list: + # Limit to first 50 deps for init command to avoid hanging + if len(deps_list) > 50: + deps_list = deps_list[:50] + + # Fetch with progress indicator + fetch_result = fetch_docs(deps_list) + fetched = fetch_result.get('fetched', 0) + cached = fetch_result.get('cached', 0) + errors = fetch_result.get('errors', []) + + # Summarize + summarize_result = summarize_docs() + stats["docs"] = { + "fetched": fetched, + "cached": cached, + "capsules": summarize_result.get('capsules_created', 0), + "success": True, + "errors": errors + } + else: + stats["docs"] = {"success": True, "fetched": 0, "capsules": 0} + except KeyboardInterrupt: + stats["docs"] = {"success": False, "error": "Interrupted by user"} + except Exception as e: + stats["docs"] = {"success": False, "error": str(e)} + else: + stats["docs"] = {"skipped": True} + + # Code capsules feature has been removed - the command was deleted + # Doc capsules (for dependency documentation) are handled by 'aud docs summarize' + + # Check if initialization was successful + has_failures = any( + not stats.get(step, {}).get("success", False) and not stats.get(step, {}).get("skipped", False) + for step in ["index", "workset", "deps", "docs"] + ) + + # Determine next steps + next_steps = [] + if stats.get("workset", {}).get("files", 0) > 0: + next_steps = [ + "aud lint --workset", + "aud ast-verify --workset", + "aud report" + ] + + return { + "stats": stats, + "success": not has_failures, + "has_failures": has_failures, + "next_steps": next_steps + } \ No newline at end of file diff --git a/theauditor/insights/__init__.py b/theauditor/insights/__init__.py new file mode 100644 index 0000000..e061548 --- /dev/null +++ b/theauditor/insights/__init__.py @@ -0,0 +1,86 @@ +"""TheAuditor insights package - 
optional interpretive intelligence. + +This package contains all optional scoring, classification, and +recommendation modules that add interpretation on top of facts. + +The insights package follows the Truth Courier principle - all modules +here are OPTIONAL and add subjective analysis on top of objective facts. +The core audit pipeline works without any of these modules. + +Modules: + - ml: Machine learning predictions and risk scoring + - graph: Architecture health metrics and recommendations + - taint: Security vulnerability severity classification +""" + +# ML Insights - predictions and risk scoring +from theauditor.insights.ml import ( + check_ml_available, + learn, + suggest, + build_feature_matrix, + build_labels, + train_models, + save_models, + load_models, + is_source_file, + load_journal_stats, + load_rca_stats, + load_ast_stats, + load_graph_stats, + load_git_churn, + load_semantic_import_features, + load_ast_complexity_metrics, + extract_text_features, + fowler_noll_hash, +) + +# Graph Insights - health metrics and recommendations +from theauditor.insights.graph import ( + GraphInsights, + check_insights_available, + create_insights, +) + +# Taint Insights - severity scoring and classification +from theauditor.insights.taint import ( + calculate_severity, + classify_vulnerability, + generate_summary, + format_taint_report, + get_taint_summary, + is_vulnerable_sink, +) + +__all__ = [ + # ML exports + 'check_ml_available', + 'learn', + 'suggest', + 'build_feature_matrix', + 'build_labels', + 'train_models', + 'save_models', + 'load_models', + 'is_source_file', + 'load_journal_stats', + 'load_rca_stats', + 'load_ast_stats', + 'load_graph_stats', + 'load_git_churn', + 'load_semantic_import_features', + 'load_ast_complexity_metrics', + 'extract_text_features', + 'fowler_noll_hash', + # Graph exports + 'GraphInsights', + 'check_insights_available', + 'create_insights', + # Taint exports + 'calculate_severity', + 'classify_vulnerability', + 'generate_summary', + 'format_taint_report', + 'get_taint_summary', + 'is_vulnerable_sink', +] \ No newline at end of file diff --git a/theauditor/insights/graph.py b/theauditor/insights/graph.py new file mode 100644 index 0000000..3e6f107 --- /dev/null +++ b/theauditor/insights/graph.py @@ -0,0 +1,470 @@ +"""Graph insights module - OPTIONAL interpretive analysis for dependency graphs. + +This module provides interpretive metrics like health scores, recommendations, +and weighted rankings. It's completely optional and decoupled from core graph +analysis - similar to how ml.py works. + +IMPORTANT: This module performs interpretation and scoring, which goes beyond +pure data extraction. It's designed for teams that want actionable insights +and are willing to accept some subjective analysis. +""" + +from collections import defaultdict +from typing import Any + + +class GraphInsights: + """Optional graph interpretation and scoring. + + This class provides subjective metrics and recommendations based on + graph topology. All methods here involve interpretation and scoring, + not just raw data extraction. + """ + + # Weights for hotspot scoring (configurable) + DEFAULT_WEIGHTS = { + "in_degree": 0.3, + "out_degree": 0.2, + "centrality": 0.3, + "churn": 0.1, + "loc": 0.1, + } + + def __init__(self, weights: dict[str, float] | None = None): + """ + Initialize insights analyzer with optional weight configuration. 
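+
+        A custom weights dict should provide every key used in
+        DEFAULT_WEIGHTS (in_degree, out_degree, centrality, churn, loc);
+        rank_hotspots indexes each of these keys directly, so a partial dict
+        would raise a KeyError during scoring.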
+ + Args: + weights: Custom weights for hotspot scoring + """ + self.weights = weights or self.DEFAULT_WEIGHTS + + def rank_hotspots( + self, + import_graph: dict[str, Any], + call_graph: dict[str, Any] | None = None + ) -> list[dict[str, Any]]: + """ + Rank nodes by their importance as hotspots using weighted scoring. + + This is an INTERPRETIVE method that assigns subjective importance + scores based on configurable weights. + + Args: + import_graph: Import/dependency graph + call_graph: Optional call graph for additional signals + + Returns: + List of hotspot nodes sorted by interpreted score + """ + # Calculate in/out degrees for import graph + in_degree = defaultdict(int) + out_degree = defaultdict(int) + + for edge in import_graph.get("edges", []): + out_degree[edge["source"]] += 1 + in_degree[edge["target"]] += 1 + + # Add call graph degrees if available + if call_graph: + for edge in call_graph.get("edges", []): + out_degree[edge["source"]] += 1 + in_degree[edge["target"]] += 1 + + # Calculate centrality (simplified betweenness centrality approximation) + centrality = self._calculate_centrality(import_graph) + + # Build node metrics with INTERPRETED SCORING + hotspots = [] + for node in import_graph.get("nodes", []): + node_id = node["id"] + + # Normalize metrics + in_deg = in_degree[node_id] + out_deg = out_degree[node_id] + cent = centrality.get(node_id, 0) + churn = node.get("churn", 0) or 0 + loc = node.get("loc", 0) or 0 + + # INTERPRETATION: Calculate weighted score + score = ( + self.weights["in_degree"] * in_deg + + self.weights["out_degree"] * out_deg + + self.weights["centrality"] * cent + + self.weights["churn"] * (churn / 100) + # Normalize churn + self.weights["loc"] * (loc / 1000) # Normalize LOC + ) + + hotspots.append({ + "id": node_id, + "in_degree": in_deg, + "out_degree": out_deg, + "centrality": cent, + "churn": churn, + "loc": loc, + "score": score, # INTERPRETED METRIC + }) + + # Sort by interpreted score (highest first) + hotspots.sort(key=lambda h: h["score"], reverse=True) + + return hotspots + + def _calculate_centrality(self, graph: dict[str, Any]) -> dict[str, float]: + """ + Calculate centrality scores using PageRank-like algorithm. + + This is an INTERPRETIVE scoring algorithm that assigns importance + based on graph topology. + + Args: + graph: Graph with nodes and edges + + Returns: + Dict mapping node IDs to centrality scores [0, 1] + """ + # Build adjacency list + adj = defaultdict(list) + nodes = set() + + for edge in graph.get("edges", []): + adj[edge["source"]].append(edge["target"]) + nodes.add(edge["source"]) + nodes.add(edge["target"]) + + # Initialize scores + scores = {node: 1.0 for node in nodes} + damping = 0.85 + iterations = 10 + + # Power iteration (PageRank algorithm) + for _ in range(iterations): + new_scores = {} + for node in nodes: + score = (1 - damping) + for source in nodes: + if node in adj[source]: + out_count = len(adj[source]) or 1 + score += damping * scores[source] / out_count + new_scores[node] = score + scores = new_scores + + # Normalize scores to [0, 1] + if scores: + max_score = max(scores.values()) + if max_score > 0: + scores = {k: v / max_score for k, v in scores.items()} + + return scores + + def calculate_health_metrics( + self, + import_graph: dict[str, Any], + cycles: list[dict] | None = None, + hotspots: list[dict] | None = None, + layers: dict[str, list[str]] | None = None, + ) -> dict[str, Any]: + """ + Calculate interpreted health metrics and grades. 
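+
+        Scoring starts from 100 and subtracts penalties for cycles, high
+        graph density and dominant hotspots; 90+ maps to grade 'A', 80+ to
+        'B', 70+ to 'C', 60+ to 'D', and anything lower to 'F'.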
+ + This method provides SUBJECTIVE health scoring based on + architectural best practices. The scoring is opinionated + and may not apply to all codebases. + + Args: + import_graph: Import/dependency graph + cycles: Pre-computed cycles (optional) + hotspots: Pre-computed hotspots (optional) + layers: Pre-computed layers (optional) + + Returns: + Dict with health scores, grades, and metrics + """ + # Calculate graph density + nodes_count = len(import_graph.get("nodes", [])) + edges_count = len(import_graph.get("edges", [])) + max_edges = nodes_count * (nodes_count - 1) if nodes_count > 1 else 1 + density = edges_count / max_edges if max_edges > 0 else 0 + + # INTERPRETATION: Calculate health score + health_score = 100 + + # SUBJECTIVE PENALTY: Penalize for cycles + if cycles: + cycle_penalty = min(len(cycles) * 5, 30) + health_score -= cycle_penalty + + # SUBJECTIVE PENALTY: Penalize for high density (too coupled) + if density > 0.3: + density_penalty = min((density - 0.3) * 100, 20) + health_score -= density_penalty + + # SUBJECTIVE PENALTY: Penalize for hotspots with very high degree + if hotspots and hotspots[0]["in_degree"] > 50: + hotspot_penalty = min(hotspots[0]["in_degree"] // 10, 20) + health_score -= hotspot_penalty + + # INTERPRETATION: Assign letter grade + health_grade = ( + "A" if health_score >= 90 + else "B" if health_score >= 80 + else "C" if health_score >= 70 + else "D" if health_score >= 60 + else "F" + ) + + # INTERPRETATION: Calculate fragility score (0-100, higher is worse) + fragility = 0 + + # Hotspots increase fragility + if hotspots: + top_hotspot_score = hotspots[0]["score"] + fragility += min(top_hotspot_score * 10, 40) + + # Cycles increase fragility + if cycles: + fragility += min(len(cycles) * 3, 30) + + # High coupling increases fragility + fragility += min(density * 100, 30) + + return { + "health_score": max(health_score, 0), + "health_grade": health_grade, + "fragility_score": min(fragility, 100), + "density": density, + "cycle_free": len(cycles) == 0 if cycles else True, + "well_layered": len(layers) > 2 and max(layers.keys()) < 10 if layers else False, + "loosely_coupled": density < 0.2, + "no_god_objects": not hotspots or hotspots[0]["in_degree"] < 30, + } + + def generate_recommendations( + self, + import_graph: dict[str, Any], + cycles: list[dict] | None = None, + hotspots: list[dict] | None = None, + layers: dict[str, list[str]] | None = None, + ) -> list[str]: + """ + Generate actionable recommendations based on graph analysis. + + These are OPINIONATED suggestions based on common architectural + best practices. They may not apply to all projects. 
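+
+        A minimal calling sketch (import_graph is assumed to be a dict with
+        "nodes" and "edges" lists, the same shape the other methods here use;
+        the return value is a plain list of strings):
+
+            tips = GraphInsights().generate_recommendations(import_graph, cycles=[])
+            for tip in tips:
+                print("-", tip)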
+ + Args: + import_graph: Import/dependency graph + cycles: Pre-computed cycles (optional) + hotspots: Pre-computed hotspots (optional) + layers: Pre-computed layers (optional) + + Returns: + List of recommendation strings + """ + recommendations = [] + + # Calculate density for recommendations + nodes_count = len(import_graph.get("nodes", [])) + edges_count = len(import_graph.get("edges", [])) + max_edges = nodes_count * (nodes_count - 1) if nodes_count > 1 else 1 + density = edges_count / max_edges if max_edges > 0 else 0 + + # INTERPRETATION: Generate recommendations + if cycles and len(cycles) > 0: + recommendations.append( + f"Break {len(cycles)} dependency cycles to improve maintainability" + ) + + if density > 0.3: + recommendations.append( + "Reduce coupling between modules (current density: {:.2f})".format( + density + ) + ) + + if hotspots and len(hotspots) > 0 and hotspots[0]["in_degree"] > 30: + recommendations.append( + f"Refactor hotspot '{hotspots[0]['id']}' with {hotspots[0]['in_degree']} dependencies" + ) + + if layers and len(layers) <= 2: + recommendations.append( + "Consider introducing more architectural layers for better separation" + ) + + return recommendations + + def summarize( + self, + import_graph: dict[str, Any], + call_graph: dict[str, Any] | None = None, + cycles: list[dict] | None = None, + hotspots: list[dict] | None = None, + ) -> dict[str, Any]: + """ + Generate comprehensive INTERPRETED summary of graph analysis. + + This method combines objective metrics with subjective scoring + and recommendations. It's designed for teams that want actionable + insights beyond raw data. + + Args: + import_graph: Import/dependency graph + call_graph: Optional call graph + cycles: Pre-computed cycles (optional) + hotspots: Pre-computed hotspots (optional) + + Returns: + Summary dict with metrics, health scores, and recommendations + """ + from theauditor.graph.analyzer import XGraphAnalyzer + + # Use base analyzer for pure algorithms + analyzer = XGraphAnalyzer() + + # Get pure metrics + summary = { + "import_graph": { + "nodes": len(import_graph.get("nodes", [])), + "edges": len(import_graph.get("edges", [])), + } + } + + # Add call graph metrics if available + if call_graph: + summary["call_graph"] = { + "nodes": len(call_graph.get("nodes", [])), + "edges": len(call_graph.get("edges", [])), + } + + # Calculate graph density + nodes_count = len(import_graph.get("nodes", [])) + edges_count = len(import_graph.get("edges", [])) + max_edges = nodes_count * (nodes_count - 1) if nodes_count > 1 else 1 + density = edges_count / max_edges if max_edges > 0 else 0 + summary["import_graph"]["density"] = density + + # Add cycle metrics + if cycles is None: + cycles = analyzer.detect_cycles(import_graph) + + summary["cycles"] = { + "total": len(cycles), + "largest": cycles[0]["size"] if cycles else 0, + "nodes_in_cycles": len( + set(node for cycle in cycles for node in cycle["nodes"]) + ), + } + + # Add hotspot metrics + if hotspots is None: + hotspots = self.rank_hotspots(import_graph, call_graph) + + summary["hotspots"] = { + "top_5": [h["id"] for h in hotspots[:5]], + "max_in_degree": max((h["in_degree"] for h in hotspots), default=0), + "max_out_degree": max((h["out_degree"] for h in hotspots), default=0), + } + + # Identify layers + layers = analyzer.identify_layers(import_graph) + summary["layers"] = { + "count": len(layers), + "distribution": {k: len(v) for k, v in layers.items()}, + } + + # Add INTERPRETED health metrics + summary["health_metrics"] = 
self.calculate_health_metrics( + import_graph, cycles, hotspots, layers + ) + + # Add INTERPRETED recommendations + summary["recommendations"] = self.generate_recommendations( + import_graph, cycles, hotspots, layers + ) + + return summary + + def interpret_graph_summary(self, graph_data: dict[str, Any]) -> dict[str, Any]: + """ + Add interpretive labels to graph summary data. + + This method adds subjective interpretations to raw graph statistics, + such as coupling levels and architectural insights. + + Args: + graph_data: Raw graph summary from analyzer + + Returns: + Enhanced summary with interpretive insights + """ + # Get base statistics + stats = graph_data.get("statistics", {}) + density = stats.get("graph_density", 0) + hotspots = graph_data.get("top_hotspots", []) + + # Add interpretive insights + insights = { + "coupling_level": ( + "high" if density > 0.3 + else "medium" if density > 0.1 + else "low" + ), + "potential_god_objects": len([ + h for h in hotspots + if h.get("in_degree", 0) > 30 + ]), + "highly_connected": len([ + h for h in hotspots + if h.get("total_connections", 0) > 20 + ]), + } + + # Merge with original data + graph_data["architectural_insights"] = insights + + return graph_data + + def calculate_impact_ratio( + self, + targets: list[str], + all_impacted: set[str], + total_nodes: int, + ) -> float: + """ + Calculate interpreted impact ratio for change analysis. + + This is a SUBJECTIVE metric that interprets the scope of impact + as a ratio of total system size. + + Args: + targets: Original target nodes + all_impacted: All impacted nodes (targets + upstream + downstream) + total_nodes: Total number of nodes in graph + + Returns: + Impact ratio [0, 1] + """ + if total_nodes == 0: + return 0.0 + + return len(all_impacted) / total_nodes + + +# Module-level function for backward compatibility +def check_insights_available() -> bool: + """Check if insights module is available (always True).""" + return True + + +def create_insights(weights: dict[str, float] | None = None) -> GraphInsights: + """ + Factory function to create GraphInsights instance. + + Args: + weights: Optional custom weights for scoring + + Returns: + GraphInsights instance + """ + return GraphInsights(weights) \ No newline at end of file diff --git a/theauditor/insights/ml.py b/theauditor/insights/ml.py new file mode 100644 index 0000000..ed156b0 --- /dev/null +++ b/theauditor/insights/ml.py @@ -0,0 +1,1241 @@ +"""Offline ML signals for TheAuditor - manual trigger, non-blocking.""" + +import json +import os +import sqlite3 +import subprocess +import tempfile +from collections import defaultdict +from datetime import UTC, datetime +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + import numpy as np + +# Safe import of ML dependencies +ML_AVAILABLE = False +try: + import joblib + import numpy as np + from sklearn.isotonic import IsotonicRegression + from sklearn.linear_model import Ridge, SGDClassifier + from sklearn.ensemble import GradientBoostingClassifier + from sklearn.preprocessing import StandardScaler + + ML_AVAILABLE = True +except ImportError: + pass + + +def check_ml_available(): + """Check if ML dependencies are available.""" + if not ML_AVAILABLE: + print("ML disabled. 
Install extras: pip install -e .[ml]") + return False + return True + + +def fowler_noll_hash(text: str, dim: int = 2000) -> int: + """Simple FNV-1a hash for text feature hashing.""" + FNV_PRIME = 0x01000193 + FNV_OFFSET = 0x811C9DC5 + + hash_val = FNV_OFFSET + for char in text.encode("utf-8"): + hash_val ^= char + hash_val = (hash_val * FNV_PRIME) & 0xFFFFFFFF + + return hash_val % dim + + +def extract_text_features( + path: str, rca_messages: list[str] = None, dim: int = 2000 +) -> dict[int, float]: + """Extract hashed text features from path and RCA messages.""" + features = defaultdict(float) + + # Hash path components + parts = Path(path).parts + for part in parts: + idx = fowler_noll_hash(part, dim) + features[idx] += 1.0 + + # Hash basename + basename = Path(path).name + idx = fowler_noll_hash(basename, dim) + features[idx] += 2.0 + + # Hash RCA messages if present + if rca_messages: + for msg in rca_messages[:5]: # Limit to recent 5 + tokens = msg.lower().split()[:10] # First 10 tokens + for token in tokens: + idx = fowler_noll_hash(token, dim) + features[idx] += 0.5 + + return dict(features) + + +def load_journal_stats(history_dir: Path, window: int = 50, run_type: str = "full") -> dict[str, dict]: + """ + Load and aggregate stats from all historical journal files. + + Args: + history_dir: Base history directory + window: Number of recent entries to analyze per file + run_type: Type of runs to load ("full", "diff", or "all") + """ + if not history_dir.exists(): + return {} + + stats = defaultdict( + lambda: { + "touches": 0, + "failures": 0, + "successes": 0, + "recent_phases": [], + } + ) + + try: + # Find historical journal files based on run type + if run_type == "full": + journal_files = list(history_dir.glob('full/*/journal.ndjson')) + elif run_type == "diff": + journal_files = list(history_dir.glob('diff/*/journal.ndjson')) + else: # run_type == "all" + journal_files = list(history_dir.glob('*/*/journal.ndjson')) + + # If no journal files found, fallback to FCE data + if not journal_files: + print("Warning: No journal.ndjson files found. 
Using FCE and AST failure data as fallback for training.") + + # Load from FCE files instead + if run_type == "full": + fce_files = list(history_dir.glob('full/*/raw/fce.json')) + elif run_type == "diff": + fce_files = list(history_dir.glob('diff/*/raw/fce.json')) + else: # run_type == "all" + fce_files = list(history_dir.glob('*/*/raw/fce.json')) + + # Process FCE files as proxy for journal data + for fce_path in fce_files: + try: + with open(fce_path) as f: + data = json.load(f) + + # Treat each finding as a "touch" and errors/criticals as "failures" + for finding in data.get("all_findings", []): + file = finding.get("file", "") + if file: + stats[file]["touches"] += 1 + severity = finding.get("severity", "") + if severity in ["error", "critical"]: + stats[file]["failures"] += 1 + else: + stats[file]["successes"] += 1 + except Exception: + continue # Skip files that can't be read + + return dict(stats) + + for journal_path in journal_files: + try: + with open(journal_path) as f: + lines = f.readlines()[-window * 20 :] # Approximate last N runs per file + + for line in lines: + try: + event = json.loads(line) + + if event.get("phase") == "apply_patch" and "file" in event: + file = event["file"] + stats[file]["touches"] += 1 + + if "result" in event: + for file_path in stats: + if event["result"] == "fail": + stats[file_path]["failures"] += 1 + else: + stats[file_path]["successes"] += 1 + + except json.JSONDecodeError: + continue + except Exception: + continue # Skip files that can't be read + except (ImportError, ValueError, AttributeError): + pass # ML unavailable - gracefully skip + + return dict(stats) + + +def load_rca_stats(history_dir: Path, run_type: str = "full") -> dict[str, dict]: + """ + Load RCA failure stats from all historical RCA files. + + Args: + history_dir: Base history directory + run_type: Type of runs to load ("full", "diff", or "all") + """ + if not history_dir.exists(): + return {} + + stats = defaultdict( + lambda: { + "fail_count": 0, + "categories": [], + "messages": [], + } + ) + + try: + # Find historical FCE files based on run type + if run_type == "full": + fce_files = list(history_dir.glob('full/*/fce.json')) + elif run_type == "diff": + fce_files = list(history_dir.glob('diff/*/fce.json')) + else: # run_type == "all" + fce_files = list(history_dir.glob('*/*/fce.json')) + + for fce_path in fce_files: + try: + with open(fce_path) as f: + data = json.load(f) + + for failure in data.get("failures", []): + file = failure.get("file", "") + if file: + stats[file]["fail_count"] += 1 + if "category" in failure: + stats[file]["categories"].append(failure["category"]) + if "message" in failure: + stats[file]["messages"].append(failure["message"][:100]) + except Exception: + continue # Skip files that can't be read + except (ImportError, ValueError, AttributeError): + pass # ML unavailable - gracefully skip + + return dict(stats) + + +def load_ast_stats(history_dir: Path, run_type: str = "full") -> dict[str, dict]: + """ + Load AST proof stats from all historical AST files. 
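+
+    A minimal calling sketch (the returned mapping is keyed by file path, with
+    "invariant_fails", "invariant_passes", and "failed_checks" per file):
+
+        ast_stats = load_ast_stats(Path("./.pf/history"), run_type="all")
+        worst = max(ast_stats.items(),
+                    key=lambda kv: kv[1]["invariant_fails"], default=None)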
+ + Args: + history_dir: Base history directory + run_type: Type of runs to load ("full", "diff", or "all") + """ + if not history_dir.exists(): + return {} + + stats = defaultdict( + lambda: { + "invariant_fails": 0, + "invariant_passes": 0, + "failed_checks": [], + } + ) + + try: + # Find historical AST proof files based on run type + if run_type == "full": + ast_files = list(history_dir.glob('full/*/ast_proofs.json')) + elif run_type == "diff": + ast_files = list(history_dir.glob('diff/*/ast_proofs.json')) + else: # run_type == "all" + ast_files = list(history_dir.glob('*/*/ast_proofs.json')) + + for ast_path in ast_files: + try: + with open(ast_path) as f: + data = json.load(f) + + for result in data.get("results", []): + file = result.get("path", "") + for check in result.get("checks", []): + if check["status"] == "FAIL": + stats[file]["invariant_fails"] += 1 + stats[file]["failed_checks"].append(check["id"]) + elif check["status"] == "PASS": + stats[file]["invariant_passes"] += 1 + except Exception: + continue # Skip files that can't be read + except (ImportError, ValueError, AttributeError): + pass # ML unavailable - gracefully skip + + return dict(stats) + + +def load_graph_stats(db_path: str, file_paths: list[str]) -> dict[str, dict]: + """Load graph topology stats from index DB.""" + if not Path(db_path).exists() or not file_paths: + return {} + + stats = defaultdict( + lambda: { + "in_degree": 0, + "out_degree": 0, + "has_routes": False, + "has_sql": False, + } + ) + + try: + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + # Get refs (imports/exports) + placeholders = ",".join("?" * len(file_paths)) + + # In-degree: files that import this file + cursor.execute( + f""" + SELECT value, COUNT(*) as count + FROM refs + WHERE value IN ({placeholders}) + GROUP BY value + """, + file_paths, + ) + + for file_path, count in cursor.fetchall(): + stats[file_path]["in_degree"] = count + + # Out-degree: files this file imports + cursor.execute( + f""" + SELECT src, COUNT(*) as count + FROM refs + WHERE src IN ({placeholders}) + GROUP BY src + """, + file_paths, + ) + + for file_path, count in cursor.fetchall(): + stats[file_path]["out_degree"] = count + + # Check for routes (now stored in api_endpoints table after refactor) + cursor.execute( + f""" + SELECT DISTINCT file + FROM api_endpoints + WHERE file IN ({placeholders}) + """, + file_paths, + ) + + for (file_path,) in cursor.fetchall(): + stats[file_path]["has_routes"] = True + + # Check for SQL objects + cursor.execute( + f""" + SELECT DISTINCT file + FROM sql_objects + WHERE file IN ({placeholders}) + """, + file_paths, + ) + + for (file_path,) in cursor.fetchall(): + stats[file_path]["has_sql"] = True + + conn.close() + except (ImportError, ValueError, AttributeError): + pass # ML unavailable - gracefully skip + + return dict(stats) + + +def load_git_churn(file_paths: list[str], window_days: int = 30) -> dict[str, int]: + """Load git churn counts if available.""" + if not Path(".git").exists(): + return {} + + churn = defaultdict(int) + + try: + # Use temp files to avoid buffer overflow + with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stdout.txt', encoding='utf-8') as stdout_fp, \ + tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stderr.txt', encoding='utf-8') as stderr_fp: + + stdout_path = stdout_fp.name + stderr_path = stderr_fp.name + + result = subprocess.run( + ["git", "log", "--name-only", "--pretty=format:", f"--since={window_days} days ago"], + stdout=stdout_fp, + 
stderr=stderr_fp, + text=True, + timeout=10, + ) + + with open(stdout_path, 'r', encoding='utf-8') as f: + result.stdout = f.read() + with open(stderr_path, 'r', encoding='utf-8') as f: + result.stderr = f.read() + + os.unlink(stdout_path) + os.unlink(stderr_path) + + if result.returncode == 0: + for line in result.stdout.split("\n"): + line = line.strip() + if line and line in file_paths: + churn[line] += 1 + except (ImportError, ValueError, AttributeError): + pass # ML unavailable - gracefully skip + + return dict(churn) + + +def load_semantic_import_features(db_path: str, file_paths: list[str]) -> dict[str, dict]: + """ + Extract semantic import features to understand file purpose. + + Returns dict with keys: has_http_import, has_db_import, has_auth_import, has_test_import + """ + if not Path(db_path).exists() or not file_paths: + return {} + + # Common library patterns for different purposes + HTTP_LIBS = { + 'requests', 'aiohttp', 'httpx', 'urllib', 'axios', 'fetch', 'superagent', + 'express', 'fastapi', 'flask', 'django.http', 'tornado', 'starlette' + } + + DB_LIBS = { + 'sqlalchemy', 'psycopg2', 'psycopg', 'pymongo', 'redis', 'django.db', + 'peewee', 'tortoise', 'databases', 'asyncpg', 'sqlite3', 'mysql', + 'mongoose', 'sequelize', 'typeorm', 'prisma', 'knex', 'pg' + } + + AUTH_LIBS = { + 'jwt', 'pyjwt', 'passlib', 'oauth', 'oauth2', 'authlib', 'django.contrib.auth', + 'flask_login', 'flask_jwt', 'bcrypt', 'cryptography', 'passport', + 'jsonwebtoken', 'express-jwt', 'firebase-auth', 'auth0' + } + + TEST_LIBS = { + 'pytest', 'unittest', 'mock', 'faker', 'factory_boy', 'hypothesis', + 'jest', 'mocha', 'chai', 'sinon', 'enzyme', 'vitest', 'testing-library' + } + + stats = defaultdict(lambda: { + "has_http_import": False, + "has_db_import": False, + "has_auth_import": False, + "has_test_import": False, + }) + + try: + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + placeholders = ",".join("?" * len(file_paths)) + + # Get all imports for the specified files + cursor.execute( + f""" + SELECT src, value + FROM refs + WHERE src IN ({placeholders}) + AND kind IN ('import', 'from', 'require') + """, + file_paths, + ) + + for file_path, import_value in cursor.fetchall(): + # Normalize import value (strip quotes, extract package name) + import_name = import_value.lower().strip('"\'') + # Handle scoped packages like @angular/core + if '/' in import_name: + import_name = import_name.split('/')[0].lstrip('@') + # Handle sub-modules like django.contrib.auth + base_import = import_name.split('.')[0] + + # Check against our semantic categories + if any(lib in import_name or base_import == lib for lib in HTTP_LIBS): + stats[file_path]["has_http_import"] = True + + if any(lib in import_name or base_import == lib for lib in DB_LIBS): + stats[file_path]["has_db_import"] = True + + if any(lib in import_name or base_import == lib for lib in AUTH_LIBS): + stats[file_path]["has_auth_import"] = True + + if any(lib in import_name or base_import == lib for lib in TEST_LIBS): + stats[file_path]["has_test_import"] = True + + conn.close() + except Exception: + pass # Gracefully skip on error + + return dict(stats) + + +def load_ast_complexity_metrics(db_path: str, file_paths: list[str]) -> dict[str, dict]: + """ + Extract AST-based complexity metrics from the symbols table. 
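+
+    A minimal calling sketch (the file path is illustrative and must match how
+    files are recorded in the symbols table; files with no indexed symbols are
+    simply absent from the result):
+
+        metrics = load_ast_complexity_metrics("./.pf/repo_index.db",
+                                              ["theauditor/cli.py"])
+        counts = metrics.get("theauditor/cli.py", {})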
+ + Returns dict with keys: function_count, class_count, call_count, try_except_count, async_def_count + """ + if not Path(db_path).exists() or not file_paths: + return {} + + stats = defaultdict(lambda: { + "function_count": 0, + "class_count": 0, + "call_count": 0, + "try_except_count": 0, + "async_def_count": 0, + }) + + try: + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + placeholders = ",".join("?" * len(file_paths)) + + # Count different symbol types per file + cursor.execute( + f""" + SELECT path, type, COUNT(*) as count + FROM symbols + WHERE path IN ({placeholders}) + GROUP BY path, type + """, + file_paths, + ) + + for file_path, symbol_type, count in cursor.fetchall(): + if symbol_type == "function": + stats[file_path]["function_count"] = count + elif symbol_type == "class": + stats[file_path]["class_count"] = count + elif symbol_type == "call": + stats[file_path]["call_count"] = count + + # Count async functions (those with 'async' in the name) + # This is a heuristic since we don't have a dedicated async flag + cursor.execute( + f""" + SELECT path, COUNT(*) as count + FROM symbols + WHERE path IN ({placeholders}) + AND type = 'function' + AND (name LIKE 'async%' OR name LIKE '%async%') + GROUP BY path + """, + file_paths, + ) + + for file_path, count in cursor.fetchall(): + stats[file_path]["async_def_count"] = count + + # Count try/except patterns - look for exception handling calls + # Common patterns: catch, except, rescue, error + cursor.execute( + f""" + SELECT path, COUNT(*) as count + FROM symbols + WHERE path IN ({placeholders}) + AND type = 'call' + AND (name IN ('catch', 'except', 'rescue', 'error', 'try', 'finally')) + GROUP BY path + """, + file_paths, + ) + + for file_path, count in cursor.fetchall(): + stats[file_path]["try_except_count"] = count + + conn.close() + except Exception: + pass # Gracefully skip on error + + return dict(stats) + + +def build_feature_matrix( + file_paths: list[str], + manifest_path: str, + db_path: str, + journal_stats: dict = None, + rca_stats: dict = None, + ast_stats: dict = None, + enable_git: bool = False, +) -> tuple["np.ndarray", dict[str, int]]: + """Build feature matrix for files.""" + if not ML_AVAILABLE: + return None, {} + + # Load manifest for file metadata + manifest_map = {} + try: + with open(manifest_path) as f: + manifest = json.load(f) + for entry in manifest: + manifest_map[entry["path"]] = entry + except (ImportError, ValueError, AttributeError): + pass # ML unavailable - gracefully skip + + # Use provided stats or default to empty dicts + journal_stats = journal_stats if journal_stats is not None else {} + rca_stats = rca_stats if rca_stats is not None else {} + ast_stats = ast_stats if ast_stats is not None else {} + graph_stats = load_graph_stats(db_path, file_paths) + + # Load centrality from graph metrics if available + try: + metrics_path = Path("./.pf/raw/graph_metrics.json") + if metrics_path.exists(): + with open(metrics_path) as f: + graph_metrics = json.load(f) + # Merge into existing stats + for path in file_paths: + if path in graph_metrics: + if path not in graph_stats: + graph_stats[path] = { + "in_degree": 0, + "out_degree": 0, + "has_routes": False, + "has_sql": False, + } + graph_stats[path]["centrality"] = graph_metrics[path] + except (json.JSONDecodeError, IOError): + pass # Proceed without centrality scores + + git_churn = load_git_churn(file_paths) if enable_git else {} + + # Load new advanced features + semantic_imports = load_semantic_import_features(db_path, file_paths) + 
complexity_metrics = load_ast_complexity_metrics(db_path, file_paths) + + # Build feature vectors + feature_names = [] + features = [] + + for file_path in file_paths: + feat = [] + + # Basic metadata features + meta = manifest_map.get(file_path, {}) + feat.append(meta.get("bytes", 0) / 10000.0) # Normalized + feat.append(meta.get("loc", 0) / 100.0) # Normalized + + # Extension as categorical + ext = meta.get("ext", "") + feat.append(1.0 if ext in [".ts", ".tsx", ".js", ".jsx"] else 0.0) + feat.append(1.0 if ext == ".py" else 0.0) + + # Graph topology + graph = graph_stats.get(file_path, {}) + feat.append(graph.get("in_degree", 0) / 10.0) + feat.append(graph.get("out_degree", 0) / 10.0) + feat.append(1.0 if graph.get("has_routes") else 0.0) + feat.append(1.0 if graph.get("has_sql") else 0.0) + feat.append(graph.get("centrality", 0.0)) # Already normalized [0,1] + + # Journal history + journal = journal_stats.get(file_path, {}) + feat.append(journal.get("touches", 0) / 10.0) + feat.append(journal.get("failures", 0) / 5.0) + feat.append(journal.get("successes", 0) / 5.0) + + # RCA history + rca = rca_stats.get(file_path, {}) + feat.append(rca.get("fail_count", 0) / 5.0) + + # AST checks + ast = ast_stats.get(file_path, {}) + feat.append(ast.get("invariant_fails", 0) / 3.0) + feat.append(ast.get("invariant_passes", 0) / 3.0) + + # Git churn + feat.append(git_churn.get(file_path, 0) / 5.0) + + # NEW: Semantic import features + semantic = semantic_imports.get(file_path, {}) + feat.append(1.0 if semantic.get("has_http_import") else 0.0) + feat.append(1.0 if semantic.get("has_db_import") else 0.0) + feat.append(1.0 if semantic.get("has_auth_import") else 0.0) + feat.append(1.0 if semantic.get("has_test_import") else 0.0) + + # NEW: AST complexity metrics + complexity = complexity_metrics.get(file_path, {}) + feat.append(complexity.get("function_count", 0) / 20.0) # Normalized + feat.append(complexity.get("class_count", 0) / 10.0) # Normalized + feat.append(complexity.get("call_count", 0) / 50.0) # Normalized + feat.append(complexity.get("try_except_count", 0) / 5.0) # Normalized + feat.append(complexity.get("async_def_count", 0) / 5.0) # Normalized + + # Text features (simplified - just path hash) + text_feats = extract_text_features( + file_path, rca.get("messages", []), dim=50 # Small for speed + ) + text_vec = [0.0] * 50 + for idx, val in text_feats.items(): + if idx < 50: + text_vec[idx] = val + feat.extend(text_vec) + + features.append(feat) + + # Feature names for debugging + feature_names = [ + "bytes_norm", + "loc_norm", + "is_js", + "is_py", + "in_degree", + "out_degree", + "has_routes", + "has_sql", + "centrality", + "touches", + "failures", + "successes", + "rca_fails", + "ast_fails", + "ast_passes", + "git_churn", + # New semantic import features + "has_http_import", + "has_db_import", + "has_auth_import", + "has_test_import", + # New complexity metrics + "function_count", + "class_count", + "call_count", + "try_except_count", + "async_def_count", + ] + [f"text_{i}" for i in range(50)] + + feature_name_map = {name: i for i, name in enumerate(feature_names)} + + return np.array(features), feature_name_map + + +def build_labels( + file_paths: list[str], + journal_stats: dict, + rca_stats: dict, +) -> tuple["np.ndarray", "np.ndarray", "np.ndarray"]: + """Build label vectors for training.""" + if not ML_AVAILABLE: + return None, None, None + + # Root cause labels (binary): file failed in RCA + root_cause_labels = np.array( + [1.0 if rca_stats.get(fp, {}).get("fail_count", 0) > 0 else 0.0 
for fp in file_paths] + ) + + # Next edit labels (binary): file was edited in journal + next_edit_labels = np.array( + [1.0 if journal_stats.get(fp, {}).get("touches", 0) > 0 else 0.0 for fp in file_paths] + ) + + # Risk scores (continuous): failure ratio + risk_labels = np.array( + [ + min( + 1.0, + journal_stats.get(fp, {}).get("failures", 0) + / max(1, journal_stats.get(fp, {}).get("touches", 1)), + ) + for fp in file_paths + ] + ) + + return root_cause_labels, next_edit_labels, risk_labels + + +def train_models( + features: "np.ndarray", + root_cause_labels: "np.ndarray", + next_edit_labels: "np.ndarray", + risk_labels: "np.ndarray", + seed: int = 13, + sample_weight: "np.ndarray" = None, +) -> tuple[Any, Any, Any, Any]: + """Train the three models with optional sample weighting for human feedback.""" + if not ML_AVAILABLE: + return None, None, None, None + + # Handle empty or all-same labels + if len(np.unique(root_cause_labels)) < 2: + root_cause_labels[0] = 1 - root_cause_labels[0] # Flip one for training + if len(np.unique(next_edit_labels)) < 2: + next_edit_labels[0] = 1 - next_edit_labels[0] + + # Scale features + scaler = StandardScaler() + features_scaled = scaler.fit_transform(features) + + # Train root cause classifier with GradientBoostingClassifier + # More powerful ensemble model that captures non-linear relationships + root_cause_clf = GradientBoostingClassifier( + n_estimators=50, # Reduced for speed + learning_rate=0.1, + max_depth=3, + random_state=seed, + subsample=0.8, # Stochastic gradient boosting + min_samples_split=5, # Prevent overfitting + ) + root_cause_clf.fit(features_scaled, root_cause_labels, sample_weight=sample_weight) + + # Train next edit classifier with GradientBoostingClassifier + next_edit_clf = GradientBoostingClassifier( + n_estimators=50, + learning_rate=0.1, + max_depth=3, + random_state=seed, + subsample=0.8, + min_samples_split=5, + ) + next_edit_clf.fit(features_scaled, next_edit_labels, sample_weight=sample_weight) + + # Train risk regressor (keep Ridge for regression task) + risk_reg = Ridge(alpha=1.0, random_state=seed) + risk_reg.fit(features_scaled, risk_labels, sample_weight=sample_weight) + + return root_cause_clf, next_edit_clf, risk_reg, scaler + + +def save_models( + model_dir: str, + root_cause_clf: Any, + next_edit_clf: Any, + risk_reg: Any, + scaler: Any, + feature_name_map: dict, + stats: dict, +): + """Save trained models and metadata.""" + if not ML_AVAILABLE: + return + + Path(model_dir).mkdir(parents=True, exist_ok=True) + + # Save models + model_data = { + "root_cause_clf": root_cause_clf, + "next_edit_clf": next_edit_clf, + "risk_reg": risk_reg, + "scaler": scaler, + } + joblib.dump(model_data, Path(model_dir) / "model.joblib") + + # Save feature map + with open(Path(model_dir) / "feature_map.json", "w") as f: + json.dump(feature_name_map, f, indent=2) + + # Save training stats + with open(Path(model_dir) / "training_stats.json", "w") as f: + json.dump(stats, f, indent=2) + + +def is_source_file(file_path: str) -> bool: + """Check if a file is a source code file (not test, config, or docs).""" + path = Path(file_path) + + # Skip test files and test directories + if any(part in ['test', 'tests', '__tests__', 'spec'] for part in path.parts): + return False + if path.name.startswith('test_') or path.name.endswith('_test.py') or '.test.' in path.name or '.spec.' 
in path.name: + return False + + # Skip documentation + if path.suffix.lower() in ['.md', '.rst', '.txt', '.yaml', '.yml']: + return False + + # Skip configuration files + config_files = { + '.gitignore', '.gitattributes', '.editorconfig', + 'pyproject.toml', 'setup.py', 'setup.cfg', + 'package.json', 'package-lock.json', 'yarn.lock', + 'Makefile', 'makefile', 'requirements.txt', + 'Dockerfile', 'docker-compose.yml', '.dockerignore', + '.env', '.env.example', 'tsconfig.json', 'jest.config.js', + 'webpack.config.js', 'babel.config.js', '.eslintrc.js', + '.prettierrc', 'tox.ini', 'pytest.ini' + } + if path.name.lower() in config_files: + return False + + # Skip non-source extensions + non_source_exts = { + '.json', '.xml', '.lock', '.log', '.bak', + '.tmp', '.temp', '.cache', '.pid', '.sock' + } + if path.suffix.lower() in non_source_exts and path.name != 'manifest.json': + return False + + # Skip directories that are typically not source + skip_dirs = {'docs', 'documentation', 'examples', 'samples', 'fixtures'} + if any(part.lower() in skip_dirs for part in path.parts): + return False + + # Accept common source file extensions + source_exts = { + '.py', '.js', '.jsx', '.ts', '.tsx', '.java', '.go', + '.cs', '.cpp', '.cc', '.c', '.h', '.hpp', '.rs', + '.rb', '.php', '.swift', '.kt', '.scala', '.lua', + '.sh', '.bash', '.ps1', '.sql' + } + + return path.suffix.lower() in source_exts + + +def load_models(model_dir: str) -> tuple[Any, Any, Any, Any, dict]: + """Load trained models.""" + if not ML_AVAILABLE: + return None, None, None, None, {} + + model_path = Path(model_dir) / "model.joblib" + if not model_path.exists(): + return None, None, None, None, {} + + try: + model_data = joblib.load(model_path) + + with open(Path(model_dir) / "feature_map.json") as f: + feature_map = json.load(f) + + return ( + model_data["root_cause_clf"], + model_data["next_edit_clf"], + model_data["risk_reg"], + model_data["scaler"], + feature_map, + ) + except (ImportError, ValueError, AttributeError): + # ML unavailable - return graceful defaults + return None, None, None, None, {} + + +def learn( + db_path: str = "./.pf/repo_index.db", + manifest_path: str = "./.pf/manifest.json", + journal_path: str = "./.pf/journal.ndjson", + fce_path: str = "./.pf/fce.json", + ast_path: str = "./.pf/ast_proofs.json", + enable_git: bool = False, + model_dir: str = "./.pf/ml", + window: int = 50, + seed: int = 13, + print_stats: bool = False, + feedback_path: str = None, + train_on: str = "full", +) -> dict[str, Any]: + """Train ML models from artifacts.""" + if not check_ml_available(): + return {"success": False, "error": "ML not available"} + + # Get all files from manifest + try: + with open(manifest_path) as f: + manifest = json.load(f) + all_file_paths = [entry["path"] for entry in manifest] + + # Filter to only source files + file_paths = [fp for fp in all_file_paths if is_source_file(fp)] + + if print_stats: + excluded_count = len(all_file_paths) - len(file_paths) + if excluded_count > 0: + print(f"Excluded {excluded_count} non-source files (tests, docs, configs)") + + except Exception as e: + return {"success": False, "error": f"Failed to load manifest: {e}"} + + if not file_paths: + return {"success": False, "error": "No source files found in manifest"} + + # Define history directory + history_dir = Path("./.pf/history") + + # Load historical data based on train_on parameter + journal_stats = load_journal_stats(history_dir, window, run_type=train_on) + rca_stats = load_rca_stats(history_dir, run_type=train_on) + 
ast_stats = load_ast_stats(history_dir, run_type=train_on) + + # Build features with loaded stats + features, feature_name_map = build_feature_matrix( + file_paths, + manifest_path, + db_path, + journal_stats, + rca_stats, + ast_stats, + enable_git, + ) + + # Build labels with loaded stats + root_cause_labels, next_edit_labels, risk_labels = build_labels( + file_paths, + journal_stats, + rca_stats, + ) + + # Load human feedback if provided + sample_weight = None + if feedback_path and Path(feedback_path).exists(): + try: + with open(feedback_path) as f: + feedback_data = json.load(f) + + # Create sample weights array + sample_weight = np.ones(len(file_paths)) + + # Increase weight for files with human feedback + for i, fp in enumerate(file_paths): + if fp in feedback_data: + # Weight human-reviewed files 5x higher + sample_weight[i] = 5.0 + + # Also update labels based on feedback + feedback = feedback_data[fp] + if "is_risky" in feedback: + # Human says file is risky - treat as positive for risk + risk_labels[i] = 1.0 if feedback["is_risky"] else 0.0 + if "is_root_cause" in feedback: + # Human says file is root cause + root_cause_labels[i] = 1.0 if feedback["is_root_cause"] else 0.0 + if "will_need_edit" in feedback: + # Human says file will need editing + next_edit_labels[i] = 1.0 if feedback["will_need_edit"] else 0.0 + + if print_stats: + feedback_count = sum(1 for fp in file_paths if fp in feedback_data) + print(f"Incorporating human feedback for {feedback_count} files") + + except Exception as e: + if print_stats: + print(f"Warning: Could not load feedback file: {e}") + + # Check data size + n_samples = len(file_paths) + cold_start = n_samples < 500 + + if print_stats: + print(f"Training on {n_samples} files") + print(f"Features: {features.shape[1]} dimensions") + print(f"Root cause positive: {np.sum(root_cause_labels)}/{n_samples}") + print(f"Next edit positive: {np.sum(next_edit_labels)}/{n_samples}") + print(f"Mean risk: {np.mean(risk_labels):.3f}") + if cold_start: + print("WARNING: Cold-start with <500 samples, expect noisy signals") + + # Train models with optional sample weights from human feedback + root_cause_clf, next_edit_clf, risk_reg, scaler = train_models( + features, + root_cause_labels, + next_edit_labels, + risk_labels, + seed, + sample_weight=sample_weight, + ) + + # Calculate simple metrics + stats = { + "n_samples": n_samples, + "n_features": features.shape[1], + "root_cause_positive_ratio": float(np.mean(root_cause_labels)), + "next_edit_positive_ratio": float(np.mean(next_edit_labels)), + "mean_risk": float(np.mean(risk_labels)), + "cold_start": cold_start, + "timestamp": datetime.now(UTC).isoformat(), + } + + # Save models + save_models( + model_dir, + root_cause_clf, + next_edit_clf, + risk_reg, + scaler, + feature_name_map, + stats, + ) + + if print_stats: + print(f"Models saved to {model_dir}") + + return { + "success": True, + "stats": stats, + "model_dir": model_dir, + "source_files": len(file_paths), + "total_files": len(all_file_paths), + "excluded_count": len(all_file_paths) - len(file_paths), + } + + +def suggest( + db_path: str = "./.pf/repo_index.db", + manifest_path: str = "./.pf/manifest.json", + workset_path: str = "./.pf/workset.json", + fce_path: str = "./.pf/fce.json", + ast_path: str = "./.pf/ast_proofs.json", + model_dir: str = "./.pf/ml", + topk: int = 10, + out_path: str = "./.pf/insights/ml_suggestions.json", + print_plan: bool = False, +) -> dict[str, Any]: + """Generate ML suggestions for workset files.""" + if not check_ml_available(): 
+ return {"success": False, "error": "ML not available"} + + # Load models + root_cause_clf, next_edit_clf, risk_reg, scaler, feature_map = load_models(model_dir) + + if root_cause_clf is None: + print(f"No models found in {model_dir}. Run 'aud learn' first.") + return {"success": False, "error": "Models not found"} + + # Load workset + try: + with open(workset_path) as f: + workset = json.load(f) + all_file_paths = [p["path"] for p in workset.get("paths", [])] + + # Filter to only source files + file_paths = [fp for fp in all_file_paths if is_source_file(fp)] + + if print_plan: + excluded_count = len(all_file_paths) - len(file_paths) + if excluded_count > 0: + print(f"Excluded {excluded_count} non-source files from suggestions") + + except Exception as e: + return {"success": False, "error": f"Failed to load workset: {e}"} + + if not file_paths: + return {"success": False, "error": "No source files in workset"} + + # Load current FCE and AST stats if available + current_fce_stats = {} + if fce_path and Path(fce_path).exists(): + try: + with open(fce_path) as f: + data = json.load(f) + for failure in data.get("failures", []): + file = failure.get("file", "") + if file: + if file not in current_fce_stats: + current_fce_stats[file] = {"fail_count": 0, "categories": [], "messages": []} + current_fce_stats[file]["fail_count"] += 1 + if "category" in failure: + current_fce_stats[file]["categories"].append(failure["category"]) + if "message" in failure: + current_fce_stats[file]["messages"].append(failure["message"][:100]) + except Exception: + pass + + current_ast_stats = {} + if ast_path and Path(ast_path).exists(): + try: + with open(ast_path) as f: + data = json.load(f) + for result in data.get("results", []): + file = result.get("path", "") + if file: + if file not in current_ast_stats: + current_ast_stats[file] = {"invariant_fails": 0, "invariant_passes": 0, "failed_checks": []} + for check in result.get("checks", []): + if check["status"] == "FAIL": + current_ast_stats[file]["invariant_fails"] += 1 + current_ast_stats[file]["failed_checks"].append(check["id"]) + elif check["status"] == "PASS": + current_ast_stats[file]["invariant_passes"] += 1 + except Exception: + pass + + # Build features for workset files + features, _ = build_feature_matrix( + file_paths, + manifest_path, + db_path, + None, # No journal for prediction + current_fce_stats, # Use current FCE if available + current_ast_stats, # Use current AST if available + False, # No git for speed + ) + + # Scale features + features_scaled = scaler.transform(features) + + # Get predictions + root_cause_scores = root_cause_clf.predict_proba(features_scaled)[:, 1] + next_edit_scores = next_edit_clf.predict_proba(features_scaled)[:, 1] + risk_scores = np.clip(risk_reg.predict(features_scaled), 0, 1) + + # Rank files + root_cause_ranked = sorted( + zip(file_paths, root_cause_scores, strict=False), + key=lambda x: x[1], + reverse=True, + )[:topk] + + next_edit_ranked = sorted( + zip(file_paths, next_edit_scores, strict=False), + key=lambda x: x[1], + reverse=True, + )[:topk] + + risk_ranked = sorted( + zip(file_paths, risk_scores, strict=False), + key=lambda x: x[1], + reverse=True, + )[:topk] + + # Build output + output = { + "generated_at": datetime.now(UTC).isoformat(), + "workset_size": len(file_paths), + "likely_root_causes": [ + {"path": path, "score": float(score)} for path, score in root_cause_ranked + ], + "next_files_to_edit": [ + {"path": path, "score": float(score)} for path, score in next_edit_ranked + ], + "risk": [{"path": 
path, "score": float(score)} for path, score in risk_ranked], + } + + # Ensure output directory exists + Path(out_path).parent.mkdir(parents=True, exist_ok=True) + + # Write output atomically + tmp_path = f"{out_path}.tmp" + with open(tmp_path, "w") as f: + json.dump(output, f, indent=2, sort_keys=True) + os.replace(tmp_path, out_path) + + if print_plan: + print(f"Workset: {len(file_paths)} files") + print(f"\nTop {min(5, topk)} likely root causes:") + for item in output["likely_root_causes"][:5]: + print(f" {item['score']:.3f} - {item['path']}") + + print(f"\nTop {min(5, topk)} next files to edit:") + for item in output["next_files_to_edit"][:5]: + print(f" {item['score']:.3f} - {item['path']}") + + print(f"\nTop {min(5, topk)} risk scores:") + for item in output["risk"][:5]: + print(f" {item['score']:.3f} - {item['path']}") + + return { + "success": True, + "out_path": out_path, + "workset_size": len(file_paths), + "original_size": len(all_file_paths), + "excluded_count": len(all_file_paths) - len(file_paths), + "topk": topk, + } diff --git a/theauditor/insights/taint.py b/theauditor/insights/taint.py new file mode 100644 index 0000000..9f3a45d --- /dev/null +++ b/theauditor/insights/taint.py @@ -0,0 +1,446 @@ +"""Interpretive intelligence layer for taint analysis - optional severity scoring and vulnerability classification.""" + +import sqlite3 +import platform +from typing import Dict, List, Any +from collections import defaultdict + +# Detect if running on Windows for character encoding +IS_WINDOWS = platform.system() == "Windows" + + +def calculate_severity(path_data: Dict[str, Any]) -> str: + """ + Calculate severity based on vulnerability type and path complexity. + This is interpretive logic that assigns risk levels. + + Args: + path_data: Dictionary with vulnerability_type and path information + + Returns: + Severity level: "critical", "high", "medium", or "low" + """ + vulnerability_type = path_data.get("vulnerability_type", "") + path_length = len(path_data.get("path", [])) + + high_severity = ["SQL Injection", "Command Injection", "NoSQL Injection"] + medium_severity = ["Cross-Site Scripting (XSS)", "Path Traversal", "LDAP Injection"] + + if vulnerability_type in high_severity: + return "critical" if path_length <= 2 else "high" + elif vulnerability_type in medium_severity: + return "high" if path_length <= 2 else "medium" + else: + return "medium" if path_length <= 3 else "low" + + +def classify_vulnerability(sink: Dict[str, Any], security_sinks: Dict[str, List[str]]) -> str: + """ + Classify the vulnerability based on sink type. + This is interpretive logic that categorizes vulnerabilities. + + Args: + sink: Sink dictionary with name + security_sinks: Mapping of vulnerability types to sink patterns + + Returns: + Human-readable vulnerability type + """ + sink_name = sink["name"].lower() if "name" in sink else "" + + for vuln_type, sinks in security_sinks.items(): + if any(s.lower() in sink_name for s in sinks): + return { + "sql": "SQL Injection", + "command": "Command Injection", + "xss": "Cross-Site Scripting (XSS)", + "path": "Path Traversal", + "ldap": "LDAP Injection", + "nosql": "NoSQL Injection" + }.get(vuln_type, vuln_type.upper()) + + return "Data Exposure" + + +def is_vulnerable_sink(cursor: sqlite3.Cursor, sink: Dict[str, Any], context: Dict[str, Any]) -> bool: + """ + Check if a sink is actually vulnerable based on context. + This is interpretive logic that makes security judgments. + + For example, parameterized queries are safe even if they use execute(). 
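+
+    A minimal calling sketch (the sink dict must carry "name", "category",
+    "file", and "line"; the file and line values here are purely illustrative):
+
+        conn = sqlite3.connect("./.pf/repo_index.db")
+        sink = {"name": "cursor.execute", "category": "sql",
+                "file": "app/db.py", "line": 42}
+        vulnerable = is_vulnerable_sink(conn.cursor(), sink, context={})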
+ + Args: + cursor: SQLite cursor for querying code patterns + sink: Sink dictionary with name and category + context: Context information about the sink + + Returns: + True if the sink is judged to be vulnerable, False if safe + """ + # Direct access - sinks must have name and category + sink_name = sink["name"].lower() if "name" in sink else "" + sink_category = sink["category"] if "category" in sink else "" + + # SQL injection context checking + if sink_category == "sql" or "execute" in sink_name or "query" in sink_name: + # Check if this is a parameterized query + # Look for patterns that indicate parameterization + cursor.execute(""" + SELECT name + FROM symbols + WHERE path = ? + AND type = 'call' + AND line = ? + """, (sink["file"], sink["line"])) + + call_at_line = cursor.fetchone() + if call_at_line: + call_text = call_at_line[0] + # Heuristics for parameterized queries + # If using ? or %s placeholders, it's likely parameterized + # If using prepare/bind patterns, it's safe + safe_patterns = [ + "prepare", + "bind", + "execute(", # With parameters + "executemany", + "format(", # SQL formatting functions + "sql.SQL", + "sql.Identifier", + "text(", # SQLAlchemy safe text + ] + + for pattern in safe_patterns: + if pattern in call_text: + return False # Not vulnerable - using safe pattern + + # Check for dangerous patterns (string concatenation) + dangerous_patterns = [ + "+", # String concatenation + ".format", # String formatting (when not SQL.format) + "f\"", # F-strings + "%", # Old-style formatting + ] + + # Get the actual code around the sink to check for concatenation + cursor.execute(""" + SELECT name + FROM symbols + WHERE path = ? + AND type = 'call' + AND line >= ? + AND line <= ? + """, (sink["file"], sink["line"] - 1, sink["line"] + 1)) + + nearby_calls = cursor.fetchall() + for call in nearby_calls: + call_str = str(call[0]) + for pattern in dangerous_patterns: + if pattern in call_str and "sql" not in call_str.lower(): + return True # Vulnerable - using dangerous pattern + + # Command injection context checking + elif sink_category == "command" or any(cmd in sink_name for cmd in ["system", "exec", "spawn"]): + # Check if using shell=False or proper escaping + cursor.execute(""" + SELECT name + FROM symbols + WHERE path = ? + AND line = ? + """, (sink["file"], sink["line"])) + + call_details = cursor.fetchone() + if call_details: + call_text = call_details[0] + # Safe patterns for command execution + if "shell=False" in call_text or "shlex" in call_text: + return False # Not vulnerable - using safe execution + + # Path traversal context checking + elif sink_category == "path": + # Check if path is validated/sanitized + cursor.execute(""" + SELECT name + FROM symbols + WHERE path = ? + AND type = 'call' + AND line >= ? + AND line <= ? + """, (sink["file"], sink["line"] - 3, sink["line"])) + + recent_calls = cursor.fetchall() + for call in recent_calls: + if any(san in str(call[0]) for san in ["basename", "secure_filename", "normalize"]): + return False # Path is sanitized + + # Default: consider it vulnerable if we can't prove it's safe + return True + + +def generate_summary(paths: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Generate a summary of the taint analysis results. + This is interpretive logic that creates risk assessments and recommendations. 
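+
+    A minimal calling sketch (each path dict needs "vulnerability_type" and a
+    "path" list; the two illustrative steps below form a short path, which
+    calculate_severity grades as critical for SQL Injection):
+
+        summary = generate_summary([{
+            "vulnerability_type": "SQL Injection",
+            "path": [{"name": "req.query.id"}, {"name": "cursor.execute"}],
+        }])
+        summary["risk_level"]   # "critical"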
+ + Args: + paths: List of taint path dictionaries + + Returns: + Summary with risk levels and recommendations + """ + if not paths: + return { + "risk_level": "low", + "critical_count": 0, + "high_count": 0, + "medium_count": 0, + "low_count": 0, + "recommendation": "No direct taint paths found. Continue monitoring for indirect flows." + } + + severity_counts = defaultdict(int) + for path in paths: + # Calculate severity for each path + severity = calculate_severity(path) + severity_counts[severity] += 1 + + # Determine overall risk level with clear explanation + critical_count = severity_counts.get("critical", 0) + high_count = severity_counts.get("high", 0) + medium_count = severity_counts.get("medium", 0) + low_count = severity_counts.get("low", 0) + + if critical_count > 0: + risk_level = "critical" + recommendation = f"URGENT: Critical risk level assigned due to {critical_count} critical-severity vulnerability(ies). Immediate remediation required!" + elif high_count > 2: + risk_level = "high" + recommendation = f"High risk level assigned due to {high_count} high-severity vulnerabilities. Priority remediation needed." + elif high_count > 0: + risk_level = "medium" + recommendation = f"Medium risk level assigned due to {high_count} high-severity vulnerability(ies) found. Schedule remediation in next sprint." + elif medium_count > 5: + risk_level = "medium" + recommendation = f"Medium risk level assigned due to high volume ({medium_count}) of medium-severity findings. Review and prioritize fixes." + else: + risk_level = "low" + recommendation = f"Low risk level assigned. Found {medium_count} medium and {low_count} low severity issues. Review and address as time permits." + + return { + "risk_level": risk_level, + "critical_count": severity_counts.get("critical", 0), + "high_count": severity_counts.get("high", 0), + "medium_count": severity_counts.get("medium", 0), + "low_count": severity_counts.get("low", 0), + "recommendation": recommendation, + "most_common_vulnerability": max( + [(v, k) for k, v in severity_counts.items()], + default=(0, "None") + )[1] if paths else "None" + } + + +def format_taint_report(analysis_result: Dict[str, Any]) -> str: + """ + Format taint analysis results into a human-readable report. + This is interpretive presentation logic. 
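+
+    A minimal calling sketch (only "success" is strictly required; the other
+    keys shown are the ones the report reads, and default to empty or zero):
+
+        text = format_taint_report({
+            "success": True,
+            "summary": {"risk_level": "low", "recommendation": "No action needed."},
+            "sources_found": 3, "sinks_found": 2, "total_vulnerabilities": 0,
+        })
+        print(text)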
+ + Args: + analysis_result: Raw analysis results from trace_taint + + Returns: + Formatted string report + """ + lines = [] + + # Use ASCII characters on Windows, Unicode elsewhere + if IS_WINDOWS: + border_char = "=" + section_char = "-" + arrow = "->" + else: + border_char = "=" + section_char = "─" + arrow = "→" + + # Header + lines.append(border_char * 60) + lines.append("TAINT ANALYSIS SECURITY REPORT") + lines.append(border_char * 60) + + if not analysis_result.get("success"): + lines.append(f"\nError: {analysis_result.get('error', 'Unknown error')}") + return "\n".join(lines) + + # Summary + summary = analysis_result.get("summary", {}) + lines.append(f"\nRisk Level: {summary.get('risk_level', '').upper()}") + lines.append(f"Recommendation: {summary.get('recommendation', '')}") + + # Statistics + lines.append(f"\n{section_char * 40}") + lines.append("SCAN STATISTICS") + lines.append(f"{section_char * 40}") + lines.append(f"Taint Sources Found: {analysis_result.get('sources_found', 0)}") + lines.append(f"Security Sinks Found: {analysis_result.get('sinks_found', 0)}") + lines.append(f"Total Vulnerabilities: {analysis_result.get('total_vulnerabilities', 0)}") + + # Vulnerabilities by type + vuln_types = analysis_result.get("vulnerabilities_by_type", {}) + if vuln_types: + lines.append(f"\n{section_char * 40}") + lines.append("VULNERABILITIES BY TYPE") + lines.append(f"{section_char * 40}") + for vuln_type, count in sorted(vuln_types.items(), key=lambda x: x[1], reverse=True): + lines.append(f" {vuln_type}: {count}") + + # Severity breakdown + lines.append(f"\n{section_char * 40}") + lines.append("SEVERITY BREAKDOWN") + lines.append(f"{section_char * 40}") + lines.append(f" CRITICAL: {summary.get('critical_count', 0)}") + lines.append(f" HIGH: {summary.get('high_count', 0)}") + lines.append(f" MEDIUM: {summary.get('medium_count', 0)}") + lines.append(f" LOW: {summary.get('low_count', 0)}") + + # Detailed paths (limit to top 10) + # Handle both "taint_paths" and "paths" keys for compatibility + paths = analysis_result.get("taint_paths", analysis_result.get("paths", [])) + if paths: + lines.append(f"\n{section_char * 40}") + lines.append("TOP VULNERABILITY PATHS") + lines.append(f"{section_char * 40}") + + # Sort by severity + sorted_paths = sorted(paths, key=lambda p: ( + {"critical": 0, "high": 1, "medium": 2, "low": 3}.get(p.get("severity", "unknown"), 4), + p.get("path_length", 0) + )) + + for i, path in enumerate(sorted_paths[:10], 1): + lines.append(f"\n{i}. {path.get('vulnerability_type', 'Unknown')} ({path.get('severity', 'unknown').upper()})") + lines.append(f" Source: {path.get('source', {}).get('name', '')} at {path.get('source', {}).get('file', '')}:{path.get('source', {}).get('line', 0)}") + lines.append(f" Sink: {path.get('sink', {}).get('name', '')} at {path.get('sink', {}).get('file', '')}:{path.get('sink', {}).get('line', 0)}") + lines.append(f" Path Length: {path.get('path_length', 0)} steps") + + if len(path.get('path', [])) <= 4: + lines.append(" Flow:") + for step in path.get('path', []): + if isinstance(step, dict): + lines.append(f" {arrow} {step.get('name', '')}") + + lines.append("\n" + border_char * 60) + + return "\n".join(lines) + + +def get_taint_summary(taint_data: Dict[str, Any]) -> Dict[str, Any]: + """ + Distill potentially large taint analysis data into a concise, AI-readable summary. + This is interpretive intelligence that extracts key insights. 
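+
+    A minimal calling sketch (expects a dict with a "vulnerabilities" list;
+    an empty list still yields a complete, zeroed summary):
+
+        summary = get_taint_summary({"vulnerabilities": [], "total_paths": 0})
+        summary["statistics"]["total_vulnerabilities"]   # 0
+        summary["security_insights"]["risk_level"]       # "low"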
+ + This function solves the "200MB file paradox" by extracting key insights + from large taint analysis results that the AI cannot read directly. + + Args: + taint_data: Large taint analysis dict with vulnerability paths + + Returns: + Concise summary (<1MB) with key security insights + """ + vulnerabilities = taint_data.get("vulnerabilities", []) + + # Count vulnerabilities by type + vuln_by_type = defaultdict(int) + vuln_by_severity = defaultdict(int) + source_files = set() + sink_files = set() + + for vuln in vulnerabilities: + # Categorize by type + vuln_type = vuln.get("vulnerability_type", "") # Empty not unknown + vuln_by_type[vuln_type] += 1 + + # Categorize by severity + severity = vuln.get("severity", "medium") + vuln_by_severity[severity] += 1 + + # Track source and sink files + if "source" in vuln: + source_files.add(vuln["source"].get("file", "")) # Empty not unknown + if "sink" in vuln: + sink_files.add(vuln["sink"].get("file", "")) # Empty not unknown + + # Find top risky source files (files that originate the most vulnerabilities) + source_file_counts = defaultdict(int) + for vuln in vulnerabilities[:100]: # Limit for efficiency + if "source" in vuln: + source_file = vuln["source"].get("file", "") # Empty not unknown + source_file_counts[source_file] += 1 + + top_source_files = sorted( + source_file_counts.items(), + key=lambda x: x[1], + reverse=True + )[:5] + + # Find top vulnerable sinks (functions that are most frequently vulnerable) + sink_counts = defaultdict(int) + for vuln in vulnerabilities[:100]: # Limit for efficiency + if "sink" in vuln: + sink_name = vuln["sink"].get("name", "") # Empty not unknown + sink_counts[sink_name] += 1 + + top_sinks = sorted( + sink_counts.items(), + key=lambda x: x[1], + reverse=True + )[:5] + + # Extract critical vulnerabilities (first 5 high/critical severity) + critical_vulns = [] + for vuln in vulnerabilities: + if vuln.get("severity") in ["critical", "high"] and len(critical_vulns) < 5: + # Create a condensed version + critical_vulns.append({ + "type": vuln.get("vulnerability_type", ""), # Empty not unknown + "severity": vuln.get("severity"), + "source": f"{vuln.get('source', {}).get('file', '')}:{vuln.get('source', {}).get('line', 0)}", # Empty not unknown + "sink": f"{vuln.get('sink', {}).get('file', '')}:{vuln.get('sink', {}).get('line', 0)}", # Empty not unknown + "path_length": len(vuln.get("path", [])) + }) + + # Create summary + summary = { + "statistics": { + "total_vulnerabilities": len(vulnerabilities), + "unique_source_files": len(source_files), + "unique_sink_files": len(sink_files), + "total_paths_analyzed": taint_data.get("total_paths", 0) + }, + "vulnerabilities_by_type": dict(vuln_by_type), + "vulnerabilities_by_severity": dict(vuln_by_severity), + "top_risky_source_files": [ + {"file": file, "vulnerability_count": count} + for file, count in top_source_files + ], + "top_vulnerable_sinks": [ + {"sink": sink, "occurrence_count": count} + for sink, count in top_sinks + ], + "critical_vulnerabilities": critical_vulns, + "security_insights": { + "has_sql_injection": vuln_by_type.get("sql_injection", 0) > 0, + "has_xss": vuln_by_type.get("xss", 0) > 0, + "has_command_injection": vuln_by_type.get("command_injection", 0) > 0, + "has_path_traversal": vuln_by_type.get("path_traversal", 0) > 0, + "critical_count": vuln_by_severity.get("critical", 0), + "high_count": vuln_by_severity.get("high", 0), + "risk_level": "critical" if vuln_by_severity.get("critical", 0) > 0 + else "high" if vuln_by_severity.get("high", 0) > 5 + 
else "medium" if len(vulnerabilities) > 10 + else "low" + } + } + + return summary \ No newline at end of file diff --git a/theauditor/journal.py b/theauditor/journal.py new file mode 100644 index 0000000..9907b78 --- /dev/null +++ b/theauditor/journal.py @@ -0,0 +1,446 @@ +"""Journal system for tracking audit execution history. + +This module provides functionality to write and read execution journals in NDJSON format. +The journal tracks all pipeline events, file touches, and results for ML training. +""" + +import json +import os +from datetime import datetime, UTC +from pathlib import Path +from typing import Any, Dict, List, Optional, Union + + +class JournalWriter: + """Writes execution events to journal.ndjson file.""" + + def __init__(self, journal_path: str = "./.pf/journal.ndjson", history_dir: Optional[str] = None): + """Initialize journal writer. + + Args: + journal_path: Path to the journal file + history_dir: Optional history directory for archival copies + """ + self.journal_path = Path(journal_path) + self.history_dir = Path(history_dir) if history_dir else None + self.session_id = datetime.now(UTC).strftime("%Y%m%d_%H%M%S") + + # Ensure parent directory exists + self.journal_path.parent.mkdir(parents=True, exist_ok=True) + + # Open file in append mode for continuous writing + self.file_handle = None + self._open_journal() + + def _open_journal(self): + """Open journal file for writing.""" + try: + self.file_handle = open(self.journal_path, 'a', encoding='utf-8', buffering=1) + except Exception as e: + print(f"[WARNING] Could not open journal file {self.journal_path}: {e}") + self.file_handle = None + + def write_event(self, event_type: str, data: Dict[str, Any]) -> bool: + """Write an event to the journal. + + Args: + event_type: Type of event (phase, file_touch, result, error, etc.) + data: Event data dictionary + + Returns: + True if written successfully, False otherwise + """ + if not self.file_handle: + return False + + try: + event = { + "timestamp": datetime.now(UTC).isoformat(), + "session_id": self.session_id, + "event_type": event_type, + **data + } + + # Write as NDJSON (one JSON object per line) + json.dump(event, self.file_handle) + self.file_handle.write('\n') + self.file_handle.flush() # Force write to disk + return True + + except Exception as e: + print(f"[WARNING] Failed to write journal event: {e}") + return False + + def phase_start(self, phase_name: str, command: str, phase_num: int = 0) -> bool: + """Record the start of a pipeline phase. + + Args: + phase_name: Human-readable phase name + command: Command being executed + phase_num: Phase number in sequence + """ + return self.write_event("phase_start", { + "phase": phase_name, + "command": command, + "phase_num": phase_num + }) + + def phase_end(self, phase_name: str, success: bool, elapsed: float, + exit_code: int = 0, error_msg: Optional[str] = None) -> bool: + """Record the end of a pipeline phase. + + Args: + phase_name: Human-readable phase name + success: Whether phase succeeded + elapsed: Execution time in seconds + exit_code: Process exit code + error_msg: Optional error message + """ + return self.write_event("phase_end", { + "phase": phase_name, + "result": "success" if success else "fail", + "elapsed": elapsed, + "exit_code": exit_code, + "error": error_msg + }) + + def file_touch(self, file_path: str, operation: str = "analyze", + success: bool = True, findings: int = 0) -> bool: + """Record a file being touched/analyzed. 
+ + Args: + file_path: Path to the file + operation: Type of operation (analyze, modify, create, etc.) + success: Whether operation succeeded + findings: Number of findings/issues found + """ + return self.write_event("file_touch", { + "file": file_path, + "operation": operation, + "result": "success" if success else "fail", + "findings": findings + }) + + def finding(self, file_path: str, severity: str, category: str, + message: str, line: Optional[int] = None) -> bool: + """Record a specific finding/issue. + + Args: + file_path: File where finding was detected + severity: Severity level (critical, high, medium, low) + category: Category of finding + message: Finding message + line: Optional line number + """ + return self.write_event("finding", { + "file": file_path, + "severity": severity, + "category": category, + "message": message, + "line": line + }) + + def apply_patch(self, file_path: str, success: bool, + patch_type: str = "fix", error_msg: Optional[str] = None) -> bool: + """Record a patch/fix being applied to a file. + + Args: + file_path: File being patched + success: Whether patch succeeded + patch_type: Type of patch (fix, refactor, update, etc.) + error_msg: Optional error message + """ + return self.write_event("apply_patch", { + "file": file_path, + "result": "success" if success else "fail", + "patch_type": patch_type, + "error": error_msg + }) + + def pipeline_summary(self, total_phases: int, failed_phases: int, + total_files: int, total_findings: int, + elapsed: float, status: str = "complete") -> bool: + """Record pipeline execution summary. + + Args: + total_phases: Total number of phases executed + failed_phases: Number of failed phases + total_files: Total files analyzed + total_findings: Total findings detected + elapsed: Total execution time + status: Overall status (complete, partial, failed) + """ + return self.write_event("pipeline_summary", { + "total_phases": total_phases, + "failed_phases": failed_phases, + "total_files": total_files, + "total_findings": total_findings, + "elapsed": elapsed, + "status": status + }) + + def close(self, copy_to_history: bool = True): + """Close the journal file and optionally copy to history. + + Args: + copy_to_history: Whether to copy journal to history directory + """ + if self.file_handle: + try: + self.file_handle.close() + except: + pass + self.file_handle = None + + # Copy to history if requested and history_dir is set + if copy_to_history and self.history_dir and self.journal_path.exists(): + try: + import shutil + self.history_dir.mkdir(parents=True, exist_ok=True) + dest_path = self.history_dir / f"journal_{self.session_id}.ndjson" + shutil.copy2(self.journal_path, dest_path) + print(f"[INFO] Journal copied to history: {dest_path}") + except Exception as e: + print(f"[WARNING] Could not copy journal to history: {e}") + + def __enter__(self): + """Context manager entry.""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit - close journal.""" + self.close() + + +class JournalReader: + """Reads and queries journal.ndjson files.""" + + def __init__(self, journal_path: str = "./.pf/journal.ndjson"): + """Initialize journal reader. + + Args: + journal_path: Path to the journal file + """ + self.journal_path = Path(journal_path) + + def read_events(self, event_type: Optional[str] = None, + since: Optional[datetime] = None, + session_id: Optional[str] = None) -> List[Dict[str, Any]]: + """Read events from journal with optional filtering. 
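+
+        A brief usage sketch (the journal path is the default; the time
+        window is illustrative):
+
+            from datetime import datetime, UTC, timedelta
+
+            reader = JournalReader("./.pf/journal.ndjson")
+            recent_touches = reader.read_events(
+                event_type="file_touch",
+                since=datetime.now(UTC) - timedelta(hours=1),
+            )
+            # Only file_touch events written in the last hour are returned;
+            # malformed JSON lines are skipped with a warning.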
+ + Args: + event_type: Filter by event type + since: Only events after this timestamp + session_id: Filter by session ID + + Returns: + List of matching events + """ + if not self.journal_path.exists(): + return [] + + events = [] + try: + with open(self.journal_path, 'r', encoding='utf-8') as f: + for line_num, line in enumerate(f, 1): + line = line.strip() + if not line: + continue + + try: + event = json.loads(line) + + # Apply filters + if event_type and event.get("event_type") != event_type: + continue + + if session_id and event.get("session_id") != session_id: + continue + + if since: + event_time = datetime.fromisoformat(event.get("timestamp", "")) + if event_time < since: + continue + + events.append(event) + + except json.JSONDecodeError: + print(f"[WARNING] Skipping malformed JSON at line {line_num}") + continue + + except Exception as e: + print(f"[WARNING] Error reading journal: {e}") + + return events + + def get_file_stats(self) -> Dict[str, Dict[str, int]]: + """Get statistics for file touches and failures. + + Returns: + Dict mapping file paths to stats (touches, failures, successes) + """ + stats = {} + + for event in self.read_events(event_type="file_touch"): + file_path = event.get("file", "") + if not file_path: + continue + + if file_path not in stats: + stats[file_path] = { + "touches": 0, + "failures": 0, + "successes": 0, + "findings": 0 + } + + stats[file_path]["touches"] += 1 + + if event.get("result") == "fail": + stats[file_path]["failures"] += 1 + else: + stats[file_path]["successes"] += 1 + + stats[file_path]["findings"] += event.get("findings", 0) + + # Also count apply_patch events + for event in self.read_events(event_type="apply_patch"): + file_path = event.get("file", "") + if not file_path: + continue + + if file_path not in stats: + stats[file_path] = { + "touches": 0, + "failures": 0, + "successes": 0, + "findings": 0 + } + + stats[file_path]["touches"] += 1 + + if event.get("result") == "fail": + stats[file_path]["failures"] += 1 + else: + stats[file_path]["successes"] += 1 + + return stats + + def get_phase_stats(self) -> Dict[str, Dict[str, Any]]: + """Get statistics for pipeline phases. + + Returns: + Dict mapping phase names to execution stats + """ + stats = {} + + # Track phase starts + for event in self.read_events(event_type="phase_start"): + phase = event.get("phase", "") + if not phase: + continue + + if phase not in stats: + stats[phase] = { + "executions": 0, + "failures": 0, + "total_elapsed": 0.0, + "last_executed": None + } + + stats[phase]["executions"] += 1 + stats[phase]["last_executed"] = event.get("timestamp") + + # Track phase ends + for event in self.read_events(event_type="phase_end"): + phase = event.get("phase", "") + if not phase or phase not in stats: + continue + + if event.get("result") == "fail": + stats[phase]["failures"] += 1 + + stats[phase]["total_elapsed"] += event.get("elapsed", 0.0) + + return stats + + def get_recent_failures(self, limit: int = 10) -> List[Dict[str, Any]]: + """Get recent failure events. 
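+
+        A brief usage sketch:
+
+            reader = JournalReader("./.pf/journal.ndjson")
+            for event in reader.get_recent_failures(limit=5):
+                print(event.get("event_type"), event.get("phase") or event.get("file"))
+            # Yields up to five of the newest events whose result is "fail"
+            # or whose event_type is "error", newest first.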
+ + Args: + limit: Maximum number of failures to return + + Returns: + List of recent failure events + """ + failures = [] + + # Get all failure events + for event in self.read_events(): + if event.get("result") == "fail" or event.get("event_type") == "error": + failures.append(event) + + # Sort by timestamp (most recent first) + failures.sort(key=lambda x: x.get("timestamp", ""), reverse=True) + + return failures[:limit] + + +# Integration functions for pipeline +def get_journal_writer(run_type: str = "full") -> JournalWriter: + """Get a journal writer for the current run. + + Args: + run_type: Type of run (full, diff, etc.) + + Returns: + JournalWriter instance + """ + # Determine history directory based on run type + history_dir = Path("./.pf/history") / run_type / datetime.now(UTC).strftime("%Y%m%d_%H%M%S") + + return JournalWriter( + journal_path="./.pf/journal.ndjson", + history_dir=str(history_dir) + ) + + +def integrate_with_pipeline(pipeline_func): + """Decorator to integrate journal writing with pipeline execution. + + This decorator wraps pipeline functions to automatically write journal events. + """ + def wrapper(*args, **kwargs): + # Get or create journal writer + journal = kwargs.pop('journal', None) + close_journal = False + + if journal is None: + journal = get_journal_writer(kwargs.get('run_type', 'full')) + close_journal = True + + try: + # Inject journal into kwargs + kwargs['journal'] = journal + + # Execute pipeline + result = pipeline_func(*args, **kwargs) + + # Write summary if available + if isinstance(result, dict): + journal.pipeline_summary( + total_phases=result.get('total_phases', 0), + failed_phases=result.get('failed_phases', 0), + total_files=len(result.get('created_files', [])), + total_findings=result.get('findings', {}).get('total_vulnerabilities', 0), + elapsed=result.get('elapsed_time', 0.0), + status='complete' if result.get('success') else 'failed' + ) + + return result + + finally: + if close_journal: + journal.close() + + return wrapper \ No newline at end of file diff --git a/theauditor/js_init.py b/theauditor/js_init.py new file mode 100644 index 0000000..8ce893f --- /dev/null +++ b/theauditor/js_init.py @@ -0,0 +1,154 @@ +"""JavaScript/TypeScript project initialization.""" + +import json +from pathlib import Path + + +def deep_merge(base: dict, overlay: dict) -> dict: + """ + Deep merge overlay into base, only adding missing keys. + + Existing values in base are never overwritten. + """ + result = base.copy() + + for key, value in overlay.items(): + if key not in result: + result[key] = value + elif isinstance(value, dict) and isinstance(result[key], dict): + # Recursively merge nested dicts + result[key] = deep_merge(result[key], value) + + return result + + +def ensure_package_json(path: str) -> dict[str, str]: + """ + Create or merge minimal package.json for lint/typecheck. 
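+
+    A minimal usage sketch (the path is illustrative):
+
+        result = ensure_package_json("frontend/package.json")
+        if result["status"] == "created":
+            print("Wrote a new minimal package.json")
+        elif result["status"] == "merged":
+            print("Added missing lint/typecheck entries to the existing file")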
+ + Returns: + {"status": "created"} if new file created + {"status": "merged"} if existing file updated + {"status": "unchanged"} if no changes needed + """ + package_path = Path(path) + + # Template with PIN_ME placeholders + template = { + "private": True, + "devDependencies": { + "eslint": "", + "@typescript-eslint/parser": "", + "@typescript-eslint/eslint-plugin": "", + "typescript": "", + "prettier": "", + }, + "scripts": { + "lint": "eslint .", + "typecheck": "tsc --noEmit", + "format": "prettier -c .", + }, + } + + if package_path.exists(): + # Load existing + with open(package_path) as f: + existing = json.load(f) + + # Deep merge + merged = deep_merge(existing, template) + + if merged == existing: + return {"status": "unchanged"} + + # Write merged version + with open(package_path, "w") as f: + json.dump(merged, f, indent=2) + + return {"status": "merged"} + else: + # Create new file + with open(package_path, "w") as f: + json.dump(template, f, indent=2) + + return {"status": "created"} + + +def add_auditor_hooks(path: str) -> dict[str, str]: + """ + Add TheAuditor hooks to package.json scripts non-destructively. + + Adds the following hooks: + - pretest: aud lint --workset + - prebuild: aud ast-verify + - prepush: aud taint-analyze + + If hooks already exist, prepends Auditor commands with &&. + + Args: + path: Path to package.json file + + Returns: + {"status": "hooks_added", "details": } if hooks were added + {"status": "unchanged"} if all hooks already present + {"status": "error", "message": } if error occurred + """ + package_path = Path(path) + + # Check if file exists + if not package_path.exists(): + return {"status": "error", "message": f"File not found: {path}"} + + try: + # Read existing package.json + with open(package_path, 'r') as f: + package_data = json.load(f) + + # Ensure scripts object exists + if "scripts" not in package_data: + package_data["scripts"] = {} + + scripts = package_data["scripts"] + + # Define desired Auditor hooks + auditor_hooks = { + "pretest": "aud lint --workset", + "prebuild": "aud ast-verify", + "prepush": "aud taint-analyze" + } + + changes = [] + + for hook_name, auditor_cmd in auditor_hooks.items(): + if hook_name not in scripts: + # Hook doesn't exist, add it + scripts[hook_name] = auditor_cmd + changes.append(f"Added {hook_name}: {auditor_cmd}") + else: + existing_cmd = scripts[hook_name] + + # Check if Auditor command is already present + if auditor_cmd in existing_cmd: + # Already has the command, skip + continue + + # Prepend Auditor command with && + new_cmd = f"{auditor_cmd} && {existing_cmd}" + scripts[hook_name] = new_cmd + changes.append(f"Modified {hook_name}: prepended {auditor_cmd}") + + if not changes: + return {"status": "unchanged"} + + # Write modified package.json with 2-space indent + with open(package_path, 'w') as f: + json.dump(package_data, f, indent=2) + # Add trailing newline for consistency with npm + f.write('\n') + + return {"status": "hooks_added", "details": changes} + + except json.JSONDecodeError as e: + return {"status": "error", "message": f"Invalid JSON in {path}: {e}"} + except Exception as e: + return {"status": "error", "message": f"Error processing {path}: {e}"} diff --git a/theauditor/js_semantic_parser.py b/theauditor/js_semantic_parser.py new file mode 100644 index 0000000..218a9c3 --- /dev/null +++ b/theauditor/js_semantic_parser.py @@ -0,0 +1,1270 @@ +"""JavaScript/TypeScript semantic parser using the TypeScript Compiler API. 
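+
+Typical usage (a sketch; the paths are illustrative and assume the sandboxed
+Node.js/TypeScript toolchain has already been installed):
+
+    parser = JSSemanticParser(project_root="/path/to/project")
+    result = parser.get_semantic_ast("src/app.ts")
+    if result["success"]:
+        print(result["nodeCount"], "AST nodes,", len(result["symbols"]), "symbols")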
+ +This module replaces Tree-sitter's syntactic parsing with true semantic analysis +using the TypeScript compiler, enabling accurate type analysis, symbol resolution, +and cross-file understanding for JavaScript and TypeScript projects. +""" + +import json +import os +import platform +import re +import subprocess +import sys +import tempfile +from pathlib import Path +from typing import Dict, Optional, Any, List, Tuple + +# Import our custom temp manager to avoid WSL2/Windows issues +try: + from theauditor.utils.temp_manager import TempManager +except ImportError: + # Fallback to regular tempfile if custom manager not available + TempManager = None + +# Windows compatibility for subprocess calls +IS_WINDOWS = platform.system() == "Windows" + +# Module-level cache for resolver (it's stateless now) +_module_resolver_cache = None + + +class JSSemanticParser: + """Semantic parser for JavaScript/TypeScript using the TypeScript Compiler API.""" + + def __init__(self, project_root: str = None): + """Initialize the semantic parser. + + Args: + project_root: Absolute path to project root. If not provided, uses current directory. + """ + self.project_root = Path(project_root).resolve() if project_root else Path.cwd().resolve() + self.using_windows_node = False # Track if we're using Windows node.exe from WSL + self.tsc_path = None # Path to TypeScript compiler + self.node_modules_path = None # Path to sandbox node_modules + + # CRITICAL: Reuse cached ModuleResolver (stateless, database-driven) + global _module_resolver_cache + if _module_resolver_cache is None: + from theauditor.module_resolver import ModuleResolver + _module_resolver_cache = ModuleResolver() # No project_root needed! + print("[DEBUG] Created singleton ModuleResolver instance") + + self.module_resolver = _module_resolver_cache + + # CRITICAL FIX: Find the sandboxed node executable (like linters do) + # Platform-agnostic: Check multiple possible locations + sandbox_base = self.project_root / ".auditor_venv" / ".theauditor_tools" + node_runtime = sandbox_base / "node-runtime" + + # Check all possible node locations (Windows or Unix layout) + possible_node_paths = [ + node_runtime / "node.exe", # Windows binary in root + node_runtime / "node", # Unix binary in root + node_runtime / "bin" / "node", # Unix binary in bin/ + node_runtime / "bin" / "node.exe", # Windows binary in bin/ (unusual but possible) + ] + + self.node_exe = None + for node_path in possible_node_paths: + if node_path.exists(): + self.node_exe = node_path + # Track if we're using Windows node on WSL + self.using_windows_node = str(node_path).endswith('.exe') and str(node_path).startswith('/') + break + + # If not found, will trigger proper error messages + + self.tsc_available = self._check_tsc_availability() + self.helper_script = self._create_helper_script() + self.batch_helper_script = self._create_batch_helper_script() # NEW: Batch processing helper + + def _convert_path_for_node(self, path: Path) -> str: + """Convert path to appropriate format for node execution. + + If using Windows node.exe from WSL, converts to Windows path. + Otherwise returns the path as-is. 
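+
+        A hedged example, given a JSSemanticParser instance `parser`
+        (hypothetical paths, assuming a WSL host where the Windows node.exe
+        was detected and `wslpath` is on PATH):
+
+            win_path = parser._convert_path_for_node(Path("/mnt/c/proj/src/app.ts"))
+            # -> "C:\\proj\\src\\app.ts" via `wslpath -w`; on any other
+            # platform the original POSIX string is returned unchanged.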
+ """ + path_str = str(path) + if self.using_windows_node: + try: + import subprocess as sp + result = sp.run(['wslpath', '-w', path_str], + capture_output=True, text=True, timeout=2) + if result.returncode == 0: + return result.stdout.strip() + except: + pass # Fall back to original path + return path_str + + def _check_tsc_availability(self) -> bool: + """Check if TypeScript compiler is available in our sandbox. + + CRITICAL: We ONLY use our own sandboxed TypeScript installation. + We do not check or use any user-installed versions. + """ + # Check our sandbox location ONLY - no invasive checking of user's environment + # CRITICAL: Use absolute path from project root to avoid finding wrong sandboxes + sandbox_base = self.project_root / ".auditor_venv" / ".theauditor_tools" / "node_modules" + + # Check if sandbox exists at the absolute location + sandbox_locations = [sandbox_base] + + for sandbox_base in sandbox_locations: + if not sandbox_base.exists(): + continue + + # Check for TypeScript in sandbox + tsc_paths = [ + sandbox_base / ".bin" / "tsc", + sandbox_base / ".bin" / "tsc.cmd", # Windows + ] + + # Also check for the actual TypeScript compiler JS file + tsc_js_path = sandbox_base / "typescript" / "lib" / "tsc.js" + + # If we have node and the TypeScript compiler JS file, we can use it + if self.node_exe and tsc_js_path.exists(): + try: + # Verify it actually works by running through node + # CRITICAL: Use absolute path for NODE_PATH + absolute_sandbox = sandbox_base.resolve() + # Use temp files to avoid buffer overflow + if TempManager: + stdout_path, stderr_path = TempManager.create_temp_files_for_subprocess( + str(self.project_root), "tsc_verify" + ) + with open(stdout_path, 'w+', encoding='utf-8') as stdout_fp, \ + open(stderr_path, 'w+', encoding='utf-8') as stderr_fp: + pass # File handles created, will be used below + else: + # Fallback to regular tempfile + with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stdout.txt', encoding='utf-8') as stdout_fp, \ + tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='_stderr.txt', encoding='utf-8') as stderr_fp: + stdout_path = stdout_fp.name + stderr_path = stderr_fp.name + + with open(stdout_path, 'w+', encoding='utf-8') as stdout_fp, \ + open(stderr_path, 'w+', encoding='utf-8') as stderr_fp: + + # Convert paths for Windows node if needed + tsc_path_str = self._convert_path_for_node(tsc_js_path) + + # Run TypeScript through node.exe + result = subprocess.run( + [str(self.node_exe), tsc_path_str, "--version"], + stdout=stdout_fp, + stderr=stderr_fp, + text=True, + timeout=5, + env={**os.environ, "NODE_PATH": str(absolute_sandbox)}, + shell=False # Never use shell when we have full path + ) + + with open(stdout_path, 'r', encoding='utf-8') as f: + result.stdout = f.read() + with open(stderr_path, 'r', encoding='utf-8') as f: + result.stderr = f.read() + + os.unlink(stdout_path) + os.unlink(stderr_path) + if result.returncode == 0: + self.tsc_path = tsc_js_path # Store the JS file path, not the shell script + self.node_modules_path = absolute_sandbox # Store absolute path + return True + except (subprocess.SubprocessError, FileNotFoundError, OSError): + pass # TypeScript check failed + + # No sandbox TypeScript found - this is expected on first run + return False + + def _extract_vue_blocks(self, content: str) -> Tuple[Optional[str], Optional[str]]: + """Extract script and template blocks from Vue SFC content. 
+ + Args: + content: The raw Vue SFC file content + + Returns: + Tuple of (script_content, template_content) or (None, None) if not found + """ + # Extract ' + script_match = re.search(script_pattern, content, re.DOTALL | re.IGNORECASE) + script_content = script_match.group(1).strip() if script_match else None + + # Extract ' + template_match = re.search(template_pattern, content, re.DOTALL | re.IGNORECASE) + template_content = template_match.group(1).strip() if template_match else None + + return script_content, template_content + + def _create_helper_script(self) -> Path: + """Create a Node.js helper script for TypeScript AST extraction. + + Returns: + Path to the created helper script + """ + # CRITICAL: Create helper script with relative path resolution + # Always create in project root's .pf directory + pf_dir = self.project_root / ".pf" + pf_dir.mkdir(exist_ok=True) + + helper_path = pf_dir / "tsc_ast_helper.js" + + # Check if TypeScript module exists in our sandbox + typescript_exists = False + if self.node_modules_path: + # The TypeScript module is at node_modules/typescript/lib/typescript.js + ts_path = self.node_modules_path / "typescript" / "lib" / "typescript.js" + typescript_exists = ts_path.exists() + + # Write the helper script that uses TypeScript Compiler API + # CRITICAL: Use relative path from helper script location to find TypeScript + helper_content = ''' +// Use TypeScript from our sandbox location with RELATIVE PATH +// This is portable - works on any machine in any location +const path = require('path'); +const fs = require('fs'); + +// Find project root by going up from .pf directory +const projectRoot = path.resolve(__dirname, '..'); + +// Build path to TypeScript module relative to project root +const tsPath = path.join(projectRoot, '.auditor_venv', '.theauditor_tools', 'node_modules', 'typescript', 'lib', 'typescript.js'); + +// Try to load TypeScript with helpful error message +let ts; +try { + if (!fs.existsSync(tsPath)) { + throw new Error(`TypeScript not found at expected location: ${tsPath}. Run 'aud setup-claude' to install tools.`); + } + ts = require(tsPath); +} catch (error) { + console.error(JSON.stringify({ + success: false, + error: `Failed to load TypeScript: ${error.message}`, + expectedPath: tsPath, + projectRoot: projectRoot + })); + process.exit(1); +} + +// Get file path and output path from command line arguments +const filePath = process.argv[2]; +const outputPath = process.argv[3]; + +if (!filePath || !outputPath) { + console.error(JSON.stringify({ error: "File path and output path required" })); + process.exit(1); +} + +try { + // Read the source file + const sourceCode = fs.readFileSync(filePath, 'utf8'); + + // Create a source file object + const sourceFile = ts.createSourceFile( + filePath, + sourceCode, + ts.ScriptTarget.Latest, + true, // setParentNodes - important for full AST traversal + ts.ScriptKind.TSX // Support both TS and TSX + ); + + // Helper function to serialize AST nodes + function serializeNode(node, depth = 0) { + if (depth > 100) { // Prevent infinite recursion + return { kind: "TooDeep" }; + } + + const result = { + kind: node.kind !== undefined ? 
(ts.SyntaxKind[node.kind] || node.kind) : 'Unknown', + kindValue: node.kind || 0, + pos: node.pos || 0, + end: node.end || 0, + flags: node.flags || 0 + }; + + // Add text content for leaf nodes + if (node.text !== undefined) { + result.text = node.text; + } + + // Add identifier name + if (node.name) { + if (typeof node.name === 'object') { + // Handle both escapedName and regular name + if (node.name.escapedText !== undefined) { + result.name = node.name.escapedText; + } else if (node.name.text !== undefined) { + result.name = node.name.text; + } else { + result.name = serializeNode(node.name, depth + 1); + } + } else { + result.name = node.name; + } + } + + // Add type information if available + if (node.type) { + result.type = serializeNode(node.type, depth + 1); + } + + // Add children - handle nodes with members property + const children = []; + if (node.members && Array.isArray(node.members)) { + // Handle nodes with members (interfaces, enums, etc.) + node.members.forEach(member => { + if (member) children.push(serializeNode(member, depth + 1)); + }); + } + ts.forEachChild(node, child => { + if (child) children.push(serializeNode(child, depth + 1)); + }); + + if (children.length > 0) { + result.children = children; + } + + // Get line and column information + // CRITICAL FIX: Use getStart() to exclude leading trivia for accurate line numbers + const actualStart = node.getStart ? node.getStart(sourceFile) : node.pos; + const { line, character } = sourceFile.getLineAndCharacterOfPosition(actualStart); + result.line = line + 1; // Convert to 1-indexed + result.column = character; + + // RESTORED: Text extraction needed for accurate symbol names in taint analysis + result.text = sourceCode.substring(node.pos, node.end).trim(); + + return result; + } + + // Collect diagnostics (errors, warnings) + const diagnostics = []; + const program = ts.createProgram([filePath], { + target: ts.ScriptTarget.Latest, + module: ts.ModuleKind.ESNext, + jsx: ts.JsxEmit.Preserve, + allowJs: true, + checkJs: false, + noEmit: true, + skipLibCheck: true // Skip checking .d.ts files for speed + }); + + const allDiagnostics = ts.getPreEmitDiagnostics(program); + allDiagnostics.forEach(diagnostic => { + const message = ts.flattenDiagnosticMessageText(diagnostic.messageText, '\\n'); + const location = diagnostic.file && diagnostic.start + ? diagnostic.file.getLineAndCharacterOfPosition(diagnostic.start) + : null; + + diagnostics.push({ + message, + category: ts.DiagnosticCategory[diagnostic.category], + code: diagnostic.code, + line: location ? location.line + 1 : null, + column: location ? location.character : null + }); + }); + + // Collect symbols and type information + const checker = program.getTypeChecker(); + const symbols = []; + + // Visit nodes to collect symbols + function visit(node) { + try { + const symbol = checker.getSymbolAtLocation(node); + if (symbol && symbol.getName) { + const type = checker.getTypeOfSymbolAtLocation(symbol, node); + const typeString = checker.typeToString(type); + + symbols.push({ + name: symbol.getName ? symbol.getName() : 'anonymous', + kind: symbol.flags ? (ts.SymbolFlags[symbol.flags] || symbol.flags) : 0, + type: typeString || 'unknown', + line: node.pos !== undefined ? 
sourceFile.getLineAndCharacterOfPosition(node.pos).line + 1 : 0 + }); + } + } catch (e) { + // Log error for debugging + console.error(`[ERROR] Symbol extraction failed at ${filePath}:${node.pos}: ${e.message}`); + } + + ts.forEachChild(node, visit); + } + + visit(sourceFile); + + // Log symbol extraction results + console.error(`[INFO] Found ${symbols.length} symbols in ${filePath}`); + + // Output the complete AST with metadata + const result = { + success: true, + fileName: filePath, + languageVersion: ts.ScriptTarget[sourceFile.languageVersion], + ast: serializeNode(sourceFile), + diagnostics: diagnostics, + symbols: symbols, + nodeCount: 0, + hasTypes: symbols.some(s => s.type && s.type !== 'any') + }; + + // Count nodes + function countNodes(node) { + if (!node) return; + result.nodeCount++; + if (node.children && Array.isArray(node.children)) { + node.children.forEach(countNodes); + } + } + if (result.ast) countNodes(result.ast); + + // Write output to file instead of stdout to avoid pipe buffer limits + fs.writeFileSync(outputPath, JSON.stringify(result, null, 2), 'utf8'); + process.exit(0); // CRITICAL: Ensure clean exit on success + +} catch (error) { + console.error(JSON.stringify({ + success: false, + error: error.message, + stack: error.stack + })); + process.exit(1); +} +''' + + helper_path.write_text(helper_content, encoding='utf-8') + return helper_path + + def _create_batch_helper_script(self) -> Path: + """Create a Node.js helper script for batch TypeScript AST extraction. + + This script processes multiple files in a single TypeScript program, + dramatically improving performance by reusing the dependency cache. + + Returns: + Path to the created batch helper script + """ + pf_dir = self.project_root / ".pf" + pf_dir.mkdir(exist_ok=True) + + batch_helper_path = pf_dir / "tsc_batch_helper.js" + + batch_helper_content = ''' +// Batch TypeScript AST extraction - processes multiple files in one program +const path = require('path'); +const fs = require('fs'); + +// Find project root by going up from .pf directory +const projectRoot = path.resolve(__dirname, '..'); + +// Build path to TypeScript module +const tsPath = path.join(projectRoot, '.auditor_venv', '.theauditor_tools', 'node_modules', 'typescript', 'lib', 'typescript.js'); + +// Load TypeScript +let ts; +try { + if (!fs.existsSync(tsPath)) { + throw new Error(`TypeScript not found at: ${tsPath}`); + } + ts = require(tsPath); +} catch (error) { + console.error(JSON.stringify({ + success: false, + error: `Failed to load TypeScript: ${error.message}` + })); + process.exit(1); +} + +// Get request and output paths from command line +const requestPath = process.argv[2]; +const outputPath = process.argv[3]; + +if (!requestPath || !outputPath) { + console.error(JSON.stringify({ error: "Request and output paths required" })); + process.exit(1); +} + +try { + // Read batch request + const request = JSON.parse(fs.readFileSync(requestPath, 'utf8')); + const filePaths = request.files || []; + + if (filePaths.length === 0) { + fs.writeFileSync(outputPath, JSON.stringify({}), 'utf8'); + process.exit(0); + } + + // Create a SINGLE TypeScript program with ALL files + // This is the key optimization - TypeScript will parse dependencies ONCE + const program = ts.createProgram(filePaths, { + target: ts.ScriptTarget.Latest, + module: ts.ModuleKind.ESNext, + jsx: ts.JsxEmit.Preserve, + allowJs: true, + checkJs: false, + noEmit: true, + skipLibCheck: true, // Skip checking .d.ts files for speed + moduleResolution: 
ts.ModuleResolutionKind.NodeJs + }); + + const checker = program.getTypeChecker(); + const results = {}; + + // Process each file using the SHARED program + for (const filePath of filePaths) { + try { + const sourceFile = program.getSourceFile(filePath); + if (!sourceFile) { + results[filePath] = { + success: false, + error: `Could not load source file: ${filePath}` + }; + continue; + } + + const sourceCode = sourceFile.text; + + // Helper function to serialize AST nodes (same as single-file version) + function serializeNode(node, depth = 0) { + if (depth > 100) return { kind: "TooDeep" }; + + const result = { + kind: node.kind !== undefined ? (ts.SyntaxKind[node.kind] || node.kind) : 'Unknown', + kindValue: node.kind || 0, + pos: node.pos || 0, + end: node.end || 0, + flags: node.flags || 0 + }; + + if (node.text !== undefined) result.text = node.text; + + if (node.name) { + if (typeof node.name === 'object') { + if (node.name.escapedText !== undefined) { + result.name = node.name.escapedText; + } else if (node.name.text !== undefined) { + result.name = node.name.text; + } else { + result.name = serializeNode(node.name, depth + 1); + } + } else { + result.name = node.name; + } + } + + if (node.type) { + result.type = serializeNode(node.type, depth + 1); + } + + const children = []; + if (node.members && Array.isArray(node.members)) { + node.members.forEach(member => { + if (member) children.push(serializeNode(member, depth + 1)); + }); + } + ts.forEachChild(node, child => { + if (child) children.push(serializeNode(child, depth + 1)); + }); + + if (children.length > 0) { + result.children = children; + } + + // CRITICAL FIX: Use getStart() to exclude leading trivia for accurate line numbers + const actualStart = node.getStart ? node.getStart(sourceFile) : node.pos; + const { line, character } = sourceFile.getLineAndCharacterOfPosition(actualStart); + result.line = line + 1; + result.column = character; + // RESTORED: Text extraction needed for accurate symbol names in taint analysis + result.text = sourceCode.substring(node.pos, node.end).trim(); + + return result; + } + + // Collect diagnostics for this file + const diagnostics = []; + const fileDiagnostics = ts.getPreEmitDiagnostics(program, sourceFile); + fileDiagnostics.forEach(diagnostic => { + const message = ts.flattenDiagnosticMessageText(diagnostic.messageText, '\\n'); + const location = diagnostic.file && diagnostic.start + ? diagnostic.file.getLineAndCharacterOfPosition(diagnostic.start) + : null; + + diagnostics.push({ + message, + category: ts.DiagnosticCategory[diagnostic.category], + code: diagnostic.code, + line: location ? location.line + 1 : null, + column: location ? location.character : null + }); + }); + + // Collect symbols for this file + const symbols = []; + function visit(node) { + try { + const symbol = checker.getSymbolAtLocation(node); + if (symbol && symbol.getName) { + const type = checker.getTypeOfSymbolAtLocation(symbol, node); + const typeString = checker.typeToString(type); + + symbols.push({ + name: symbol.getName ? symbol.getName() : 'anonymous', + kind: symbol.flags ? (ts.SymbolFlags[symbol.flags] || symbol.flags) : 0, + type: typeString || 'unknown', + line: node.pos !== undefined ? 
sourceFile.getLineAndCharacterOfPosition(node.pos).line + 1 : 0 + }); + } + } catch (e) { + // Log error for debugging + console.error(`[ERROR] Symbol extraction failed at ${filePath}:${node.pos}: ${e.message}`); + } + ts.forEachChild(node, visit); + } + visit(sourceFile); + + // Log symbol extraction results + console.error(`[INFO] Found ${symbols.length} symbols in ${filePath}`); + + // Build result for this file + const result = { + success: true, + fileName: filePath, + languageVersion: ts.ScriptTarget[sourceFile.languageVersion], + ast: serializeNode(sourceFile), + diagnostics: diagnostics, + symbols: symbols, + nodeCount: 0, + hasTypes: symbols.some(s => s.type && s.type !== 'any') + }; + + // Count nodes + function countNodes(node) { + if (!node) return; + result.nodeCount++; + if (node.children && Array.isArray(node.children)) { + node.children.forEach(countNodes); + } + } + if (result.ast) countNodes(result.ast); + + results[filePath] = result; + + } catch (error) { + results[filePath] = { + success: false, + error: `Error processing file: ${error.message}`, + ast: null, + diagnostics: [], + symbols: [] + }; + } + } + + // Write all results to output file + fs.writeFileSync(outputPath, JSON.stringify(results, null, 2), 'utf8'); + process.exit(0); + +} catch (error) { + console.error(JSON.stringify({ + success: false, + error: error.message, + stack: error.stack + })); + process.exit(1); +} +''' + + batch_helper_path.write_text(batch_helper_content, encoding='utf-8') + return batch_helper_path + + def get_semantic_ast_batch(self, file_paths: List[str]) -> Dict[str, Dict[str, Any]]: + """Get semantic ASTs for multiple JavaScript/TypeScript files in a single process. + + This dramatically improves performance by reusing the TypeScript program + and dependency cache across multiple files. + + Args: + file_paths: List of paths to JavaScript or TypeScript files to parse + + Returns: + Dictionary mapping file paths to their AST results + """ + # Validate all files exist + results = {} + valid_files = [] + + for file_path in file_paths: + file = Path(file_path).resolve() + if not file.exists(): + results[file_path] = { + "success": False, + "error": f"File not found: {file_path}", + "ast": None, + "diagnostics": [], + "symbols": [] + } + elif file.suffix.lower() not in ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.vue']: + results[file_path] = { + "success": False, + "error": f"Not a JavaScript/TypeScript file: {file_path}", + "ast": None, + "diagnostics": [], + "symbols": [] + } + else: + valid_files.append(str(file.resolve())) + + if not valid_files: + return results + + if not self.tsc_available: + for file_path in valid_files: + results[file_path] = { + "success": False, + "error": "TypeScript compiler not available in TheAuditor sandbox. 
Run 'aud setup-claude' to install tools.", + "ast": None, + "diagnostics": [], + "symbols": [] + } + return results + + try: + # Create batch request + batch_request = { + "files": valid_files, + "projectRoot": str(self.project_root) + } + + # Write batch request to temp file + if TempManager: + request_path, req_fd = TempManager.create_temp_file(str(self.project_root), suffix='_request.json') + os.close(req_fd) + output_path, out_fd = TempManager.create_temp_file(str(self.project_root), suffix='_output.json') + os.close(out_fd) + else: + # Fallback to regular tempfile + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as tmp_req: + request_path = tmp_req.name + with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False, encoding='utf-8') as tmp_out: + output_path = tmp_out.name + + # Write batch request data + with open(request_path, 'w', encoding='utf-8') as f: + json.dump(batch_request, f) + + # Calculate timeout based on batch size + # 5 seconds base + 2 seconds per file + dynamic_timeout = min(5 + (len(valid_files) * 2), 120) + + try: + # Run batch helper script + # Convert paths for Windows node if needed + helper_path = self._convert_path_for_node(self.batch_helper_script.resolve()) + request_path_converted = self._convert_path_for_node(Path(request_path)) + output_path_converted = self._convert_path_for_node(Path(output_path)) + + # CRITICAL FIX: Use sandboxed node executable, not system "node" + if not self.node_exe: + raise RuntimeError("Node.js runtime not found. Run 'aud setup-claude' to install tools.") + + result = subprocess.run( + [str(self.node_exe), helper_path, request_path_converted, output_path_converted], + capture_output=False, + stderr=subprocess.PIPE, + text=True, + timeout=dynamic_timeout, + cwd=self.project_root, + shell=IS_WINDOWS # Windows compatibility fix + ) + + if result.returncode != 0: + error_msg = f"Batch TypeScript compiler failed (exit code {result.returncode})" + if result.stderr: + error_msg += f": {result.stderr.strip()[:500]}" + + for file_path in valid_files: + results[file_path] = { + "success": False, + "error": error_msg, + "ast": None, + "diagnostics": [], + "symbols": [] + } + else: + # Read batch results + if Path(output_path).exists(): + with open(output_path, 'r', encoding='utf-8') as f: + batch_results = json.load(f) + + # Map results back to original file paths + for file_path in file_paths: + resolved_path = str(Path(file_path).resolve()) + if resolved_path in batch_results: + results[file_path] = batch_results[resolved_path] + elif file_path not in results: + results[file_path] = { + "success": False, + "error": "File not processed in batch", + "ast": None, + "diagnostics": [], + "symbols": [] + } + else: + for file_path in valid_files: + results[file_path] = { + "success": False, + "error": "Batch output file not created", + "ast": None, + "diagnostics": [], + "symbols": [] + } + finally: + # Clean up temp files + for temp_path in [request_path, output_path]: + if Path(temp_path).exists(): + Path(temp_path).unlink() + + except subprocess.TimeoutExpired: + for file_path in valid_files: + results[file_path] = { + "success": False, + "error": f"Batch timeout: Files too large or complex to parse within {dynamic_timeout:.0f} seconds", + "ast": None, + "diagnostics": [], + "symbols": [] + } + except Exception as e: + for file_path in valid_files: + results[file_path] = { + "success": False, + "error": f"Unexpected error in batch processing: {e}", + "ast": None, + "diagnostics": [], + 
"symbols": [] + } + + return results + + def get_semantic_ast(self, file_path: str) -> Dict[str, Any]: + """Get semantic AST for a JavaScript/TypeScript file using the TypeScript compiler. + + Args: + file_path: Path to the JavaScript or TypeScript file to parse + + Returns: + Dictionary containing the semantic AST and metadata: + - success: Boolean indicating if parsing was successful + - ast: The full AST tree with semantic information + - diagnostics: List of errors/warnings from TypeScript + - symbols: List of symbols with type information + - nodeCount: Total number of AST nodes + - hasTypes: Boolean indicating if type information is available + - error: Error message if parsing failed + """ + # Validate file exists + file = Path(file_path).resolve() + if not file.exists(): + return { + "success": False, + "error": f"File not found: {file_path}", + "ast": None, + "diagnostics": [], + "symbols": [] + } + + # Check if it's a JavaScript, TypeScript, or Vue file + if file.suffix.lower() not in ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.vue']: + return { + "success": False, + "error": f"Not a JavaScript/TypeScript file: {file_path}", + "ast": None, + "diagnostics": [], + "symbols": [] + } + + # CRITICAL: No fallbacks allowed - fail fast with clear error + if not self.tsc_available: + return { + "success": False, + "error": "TypeScript compiler not available in TheAuditor sandbox. Run 'aud setup-claude' to install tools.", + "ast": None, + "diagnostics": [], + "symbols": [] + } + + try: + # CRITICAL: No automatic installation - user must install TypeScript manually + # This enforces fail-fast philosophy + + # Handle Vue SFC files specially + actual_file_to_parse = file_path + vue_metadata = None + temp_file = None + + if file.suffix.lower() == '.vue': + # Read Vue SFC content + vue_content = file.read_text(encoding='utf-8') + script_content, template_content = self._extract_vue_blocks(vue_content) + + if script_content is None: + return { + "success": False, + "error": "No