# Mirror of https://github.com/aljazceru/Auditor.git
# Synced 2025-12-17 03:24:18 +01:00
# 343 lines, 8.8 KiB, Python
"""Taint source, sink, and sanitizer definitions.

This module contains all the constant definitions for taint analysis:
- TAINT_SOURCES: Where untrusted data originates
- SECURITY_SINKS: Where untrusted data should not flow
- SANITIZERS: Functions that clean/validate data
"""

import platform

# True when the interpreter reports a Windows host. The original comment says
# this flag exists for character-encoding decisions; no consumer is visible in
# this module.
IS_WINDOWS = platform.system() == "Windows"


# Taint sources: names of calls/attributes where untrusted data originates.
# Refined to focus on truly external/untrusted input sources.
#
# Maps a category name to a list of dotted-name patterns. Each pattern is
# listed once per category; a name shared by several libraries (for example
# "response.text" for both Requests and Scrapy) appears only at its first
# occurrence.
TAINT_SOURCES = {
    # JavaScript/TypeScript sources - Web request data only
    "js": [
        "req.body",
        "req.query",
        "req.params",
        "req.headers",
        "req.cookies",
        "request.body",
        "request.query",
        "request.params",
        "ctx.request.body",
        "ctx.query",
        "ctx.params",
        "document.location",
        "window.location",
        "document.URL",
        "document.referrer",
        "localStorage.getItem",
        "sessionStorage.getItem",
        "URLSearchParams",
        "postMessage",
    ],
    # Python sources - Web and CLI input only
    "python": [
        "request.args",
        "request.form",
        "request.json",
        "request.data",
        "request.values",
        "request.files",
        "request.cookies",
        "request.headers",
        "request.get_json",
        "request.get_data",
        "input",  # User console input
        "raw_input",  # Python 2 user input
        "sys.argv",  # Command line arguments
        "click.argument",  # Click CLI arguments
        "click.option",  # Click CLI options
        "argparse.parse_args",  # Argparse arguments
    ],
    # Network and console stream sources (generic file operations live in
    # the "file_io" category below)
    "network": [
        "socket.recv",
        "socket.recvfrom",
        "websocket.receive",
        "stdin.read",  # Console input
    ],
    # Web scraping and data extraction sources
    "web_scraping": [
        # Requests library
        "requests.get",
        "requests.post",
        "requests.put",
        "requests.patch",
        "requests.delete",
        "response.text",
        "response.content",
        "response.json",
        "resp.text",
        "resp.content",
        "resp.json",

        # urllib
        "urlopen",
        "urllib.request.urlopen",
        "urllib2.urlopen",

        # BeautifulSoup HTML parsing
        "BeautifulSoup",
        "soup.find",
        "soup.find_all",
        "soup.select",
        "soup.select_one",
        "element.text",
        "element.get_text",
        "element.string",
        "tag.text",
        "tag.get_text",

        # Playwright browser automation
        "page.content",
        "page.inner_text",
        "page.inner_html",
        "page.locator",
        "page.text_content",
        "element.inner_text",
        "element.inner_html",
        "element.text_content",

        # Selenium browser automation
        # ("element.text" is already listed under BeautifulSoup above)
        "driver.page_source",
        "driver.find_element",
        "element.get_attribute",
        "webdriver.page_source",

        # Scrapy framework
        # ("response.text" is already listed under Requests above)
        "response.body",
        "response.css",
        "response.xpath",
        "selector.get",
        "selector.getall",
    ],
    # File I/O and data loading sources
    "file_io": [
        # Basic file operations
        "open",
        "file.read",
        "file.readline",
        "file.readlines",

        # JSON operations
        "json.load",
        "json.loads",
        "json.JSONDecoder",

        # CSV/Excel operations
        "csv.reader",
        "csv.DictReader",
        "pd.read_csv",
        "pd.read_excel",
        "pd.read_json",
        "pd.read_html",
        "pd.read_sql",
        "pandas.read_csv",
        "pandas.read_excel",

        # YAML operations
        "yaml.load",
        "yaml.safe_load",
        "yaml.full_load",

        # XML operations
        "etree.parse",
        "etree.fromstring",
        "xml.parse",
        "ElementTree.parse",

        # Environment variables
        "os.getenv",
        "os.environ.get",
        "environ.get",
    ],
    # Database category REMOVED - internal database data is trusted, not a taint source
}


# Sanitizers: names of functions treated as cleaning/validating data, grouped
# by the vulnerability type they are associated with.
#
# Maps a category name to a list of function-name patterns.
SANITIZERS = {
    # SQL sanitizers - Functions that properly escape or parameterize queries
    "sql": [
        "escape_string",
        "mysql_real_escape_string",
        "mysqli_real_escape_string",
        "pg_escape_string",
        "sqlite3.escape_string",
        # NOTE(review): text() only wraps a SQL string; it is safe when used
        # with bound parameters, not when the string is built by formatting.
        "sqlalchemy.text",
        "db.prepare",
        "parameterize",
        "prepared_statement",
        "bind_param",
        "execute_prepared",
        "psycopg2.sql.SQL",
        "psycopg2.sql.Identifier",
        "psycopg2.sql.Literal",
    ],
    # XSS sanitizers - HTML escaping functions
    "xss": [
        "escape_html",
        "html.escape",
        "cgi.escape",
        "markupsafe.escape",
        "DOMPurify.sanitize",
        "bleach.clean",
        "strip_tags",
        "sanitize_html",
        "escape_javascript",
        "json.dumps",  # When used for JSON encoding
        "JSON.stringify",
        "encodeURIComponent",
        "encodeURI",
        "_.escape",  # Lodash escape
        "escapeHtml",
        "htmlspecialchars",
        "htmlentities",
    ],
    # Path traversal sanitizers
    "path": [
        "os.path.basename",
        "Path.basename",
        "secure_filename",
        "sanitize_filename",
        "normalize_path",
        # NOTE(review): realpath/abspath/resolve/normalize canonicalise a
        # path but do not confine it to a base directory on their own.
        "realpath",
        "abspath",
        "path.resolve",
        "path.normalize",
        "werkzeug.utils.secure_filename",
    ],
    # Command injection sanitizers
    "command": [
        "shlex.quote",
        "pipes.quote",
        "escapeshellarg",
        "escapeshellcmd",
        "shell_escape",
        "quote",
        "escape_shell",
    ],
    # General validation functions
    "validation": [
        "validate",
        "validator",
        "is_valid",
        "check_input",
        "sanitize",
        "clean",
        "filter_var",
        "assert_valid",
        "verify",
    ],
}


# Security sinks: names of functions where external data flows are tracked.
# Categories are for organizational purposes only - Truth Couriers don't classify vulnerabilities
#
# Maps a category name to a list of function-name patterns. Each pattern is
# listed once per category; the same name may still appear in more than one
# category (for example "collection.find" under both "sql" and "nosql").
SECURITY_SINKS = {
    # SQL-related sinks (factual: functions that interact with databases)
    "sql": [
        "db.query",
        "db.execute",
        "db.exec",
        "db.raw",
        "cursor.execute",
        "connection.execute",
        "query",
        "execute",
        "executemany",
        "rawQuery",
        "knex.raw",
        "sequelize.query",
        "mongoose.find",
        "collection.find",
        # Async Python ORMs
        "asyncpg.execute",
        "asyncpg.executemany",
        "asyncpg.fetch",
        "asyncpg.fetchrow",
        "asyncpg.fetchval",
        "tortoise.execute_query",
        "tortoise.execute_sql",
        "databases.execute",
        "databases.fetch_all",
        "databases.fetch_one",
        # Modern JS ORMs ("knex.raw" is already listed above)
        "prisma.$queryRaw",
        "prisma.$executeRaw",
        "prisma.$queryRawUnsafe",
        "prisma.$executeRawUnsafe",
        "typeorm.query",
        "typeorm.createQueryBuilder",
        "objection.raw",
    ],
    # Command execution sinks (factual: functions that execute system commands)
    "command": [
        "os.system",
        "os.popen",
        "subprocess.run",
        "subprocess.call",
        "subprocess.Popen",
        "subprocess.check_call",
        "subprocess.check_output",
        "exec",
        "eval",
        "child_process.exec",
        "child_process.spawn",
        "child_process.execFile",
        "shell.exec",
    ],
    # HTML/Response output sinks (factual: functions that output to HTML/HTTP responses)
    "xss": [
        "innerHTML",
        "outerHTML",
        "document.write",
        "document.writeln",
        "dangerouslySetInnerHTML",
        "insertAdjacentHTML",
        "response.write",
        "res.send",
        "res.render",
        "res.json",
    ],
    # File system operation sinks (factual: functions that interact with file system)
    "path": [
        "fs.readFile",
        "fs.readFileSync",
        "fs.writeFile",
        "fs.writeFileSync",
        "fs.createReadStream",
        "fs.createWriteStream",
        "open",
        "file.open",
        "Path.join",
        "path.join",
        "os.path.join",
    ],
    # LDAP injection sinks
    "ldap": [
        "ldap.search",
        "ldap.bind",
        "ldap.modify",
        "ldap.add",
        "ldap.delete",
    ],
    # NoSQL injection sinks
    "nosql": [
        "$where",
        "$regex",
        "collection.find",
        "collection.findOne",
        "collection.update",
        "collection.remove",
        "collection.aggregate",
    ],
}