mirror of
https://github.com/aljazceru/Auditor.git
synced 2025-12-17 03:24:18 +01:00
375 lines
12 KiB
Python
375 lines
12 KiB
Python
"""JavaScript/TypeScript-specific taint patterns.
|
|
|
|
This module implements taint tracking for JavaScript-specific constructs
|
|
that don't exist in other languages:
|
|
- Object destructuring
|
|
- Spread operators
|
|
- Bracket notation
|
|
- Array operations
|
|
- Type conversions
|
|
"""
|
|
|
|
import sqlite3
|
|
from typing import Set, List, Dict, Any
|
|
|
|
|
|
def _split_top_level(text: str, separator: str) -> List[str]:
    """Split *text* on *separator* wherever it is not nested inside brackets."""
    parts: List[str] = []
    depth = 0
    piece_start = 0
    for index, char in enumerate(text):
        if char in '{[(':
            depth += 1
        elif char in '}])':
            depth -= 1
        elif char == separator and depth == 0:
            parts.append(text[piece_start:index])
            piece_start = index + 1
    parts.append(text[piece_start:])
    return parts


def _matching_close(text: str, open_index: int) -> int:
    """Return the index of the bracket closing the one at *open_index*, or -1."""
    depth = 0
    for index in range(open_index, len(text)):
        char = text[index]
        if char in '{[':
            depth += 1
        elif char in '}]':
            depth -= 1
            if depth == 0:
                return index
    return -1


def _collect_bindings(pattern: str, names: Set[str]) -> None:
    """Add every variable bound by a destructuring *pattern* to *names*."""
    pattern = pattern.strip()
    if not pattern:
        return
    if pattern[0] not in '{[':
        # A plain binding name (leaf of the pattern).
        names.add(pattern)
        return

    close_index = _matching_close(pattern, 0)
    if close_index == -1:
        # Unbalanced pattern: nothing can be extracted reliably.
        return

    is_object = pattern[0] == '{'
    for item in _split_top_level(pattern[1:close_index], ','):
        # Drop a default value: "limit = 10" binds "limit", not "limit = 10".
        item = _split_top_level(item, '=')[0].strip()
        # Rest elements ("...rest") are skipped here, in objects and arrays
        # alike; track_spread_operators is the function that reports them.
        if not item or item.startswith('...'):
            continue
        if is_object:
            # Renamed property "key: binding" -> only the binding is a variable.
            key_and_binding = _split_top_level(item, ':')
            if len(key_and_binding) > 1:
                item = ':'.join(key_and_binding[1:]).strip()
        elif item == '_':
            # Array placeholder, not a real variable.
            continue
        # The binding may itself be a nested object/array pattern.
        _collect_bindings(item, names)


def track_destructuring(cursor: sqlite3.Cursor, source: Dict[str, Any], file_path: str) -> Set[str]:
    """
    Track object/array destructuring: const { x, y } = tainted_object.

    Every variable bound by a destructuring pattern whose right-hand side
    mentions the source pattern is reported as tainted. Handled forms:

        { username, password } = req.body       -> username, password
        { data: userData } = response           -> userData
        { limit = 10 } = req.query              -> limit
        { profile: { email } } = req.body       -> email
        [ first, _, second ] = array            -> first, second

    Rest elements (``...rest``) are not reported by this function. Default
    values containing commas inside string literals are not handled.

    Args:
        cursor: Database cursor; queried for rows of the ``assignments``
            table (columns ``file``, ``line``, ``target_var``, ``source_expr``)
        source: The tainted source; only ``source['pattern']`` is read and it
            is matched as a substring of ``source_expr`` via SQL LIKE
        file_path: Path to the file being analyzed

    Returns:
        Set of newly tainted variables from destructuring
    """
    tainted_vars: Set[str] = set()

    # Assignments whose target looks like a pattern and whose source
    # expression contains the tainted pattern.
    cursor.execute("""
        SELECT target_var, line, source_expr
        FROM assignments
        WHERE file = ?
        AND source_expr LIKE ?
        AND (target_var LIKE '%{%' OR target_var LIKE '%[%')
    """, (file_path, f"%{source['pattern']}%"))

    for target, _line, _expr in cursor.fetchall():
        # Start parsing at the outermost opening bracket so that any prefix
        # stored with the target (e.g. a declaration keyword) is ignored.
        openers = [pos for pos in (target.find('{'), target.find('[')) if pos != -1]
        if not openers:
            continue
        _collect_bindings(target[min(openers):], tainted_vars)

    return tainted_vars
|
|
|
|
|
|
def track_spread_operators(cursor: sqlite3.Cursor, source: Dict[str, Any], file_path: str) -> Set[str]:
    """
    Track spread/rest operators: const { ...rest } = tainted_object.

    Two situations are covered:

    1. Rest bindings in the assignment target, e.g. ``{ x, ...rest }`` or
       ``[ ...items ]``, where the source expression mentions the tainted
       pattern. Every rest binding in the target is reported, including
       nested ones and names containing ``$``.
    2. The tainted pattern being spread into a new object/array, e.g.
       ``merged = { ...req.body }``. The assignment target is reported.

    Args:
        cursor: Database cursor; queried for rows of the ``assignments``
            table (columns ``file``, ``line``, ``target_var``, ``source_expr``)
        source: The tainted source; only ``source['pattern']`` is read
        file_path: Path to the file being analyzed

    Returns:
        Set of newly tainted variables from spread operations
    """
    # Function-scope import as in the original code, but executed once per
    # call instead of once per result row.
    import re

    # "..." followed by a JS identifier; "$" is legal in JS identifiers.
    rest_binding = re.compile(r'\.\.\.\s*([\w$]+)')

    tainted_vars: Set[str] = set()

    # Case 1: rest bindings on the left-hand side.
    cursor.execute("""
        SELECT target_var, line, source_expr
        FROM assignments
        WHERE file = ?
        AND source_expr LIKE ?
        AND target_var LIKE '%...%'
    """, (file_path, f"%{source['pattern']}%"))

    for target, _line, _expr in cursor.fetchall():
        # findall (not search) so that several rest bindings in one target,
        # e.g. { a: { ...inner }, ...outer }, are all recorded.
        tainted_vars.update(rest_binding.findall(target))

    # Case 2: the source spread into an object/array literal on the
    # right-hand side taints the variable being assigned.
    cursor.execute("""
        SELECT target_var, source_expr
        FROM assignments
        WHERE file = ?
        AND source_expr LIKE ?
    """, (file_path, f"%...{source['pattern']}%"))

    tainted_vars.update(target for target, _expr in cursor.fetchall())

    return tainted_vars
|
|
|
|
|
|
def track_bracket_notation(cursor: sqlite3.Cursor, source_pattern: str, file_path: str) -> List[Dict[str, Any]]:
    """
    Find bracket-notation accesses on a source: obj['key'] or obj[variable].

    Typical matches are ``req.query['param']`` or
    ``req.headers['authorization']``. Both the ``symbols`` table and the
    ``assignments`` table are searched for text containing the source
    pattern immediately followed by ``[``.

    Args:
        cursor: Database cursor
        source_pattern: Pattern to search for (e.g., "req.query")
        file_path: Path to the file being analyzed

    Returns:
        List of dicts with keys ``file``, ``name``, ``line``, ``column``,
        ``pattern``, ``type`` (always "source") and ``access`` (always
        "bracket"). Symbol hits come first, ordered by line; assignment
        hits follow, using the source expression as ``name`` and column 0.
    """
    # The same three bind parameters serve both queries.
    query_args = (file_path, f"%{source_pattern}[%", f"%{source_pattern}['%")

    def as_source(name: Any, line: Any, column: Any) -> Dict[str, Any]:
        # Shape shared by every reported hit.
        return {
            "file": file_path,
            "name": name,
            "line": line,
            "column": column,
            "pattern": source_pattern,
            "type": "source",
            "access": "bracket"
        }

    # Pass 1: symbols whose name contains the bracket access.
    cursor.execute("""
        SELECT name, line, col, type
        FROM symbols
        WHERE path = ?
        AND (name LIKE ? OR name LIKE ?)
        ORDER BY line
    """, query_args)
    hits = [
        as_source(sym_name, sym_line, sym_col)
        for sym_name, sym_line, sym_col, _sym_type in cursor.fetchall()
    ]

    # Pass 2: assignments whose right-hand side contains the bracket access.
    # No column is available from this table, so 0 is reported.
    cursor.execute("""
        SELECT DISTINCT line, source_expr
        FROM assignments
        WHERE file = ?
        AND (source_expr LIKE ? OR source_expr LIKE ?)
    """, query_args)
    hits.extend(
        as_source(rhs_expr, rhs_line, 0)
        for rhs_line, rhs_expr in cursor.fetchall()
    )

    return hits
|
|
|
|
|
|
def track_array_operations(cursor: sqlite3.Cursor, tainted_var: str, file_path: str) -> Set[str]:
    """
    Propagate taint through array methods and index access.

    For every call symbol named ``<tainted_var>.<method>...`` (map, filter,
    forEach, reduce, ...) the assignments on the call line and the three
    lines after it whose source expression mentions the method are looked
    up; each such target contributes a ``<target>_element`` placeholder that
    stands in for the callback's element parameter (the callback signature
    itself is not parsed). Targets assigned from ``<tainted_var>[...]`` are
    reported directly.

    Args:
        cursor: Database cursor
        tainted_var: The tainted array variable
        file_path: Path to the file being analyzed

    Returns:
        Set of newly tainted variables from array operations
    """
    # Array methods whose callback receives (possibly tainted) elements.
    propagating_methods = (
        'map', 'filter', 'forEach', 'reduce', 'find', 'findIndex',
        'some', 'every', 'flatMap', 'reduceRight',
    )

    derived = set()

    for method_name in propagating_methods:
        # Calls of the form: tainted_var.method(...)
        cursor.execute("""
            SELECT name, line
            FROM symbols
            WHERE path = ?
            AND type = 'call'
            AND name LIKE ?
        """, (file_path, f"{tainted_var}.{method_name}%"))
        call_lines = [row[1] for row in cursor.fetchall()]

        for call_line in call_lines:
            # Assignments at or shortly after the call that mention the method.
            cursor.execute("""
                SELECT target_var
                FROM assignments
                WHERE file = ?
                AND line BETWEEN ? AND ?
                AND source_expr LIKE ?
            """, (file_path, call_line, call_line + 3, f"%{method_name}%"))
            # Placeholder name for the callback's element parameter.
            derived.update(f"{row[0]}_element" for row in cursor.fetchall())

    # Direct index access: target = tainted_var[...]
    cursor.execute("""
        SELECT target_var
        FROM assignments
        WHERE file = ?
        AND source_expr LIKE ?
    """, (file_path, f"{tainted_var}[%"))
    derived.update(row[0] for row in cursor.fetchall())

    return derived
|
|
|
|
|
|
def track_type_conversions(cursor: sqlite3.Cursor, tainted_var: str, file_path: str) -> Set[str]:
    """
    Propagate taint through type-conversion and encoding functions.

    An assignment target is reported when its source expression contains
    both the tainted variable and the name of a converter such as
    ``parseInt``, ``String`` or ``JSON.parse`` (plain substring matching via
    SQL LIKE), or when the expression starts with
    ``<tainted_var>.toString``.

    Args:
        cursor: Database cursor
        tainted_var: The tainted variable
        file_path: Path to the file being analyzed

    Returns:
        Set of newly tainted variables from type conversions
    """
    # Functions whose output carries the taint of their input.
    converter_names = (
        'parseInt', 'parseFloat', 'Number',
        'String', 'toString',
        'JSON.parse', 'JSON.stringify',
        'atob', 'btoa',  # Base64 encoding/decoding
        'encodeURIComponent', 'decodeURIComponent',
        'encodeURI', 'decodeURI',
    )

    mentions_var = f"%{tainted_var}%"
    derived = set()

    for converter_name in converter_names:
        # Expressions mentioning both the converter and the tainted variable.
        cursor.execute("""
            SELECT target_var, source_expr
            FROM assignments
            WHERE file = ?
            AND source_expr LIKE ?
            AND source_expr LIKE ?
        """, (file_path, f"%{converter_name}%", mentions_var))
        derived.update(row[0] for row in cursor.fetchall())

    # Method-call form on the variable itself: tainted_var.toString(...)
    cursor.execute("""
        SELECT target_var
        FROM assignments
        WHERE file = ?
        AND source_expr LIKE ?
    """, (file_path, f"{tainted_var}.toString%"))
    derived.update(row[0] for row in cursor.fetchall())

    return derived
|
|
|
|
|
|
def enhance_javascript_tracking(
    cursor: sqlite3.Cursor,
    source: Dict[str, Any],
    tainted_elements: Set[str],
    file_path: str
) -> Set[str]:
    """
    Main entry point for JavaScript-specific taint enhancements.

    Starts from a copy of ``tainted_elements`` and adds, in order, the
    results of destructuring tracking and spread tracking for ``source``.
    Then, for each element known at that point, adds the results of
    array-operation tracking and type-conversion tracking. Variables
    discovered during that last step are not themselves re-processed
    (single pass). The input set is not modified.

    Args:
        cursor: Database cursor
        source: The taint source
        tainted_elements: Current set of tainted elements; an element may be
            a bare name or carry a prefix ending in ':' before the name
        file_path: Path to the file being analyzed

    Returns:
        Enhanced set of tainted elements
    """
    result = set(tainted_elements)

    # Source-driven passes: destructuring first, then spread/rest.
    result.update(track_destructuring(cursor, source, file_path))
    result.update(track_spread_operators(cursor, source, file_path))

    # Variable-driven passes over a snapshot, because the set grows while
    # the passes run.
    for element in tuple(result):
        # Keep only what follows the first ':' when a prefix is present.
        _prefix, separator, remainder = element.partition(':')
        var_name = remainder if separator else element

        result.update(track_array_operations(cursor, var_name, file_path))
        result.update(track_type_conversions(cursor, var_name, file_path))

    return result