import json
import logging
import subprocess
from pathlib import Path
from urllib.parse import urlparse

import luigi
from luigi.util import inherits
from luigi.contrib.sqla import SQLAlchemyTarget

from .targets import GatherWebTargets
from ..config import defaults
from ...tools import tools

import pipeline.models.db_manager
from ..helpers import meets_requirements
from ...models.port_model import Port
from ...models.header_model import Header
from ...models.endpoint_model import Endpoint
from ...models.screenshot_model import Screenshot


@inherits(GatherWebTargets)
class AquatoneScan(luigi.Task):
""" Screenshot all web targets and generate HTML report.
|
|
|
|
Install:
|
|
.. code-block:: console
|
|
|
|
mkdir /tmp/aquatone
|
|
wget -q https://github.com/michenriksen/aquatone/releases/download/v1.7.0/aquatone_linux_amd64_1.7.0.zip -O /tmp/aquatone/aquatone.zip
|
|
unzip /tmp/aquatone/aquatone.zip -d /tmp/aquatone
|
|
sudo mv /tmp/aquatone/aquatone /usr/local/bin/aquatone
|
|
rm -rf /tmp/aquatone
|
|
|
|
Basic Example:
|
|
``aquatone`` commands are structured like the example below.
|
|
|
|
``cat webtargets.tesla.txt | /opt/aquatone -scan-timeout 900 -threads 20``
|
|
|
|
Luigi Example:
|
|
.. code-block:: python
|
|
|
|
PYTHONPATH=$(pwd) luigi --local-scheduler --module recon.web.aquatone AquatoneScan --target-file tesla --top-ports 1000
|
|
|
|
Args:
|
|
threads: number of threads for parallel aquatone command execution
|
|
scan_timeout: timeout in miliseconds for aquatone port scans
|
|
db_location: specifies the path to the database used for storing results *Required by upstream Task*
|
|
exempt_list: Path to a file providing blacklisted subdomains, one per line. *Optional by upstream Task*
|
|
top_ports: Scan top N most popular ports *Required by upstream Task*
|
|
ports: specifies the port(s) to be scanned *Required by upstream Task*
|
|
interface: use the named raw network interface, such as "eth0" *Required by upstream Task*
|
|
rate: desired rate for transmitting packets (packets per second) *Required by upstream Task*
|
|
target_file: specifies the file on disk containing a list of ips or domains *Required by upstream Task*
|
|
results_dir: specifes the directory on disk to which all Task results are written *Required by upstream Task*
|
|
"""
|
|
|
|
    threads = luigi.Parameter(default=defaults.get("threads", ""))
    scan_timeout = luigi.Parameter(default=defaults.get("aquatone-scan-timeout", ""))
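    # external tools this scan depends on; requires() hands these to
    # meets_requirements(), and exception=True asks that helper to treat a
    # missing tool as a hard error (see ..helpers.meets_requirements)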
    requirements = ["aquatone", "masscan"]
    exception = True

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.db_mgr = pipeline.models.db_manager.DBManager(db_location=self.db_location)
        self.results_subfolder = Path(self.results_dir) / "aquatone-results"

    def requires(self):
        """ AquatoneScan depends on GatherWebTargets to run.

        GatherWebTargets accepts exempt_list and expects rate, target_file, interface,
        and either ports or top_ports as parameters.

        Returns:
            luigi.Task - GatherWebTargets
        """
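        # bail out early if the required external tools aren't installed; with
        # self.exception set, this raises instead of letting a doomed scan queue up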
        meets_requirements(self.requirements, self.exception)
        args = {
            "results_dir": self.results_dir,
            "rate": self.rate,
            "target_file": self.target_file,
            "top_ports": self.top_ports,
            "interface": self.interface,
            "ports": self.ports,
            "exempt_list": self.exempt_list,
            "db_location": self.db_location,
        }
        return GatherWebTargets(**args)

    def output(self):
        """ Returns the target output for this task.

        Returns:
            luigi.contrib.sqla.SQLAlchemyTarget
        """
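        # luigi records completion for this update_id in its marker table;
        # parse_results() calls .touch() on this target to mark the task done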
        return SQLAlchemyTarget(
            connection_string=self.db_mgr.connection_string, target_table="screenshot", update_id=self.task_id
        )

    def _get_similar_pages(self, url, results):
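        # aquatone clusters visually similar pages under pageSimilarityClusters;
        # returning None (rather than an empty list) signals that url wasn't
        # found in any cluster at all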
        # populate similar pages if any exist
        similar_pages = None

        for cluster_id, cluster in results.get("pageSimilarityClusters").items():
            if url not in cluster:
                continue

            similar_pages = list()

            for similar_url in cluster:
                if similar_url == url:
                    continue

                similar_pages.append(self.db_mgr.get_or_create(Screenshot, url=similar_url))

        return similar_pages

    def parse_results(self):
        """ Read in aquatone's .json file and update the associated Target record """

        """ Example data

            "https://email.assetinventory.bugcrowd.com:8443/": {
                "uuid": "679b0dc7-02ea-483f-9e0a-3a5e6cdea4b6",
                "url": "https://email.assetinventory.bugcrowd.com:8443/",
                "hostname": "email.assetinventory.bugcrowd.com",
                "addrs": [
                    "104.20.60.51",
                    "104.20.61.51",
                    "2606:4700:10::6814:3d33",
                    "2606:4700:10::6814:3c33"
                ],
                "status": "403 Forbidden",
                "pageTitle": "403 Forbidden",
                "headersPath": "headers/https__email_assetinventory_bugcrowd_com__8443__42099b4af021e53f.txt",
                "bodyPath": "html/https__email_assetinventory_bugcrowd_com__8443__42099b4af021e53f.html",
                "screenshotPath": "screenshots/https__email_assetinventory_bugcrowd_com__8443__42099b4af021e53f.png",
                "hasScreenshot": true,
                "headers": [
                    {
                        "name": "Cf-Ray",
                        "value": "55d396727981d25a-DFW",
                        "decreasesSecurity": false,
                        "increasesSecurity": false
                    },
                    ...
                ],
                "tags": [
                    {
                        "text": "CloudFlare",
                        "type": "info",
                        "link": "http://www.cloudflare.com",
                        "hash": "9ea92fc7dce5e740ccc8e64d8f9e3336a96efc2a"
                    }
                ],
                "notes": null
            },
            ...

            "pageSimilarityClusters": {
                "11905c72-fd18-43de-9133-99ba2a480e2b": [
                    "http://52.53.92.161/",
                    "https://staging.bitdiscovery.com/",
                    "https://52.53.92.161/",
                    ...
                ],
                "139fc2c4-0faa-4ae3-a6e4-0a1abe2418fa": [
                    "https://104.20.60.51:8443/",
                    "https://email.assetinventory.bugcrowd.com:8443/",
                    ...
                ],
        """
        try:
            with open(self.results_subfolder / "aquatone_session.json") as f:
                # results.keys -> dict_keys(['version', 'stats', 'pages', 'pageSimilarityClusters'])
                results = json.load(f)
        except FileNotFoundError as e:
            logging.error(e)
            return

        for page, page_dict in results.get("pages").items():
            headers = list()

            url = page_dict.get("url")  # one url to one screenshot, unique key

            # build out the endpoint's data to include headers; this has value whether or not there's a screenshot
            endpoint = self.db_mgr.get_or_create(Endpoint, url=url)
            if not endpoint.status_code:
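                # aquatone's status field looks like "403 Forbidden"; split once and
                # keep just the numeric code, falling back to the whole string when
                # there is no reason phrase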
                status = page_dict.get("status").split(maxsplit=1)
                if len(status) > 1:
                    endpoint.status_code, _ = status
                else:
                    endpoint.status_code = status[0]

            for header_dict in page_dict.get("headers"):
                header = self.db_mgr.get_or_create(Header, name=header_dict.get("name"), value=header_dict.get("value"))

                if endpoint not in header.endpoints:
                    header.endpoints.append(endpoint)

                headers.append(header)

            endpoint.headers = headers

            parsed_url = urlparse(url)

            ip_or_hostname = parsed_url.hostname
            tgt = self.db_mgr.get_or_create_target_by_ip_or_hostname(ip_or_hostname)

            endpoint.target = tgt

            if not page_dict.get("hasScreenshot"):
                # if there isn't a screenshot, save the endpoint data and move along
                self.db_mgr.add(endpoint)
                # This causes an integrity error on insertion due to the task_id being the same for two
                # different target tables. Could subclass SQLAlchemyTarget and set the unique-ness to be the
                # combination of update_id + target_table. The question is, does it matter?
                # TODO: assess above and act
                # SQLAlchemyTarget(
                #     connection_string=self.db_mgr.connection_string, target_table="endpoint", update_id=self.task_id
                # ).touch()
                continue

            # build out screenshot data
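            # NOTE: URLs without an explicit port fall back to 80 here, even for
            # https (where 443 would be the conventional default)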
            port = parsed_url.port if parsed_url.port else 80
            port = self.db_mgr.get_or_create(Port, protocol="tcp", port_number=port)

            image = (self.results_subfolder / page_dict.get("screenshotPath")).read_bytes()

            screenshot = self.db_mgr.get_or_create(Screenshot, url=url)
            screenshot.port = port
            screenshot.endpoint = endpoint
            screenshot.target = screenshot.endpoint.target
            screenshot.image = image

            similar_pages = self._get_similar_pages(url, results)

            if similar_pages is not None:
                screenshot.similar_pages = similar_pages

            self.db_mgr.add(screenshot)
            self.output().touch()

        self.db_mgr.close()

    def run(self):
        """ Defines the options/arguments sent to aquatone after processing, then runs the scan.

        aquatone reads its targets from stdin, i.e.

        cat webtargets.tesla.txt | /opt/aquatone -scan-timeout 900 -threads 20
        """
        self.results_subfolder.mkdir(parents=True, exist_ok=True)

        command = [
            tools.get("aquatone").get("path"),
            "-scan-timeout",
            self.scan_timeout,
            "-threads",
            self.threads,
            "-silent",
            "-out",
            self.results_subfolder,
        ]

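        # aquatone takes its target list on stdin; write the web targets gathered
        # by the upstream task to a scratch file, replay it as stdin, then clean up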
        aquatone_input_file = self.results_subfolder / "input-from-webtargets"

        with open(aquatone_input_file, "w") as f:
            for target in self.db_mgr.get_all_web_targets():
                f.write(f"{target}\n")

        with open(aquatone_input_file) as target_list:
            subprocess.run(command, stdin=target_list)

        aquatone_input_file.unlink()

        self.parse_results()