WIP: add waybackurls scan (#56)

* fixed up config.defaults definition

tools-dir and database-dir now use defaults.home value

* added tool definition file; closes #54

* added basic PoC for waybackurls scanner; updated helpers.py test

* added Endpoint/Target parsing; updated existing tests to pass

* added tests for waybackurls

* added WaybackurlsScan to FullScan

* added documentation for WaybackurlsScan
This commit is contained in:
epi052
2020-05-02 18:06:44 -07:00
committed by GitHub
parent 078fdaada7
commit f556319453
13 changed files with 212 additions and 13 deletions

View File

@@ -71,7 +71,7 @@ The installer maintains a (naive) list of installed tools at `~/.local/recon-pip
## Defining a Scan's Scope ## Defining a Scan's Scope
**New in v0.9.0**: In the event you're scanning a single ip address or host, simply use `--target`. It accepts a single target and works in conjunction with `--exempt-list` if specified. **New as of v0.9.0**: In the event you're scanning a single ip address or host, simply use `--target`. It accepts a single target and works in conjunction with `--exempt-list` if specified.
```text ```text
scan HTBScan --target 10.10.10.183 --top-ports 1000 scan HTBScan --target 10.10.10.183 --top-ports 1000
@@ -146,6 +146,7 @@ Scan the target
[-] SearchsploitScan queued [-] SearchsploitScan queued
[-] ThreadedNmapScan queued [-] ThreadedNmapScan queued
[-] SubjackScan queued [-] SubjackScan queued
[-] WaybackurlsScan queued
[-] AquatoneScan queued [-] AquatoneScan queued
[-] GobusterScan queued [-] GobusterScan queued
[db-1] recon-pipeline> [db-1] recon-pipeline>

View File

@@ -67,6 +67,13 @@ TKOSubs Scanner
.. autoclass:: pipeline.recon.web.subdomain_takeover.TKOSubsScan .. autoclass:: pipeline.recon.web.subdomain_takeover.TKOSubsScan
:members: :members:
WaybackurlsScan Scanner
#######################
.. autoclass:: pipeline.recon.web.waybackurls.WaybackurlsScan
:members:
Webanalyze Scanner Webanalyze Scanner
################## ##################

View File

@@ -15,6 +15,7 @@ following individual scans are available
- :class:`pipeline.recon.web.subdomain_takeover.SubjackScan` - :class:`pipeline.recon.web.subdomain_takeover.SubjackScan`
- :class:`pipeline.recon.nmap.ThreadedNmapScan` - :class:`pipeline.recon.nmap.ThreadedNmapScan`
- :class:`pipeline.recon.web.subdomain_takeover.TKOSubsScan` - :class:`pipeline.recon.web.subdomain_takeover.TKOSubsScan`
- :class:`pipeline.recon.web.waybackurls.WaybackurlsScan`
- :class:`pipeline.recon.web.webanalyze.WebanalyzeScan` - :class:`pipeline.recon.web.webanalyze.WebanalyzeScan`
Additionally, two wrapper scans are made available. These execute multiple scans in a pipeline. Additionally, two wrapper scans are made available. These execute multiple scans in a pipeline.
@@ -49,6 +50,7 @@ Create a targetfile
/root/PycharmProjects/recon-pipeline/pipeline/recon-pipeline.py /root/PycharmProjects/recon-pipeline/pipeline/recon-pipeline.py
recon-pipeline> recon-pipeline>
**New as of v0.9.0**: In the event you're scanning a single ip address or host, simply use ``--target``. It accepts a single target and works in conjunction with ``--exempt-list`` if specified.
Create a new database to store scan results Create a new database to store scan results
@@ -79,6 +81,7 @@ Scan the target
[-] WebanalyzeScan queued [-] WebanalyzeScan queued
[-] SearchsploitScan queued [-] SearchsploitScan queued
[-] ThreadedNmapScan queued [-] ThreadedNmapScan queued
[-] WaybackurlsScan queued
[-] SubjackScan queued [-] SubjackScan queued
[-] AquatoneScan queued [-] AquatoneScan queued
[-] GobusterScan queued [-] GobusterScan queued

View File

@@ -141,7 +141,7 @@ class DBManager:
def get_status_codes(self): def get_status_codes(self):
""" Simple helper that returns all status codes found during scanning """ """ Simple helper that returns all status codes found during scanning """
return set(str(x[0]) for x in self.session.query(Endpoint.status_code).all()) return set(str(x[0]) for x in self.session.query(Endpoint.status_code).all() if x[0] is not None)
def get_and_filter(self, model, defaults=None, **kwargs): def get_and_filter(self, model, defaults=None, **kwargs):
""" Simple helper to either get an existing record if it exists otherwise create and return a new instance """ """ Simple helper to either get an existing record if it exists otherwise create and return a new instance """

View File

@@ -256,7 +256,7 @@ class ReconShell(cmd2.Cmd):
""" Scan something. """ Scan something.
Possible scans include Possible scans include
AmassScan GobusterScan SearchsploitScan AmassScan GobusterScan SearchsploitScan WaybackurlsScan
ThreadedNmapScan WebanalyzeScan AquatoneScan FullScan ThreadedNmapScan WebanalyzeScan AquatoneScan FullScan
MasscanScan SubjackScan TKOSubsScan HTBScan MasscanScan SubjackScan TKOSubsScan HTBScan
""" """
@@ -345,7 +345,6 @@ class ReconShell(cmd2.Cmd):
if persistent_tool_dict.exists(): if persistent_tool_dict.exists():
tools = pickle.loads(persistent_tool_dict.read_bytes()) tools = pickle.loads(persistent_tool_dict.read_bytes())
print(args.tool)
if tools.get(args.tool).get("dependencies"): if tools.get(args.tool).get("dependencies"):
# get all of the requested tools dependencies # get all of the requested tools dependencies
@@ -605,7 +604,7 @@ class ReconShell(cmd2.Cmd):
for endpoint in endpoints: for endpoint in endpoints:
color = color_map.get(str(endpoint.status_code)[0]) color = color_map.get(str(endpoint.status_code)[0])
if args.plain: if args.plain or endpoint.status_code is None:
results.append(endpoint.url) results.append(endpoint.url)
else: else:
results.append(f"[{style(endpoint.status_code, fg=color)}] {endpoint.url}") results.append(f"[{style(endpoint.status_code, fg=color)}] {endpoint.url}")

View File

@@ -10,14 +10,13 @@ defaults = {
"gobuster-extensions": "", "gobuster-extensions": "",
"results-dir": "recon-results", "results-dir": "recon-results",
"aquatone-scan-timeout": "900", "aquatone-scan-timeout": "900",
"tools-dir": f"{Path.home()}/.local/recon-pipeline/tools", "home": Path.home(),
"database-dir": f"{Path.home()}/.local/recon-pipeline/databases",
"home": Path.home().expanduser().resolve(),
} }
defaults["tools-dir"] = f"{defaults.get('home')}/.local/recon-pipeline/tools"
defaults["database-dir"] = f"{defaults.get('home')}/.local/recon-pipeline/databases"
defaults["gobuster-wordlist"] = f"{defaults.get('tools-dir')}/seclists/Discovery/Web-Content/common.txt" defaults["gobuster-wordlist"] = f"{defaults.get('tools-dir')}/seclists/Discovery/Web-Content/common.txt"
tool_paths = { tool_paths = {
"aquatone": f"{defaults.get('tools-dir')}/aquatone", "aquatone": f"{defaults.get('tools-dir')}/aquatone",
"tko-subs": f"{Path.home()}/go/bin/tko-subs", "tko-subs": f"{Path.home()}/go/bin/tko-subs",
@@ -34,6 +33,7 @@ tool_paths = {
"luigid": str(Path(__file__).parents[2] / "luigid.service"), "luigid": str(Path(__file__).parents[2] / "luigid.service"),
"seclists": f"{defaults.get('tools-dir')}/seclists", "seclists": f"{defaults.get('tools-dir')}/seclists",
"exploitdb": f"{defaults.get('tools-dir')}/exploitdb", "exploitdb": f"{defaults.get('tools-dir')}/exploitdb",
"waybackurls": f"{Path.home()}/go/bin/waybackurls",
} }
web_ports = { web_ports = {

View File

@@ -2,4 +2,5 @@ from .aquatone import AquatoneScan
from .gobuster import GobusterScan from .gobuster import GobusterScan
from .targets import GatherWebTargets from .targets import GatherWebTargets
from .webanalyze import WebanalyzeScan from .webanalyze import WebanalyzeScan
from .waybackurls import WaybackurlsScan
from .subdomain_takeover import SubjackScan, TKOSubsScan from .subdomain_takeover import SubjackScan, TKOSubsScan

View File

@@ -0,0 +1,117 @@
import subprocess
from pathlib import Path
from urllib.parse import urlparse
import luigi
from luigi.util import inherits
from luigi.contrib.sqla import SQLAlchemyTarget
from .targets import GatherWebTargets
from ...tools import tools
from ...models.endpoint_model import Endpoint
import pipeline.models.db_manager
@inherits(GatherWebTargets)
class WaybackurlsScan(luigi.Task):
    """ Fetch known URLs from the Wayback Machine, Common Crawl, and Virus Total for historic data about the target.

    Install:
        .. code-block:: console

            go get github.com/tomnomnom/waybackurls

    Basic Example:
        ``waybackurls`` commands are structured like the example below.

        ``cat domains.txt | waybackurls > urls``

    Luigi Example:
        .. code-block:: python

            PYTHONPATH=$(pwd) luigi --local-scheduler --module recon.web.waybackurls WaybackurlsScan --target-file tesla --top-ports 1000

    Args:
        db_location: specifies the path to the database used for storing results *Required by upstream Task*
        exempt_list: Path to a file providing blacklisted subdomains, one per line. *Optional by upstream Task*
        top_ports: Scan top N most popular ports *Required by upstream Task*
        ports: specifies the port(s) to be scanned *Required by upstream Task*
        interface: use the named raw network interface, such as "eth0" *Required by upstream Task*
        rate: desired rate for transmitting packets (packets per second) *Required by upstream Task*
        target_file: specifies the file on disk containing a list of ips or domains *Required by upstream Task*
        results_dir: specifies the directory on disk to which all Task results are written *Required by upstream Task*
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # db_mgr handles all Target/Endpoint persistence for this task
        self.db_mgr = pipeline.models.db_manager.DBManager(db_location=self.db_location)
        self.results_subfolder = Path(self.results_dir) / "waybackurls-results"

    def requires(self):
        """ WaybackurlsScan depends on GatherWebTargets to run.

        GatherWebTargets accepts exempt_list and expects rate, target_file, interface,
        and either ports or top_ports as parameters

        Returns:
            luigi.Task - GatherWebTargets
        """
        args = {
            "results_dir": self.results_dir,
            "rate": self.rate,
            "target_file": self.target_file,
            "top_ports": self.top_ports,
            "interface": self.interface,
            "ports": self.ports,
            "exempt_list": self.exempt_list,
            "db_location": self.db_location,
        }
        return GatherWebTargets(**args)

    def output(self):
        """ Returns the target output for this task.

        Returns:
            luigi.contrib.sqla.SQLAlchemyTarget
        """
        return SQLAlchemyTarget(
            connection_string=self.db_mgr.connection_string, target_table="endpoint", update_id=self.task_id
        )

    def run(self):
        """ Defines the options/arguments sent to waybackurls after processing.

        Feeds all known hostnames to ``waybackurls`` over stdin and records each
        returned URL as an Endpoint tied to its Target in the database.
        """
        self.results_subfolder.mkdir(parents=True, exist_ok=True)

        command = [tools.get("waybackurls").get("path")]

        # feed hostnames straight to waybackurls via stdin; avoids writing a
        # temp file that would be left behind in results if anything below raised
        hostnames = "\n".join(self.db_mgr.get_all_hostnames())

        proc = subprocess.run(command, input=f"{hostnames}\n".encode(), stdout=subprocess.PIPE)

        for url in proc.stdout.decode().splitlines():
            if not url:
                continue

            parsed_url = urlparse(url)

            # the hostname may belong to a Target we've seen before or a new one
            tgt = self.db_mgr.get_or_create_target_by_ip_or_hostname(parsed_url.hostname)

            endpoint = self.db_mgr.get_or_create(Endpoint, url=url, target=tgt)

            if endpoint not in tgt.endpoints:
                tgt.endpoints.append(endpoint)

            self.db_mgr.add(tgt)
            self.db_mgr.add(endpoint)

        # mark the SQLAlchemyTarget as complete so luigi won't re-run the task
        self.output().touch()

View File

@@ -2,10 +2,7 @@ import luigi
from luigi.util import inherits from luigi.util import inherits
from .nmap import SearchsploitScan from .nmap import SearchsploitScan
from .web import AquatoneScan from .web import AquatoneScan, GobusterScan, SubjackScan, TKOSubsScan, WaybackurlsScan, WebanalyzeScan
from .web import GobusterScan
from .web import WebanalyzeScan
from .web import TKOSubsScan, SubjackScan
@inherits(SearchsploitScan, AquatoneScan, TKOSubsScan, SubjackScan, GobusterScan, WebanalyzeScan) @inherits(SearchsploitScan, AquatoneScan, TKOSubsScan, SubjackScan, GobusterScan, WebanalyzeScan)
@@ -68,6 +65,7 @@ class FullScan(luigi.WrapperTask):
del args["threads"] del args["threads"]
yield TKOSubsScan(**args) yield TKOSubsScan(**args)
yield WaybackurlsScan(**args)
@inherits(SearchsploitScan, AquatoneScan, GobusterScan, WebanalyzeScan) @inherits(SearchsploitScan, AquatoneScan, GobusterScan, WebanalyzeScan)

View File

@@ -0,0 +1,9 @@
# tool definition for waybackurls; consumed by the pipeline's tool installer
# NOTE(review): !get_tool_path / !get_default / !join_path / !join appear to be
# project-defined YAML constructors -- semantics inferred from usage; confirm
installed: false
dependencies: ["go"]
go: &gotool !get_tool_path "{go}"
path: !join_path [!get_default "{home}", go, bin, waybackurls]
commands:
- !join [*gotool, get, github.com/tomnomnom/waybackurls]
shell: false

View File

@@ -18,6 +18,7 @@ def test_get_scans():
"SearchsploitScan", "SearchsploitScan",
"ThreadedNmapScan", "ThreadedNmapScan",
"WebanalyzeScan", "WebanalyzeScan",
"WaybackurlsScan",
] ]
assert len(scan_names) == len(scans.keys()) assert len(scan_names) == len(scans.keys())

View File

@@ -440,6 +440,12 @@ class TestReconShell:
], ],
"shell": True, "shell": True,
}, },
"waybackurls": {
"installed": True,
"depencencies": ["go"],
"commands": ["/usr/local/go/bin/go get github.com/tomnomnom/waybackurls"],
"shell": True,
},
} }
tooldir = tmp_path / ".local" / "recon-pipeline" / "tools" tooldir = tmp_path / ".local" / "recon-pipeline" / "tools"

View File

@@ -0,0 +1,57 @@
import shutil
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch
from pipeline.recon.web import WaybackurlsScan, GatherWebTargets
class TestGatherWebTargets:
    """ Exercises WaybackurlsScan setup, dependency wiring, and run().

    NOTE(review): class name says GatherWebTargets but the scan under test is
    WaybackurlsScan -- looks like a copy/paste leftover; confirm before renaming.
    """

    def setup_method(self):
        # fresh scratch directory and scan instance for every test
        self.tmp_path = Path(tempfile.mkdtemp())
        db_path = self.tmp_path / "testing.sqlite"
        self.scan = WaybackurlsScan(
            target_file=__file__, results_dir=str(self.tmp_path), db_location=str(db_path)
        )

    def teardown_method(self):
        shutil.rmtree(self.tmp_path)

    def test_scan_requires(self):
        with patch("pipeline.recon.web.GatherWebTargets"):
            assert isinstance(self.scan.requires(), GatherWebTargets)

    def test_scan_creates_database(self):
        location = self.scan.db_mgr.location
        assert location.exists()
        assert location == self.tmp_path / "testing.sqlite"

    def test_scan_creates_results_dir(self):
        assert self.tmp_path / "waybackurls-results" == self.scan.results_subfolder

    def test_scan_run(self):
        urls = ["https://drive.google.com", "https://maps.google.com"]
        raw = "\n".join(urls) + "\n\n"

        # fake CompletedProcess whose stdout decodes to two urls plus a blank line
        fake_proc = MagicMock()
        fake_proc.stdout.return_value = raw.encode()
        fake_proc.stdout.decode.return_value = raw
        fake_proc.stdout.decode.splitlines.return_value = urls

        self.scan.results_subfolder = self.tmp_path / "waybackurls-results"

        # stub every database interaction; we only assert that run() drives them
        mgr = self.scan.db_mgr
        mgr.get_all_hostnames = MagicMock(return_value=["google.com"])
        mgr.add = MagicMock()
        mgr.get_or_create = MagicMock()
        mgr.get_or_create_target_by_ip_or_hostname = MagicMock()

        with patch("subprocess.run", autospec=True) as mocked_run:
            mocked_run.return_value = fake_proc
            self.scan.run()

        assert mocked_run.called
        assert mgr.add.called
        assert mgr.get_or_create.called
        assert mgr.get_or_create_target_by_ip_or_hostname.called