Completed the "store results in a database" project (#32)

Co-authored-by: Ryan Good <usafaryangood@gmail.com>

* added initial skeleton; restructured project directories

* removed workers directive from luigi; changed input to tko-subs

* changed masscan command to use config.tool_paths

* linted __init__ files and updated docstring for get_scans

* added per-file-ignores for linting

* recon-pipeline linted

* PoC working for amass results -> db; rudimentary db mgmt commands also

* more linting

* added database management commands to the shell

* db_location passes through to all tasks; masscan results added to db

* removed unused imports from masscan.py

* added ParseNmapOutput class to handle parsing for database storage

* cleaned up repeat code

* searchsploit results stored in db

* lint/format

* gobuster scans now stored in database

* fixed test_recon tests to use db_location

* fixed web tests

* tko-subs entries recorded in db

* subjack scan results stored in database

* webanalyze results stored in db

* refactored older commits to use newer helper functions

* refactored older commits to use newer helper functions

* aquatone results stored in database

refactored a few scans to use dbmanager helper functions
refactored db structure wrt headers/screenshots
added 80/443 to web_ports in config.py

* fixed a few queries and re-added webanalyze to FullScan

* view targets/endpoints done

* overhauled nmap parsing

* printing all nmap_results works; filtering is next

* complex nmap filters complete

* nmap printing done

* updated pipfile

* view web-technologies complete

* view searchsploit results complete

* removed filesystem code from amass

* targetlist moved to db only

* targets, amass, masscan all cut over to the full database; added view ports

* nmap fully db compliant

* aquatone and webtargets db compliant

* gobuster uses db now

* webanalyze db compliant

* all scans except corscanner are db compliant

* recon tests passing

* web tests passing

* linted files

* added tests for helpers.py and parsers.py

* refactored some redundant code

* added tests to pre-commit

* updated amass tests and pre-commit version

* updated recon.targets tests

* updated nmap tests

* updated masscan tests

* updated config tests

* updated web targets tests

* added gobuster tests

* added aquatone tests

* added subdomain takeover and webanalyze tests; updated test data

* removed homegrown sqlite target in favor of the sqla implementation

* added tests for recon-pipeline.py

* fixed cluge function to set __package__ globally

* updated amass tests

* updated targets tests

* updated nmap tests

* updated masscan tests

* updated aquatone tests

* updated nmap tests to account for no searchsploit

* updated nmap tests to account for no searchsploit

* updated masscan tests

* updated subjack/tko-subs tests

* updated web targets tests

* updated webanalyze tests

* added corscanner tests

* linted DBManager a bit

* fixed weird cyclic import issue that only happened during docs build; housekeeping

* added models tests, removed test_install dir

* updated docs a bit; sidenav is wonky

* fixed readthedocs requirements.txt

* fixed issue where view results weren't populated directly after scan

* added new tests to pipeline; working on docs

* updated a few overlooked view command items

* updated tests to reflect changes to shell

* incremental push of docs update

* documentation done

* updated exploitdb install

* updated exploitdb install

* updated seclists install

* parseamass updates db in the event of no amass output

* removed corscanner

* added pipenv shell to install instructions per @GreaterGoodest

* added pipenv shell to install instructions per @GreaterGoodest

* added check for chromium-browser during aquatone install; closes #26

* added check for old recon-tools dir; updated Path.resolve calls to Path.expanduser.resolve; fixed very specific import bug due to filesystem location

* added CONTRIBUTING.md; updated pre-commit hooks/README

* added .gitattributes for linguist reporting

* updated tests

* fixed a few weird bugs found during test

* updated README

* updated asciinema links in README

* updated README with view command video

* updated other location for url scheme /status

* add ability to specify single target using --target (#31)

* updated a few items in docs and moved tool-dict to tools-dir

* fixed issue where removing tempfile without --verbose caused scan to fail
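
The common thread in the commits above is that every scan now tracks completion and stores results in the database rather than in flat files. A minimal sketch of the pattern (the task below is hypothetical; the real tasks, such as GobusterScan in the diff that follows, route the connection string through a DBManager wrapper):

import luigi
from luigi.contrib.sqla import SQLAlchemyTarget


class ExampleDbBackedScan(luigi.Task):
    """ Hypothetical task illustrating the database-backed completion pattern. """

    db_location = luigi.Parameter()  # path to the sqlite database

    def output(self):
        # completion is recorded in luigi's marker table rather than as a file on disk
        return SQLAlchemyTarget(
            connection_string=f"sqlite:///{self.db_location}",
            target_table="endpoint",
            update_id=self.task_id,
        )

    def run(self):
        ...  # run the scan and write parsed results to the database
        self.output().touch()  # mark this task complete
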
Commit 6eb3bd8cb0 (parent ff801dfc6b)
Authored by epi052 on 2020-04-17 10:29:16 -05:00, committed by GitHub
4682 changed files with 133470 additions and 7368 deletions

pipeline/recon/web/gobuster.py (new file)
@@ -0,0 +1,181 @@
import os
import logging
import subprocess
from pathlib import Path
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor

import luigi
from luigi.util import inherits
from luigi.contrib.sqla import SQLAlchemyTarget

import pipeline.models.db_manager
from .targets import GatherWebTargets
from ..config import tool_paths, defaults
from ...models.endpoint_model import Endpoint
from ..helpers import get_ip_address_version, is_ip_address


@inherits(GatherWebTargets)
class GobusterScan(luigi.Task):
    """ Use ``gobuster`` to perform forced browsing.

    Install:
        .. code-block:: console

            go get github.com/OJ/gobuster
            git clone https://github.com/epi052/recursive-gobuster.git

    Basic Example:
        .. code-block:: console

            gobuster dir -q -e -k -t 20 -u www.tesla.com -w /usr/share/seclists/Discovery/Web-Content/common.txt -p http://127.0.0.1:8080 -o gobuster.tesla.txt -x php,html

    Luigi Example:
        .. code-block:: console

            PYTHONPATH=$(pwd) luigi --local-scheduler --module recon.web.gobuster GobusterScan --target-file tesla --top-ports 1000 --interface eth0 --proxy http://127.0.0.1:8080 --extensions php,html --wordlist /usr/share/seclists/Discovery/Web-Content/common.txt --threads 20

    Args:
        threads: number of threads for parallel gobuster command execution
        wordlist: wordlist used for forced browsing
        extensions: additional extensions to apply to each item in the wordlist
        recursive: whether or not to recursively gobust the target (may produce a LOT of traffic... quickly)
        proxy: protocol://ip:port proxy specification for gobuster
        exempt_list: Path to a file providing blacklisted subdomains, one per line. *Optional by upstream Task*
        db_location: specifies the path to the database used for storing results *Required by upstream Task*
        top_ports: Scan top N most popular ports *Required by upstream Task*
        ports: specifies the port(s) to be scanned *Required by upstream Task*
        interface: use the named raw network interface, such as "eth0" *Required by upstream Task*
        rate: desired rate for transmitting packets (packets per second) *Required by upstream Task*
        target_file: specifies the file on disk containing a list of ips or domains *Required by upstream Task*
        results_dir: specifies the directory on disk to which all Task results are written *Required by upstream Task*
    """

    recursive = luigi.BoolParameter(default=False)
    proxy = luigi.Parameter(default=defaults.get("proxy"))
    threads = luigi.Parameter(default=defaults.get("threads"))
    wordlist = luigi.Parameter(default=defaults.get("gobuster-wordlist"))
    extensions = luigi.Parameter(default=defaults.get("gobuster-extensions"))

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.db_mgr = pipeline.models.db_manager.DBManager(db_location=self.db_location)
        self.results_subfolder = Path(self.results_dir) / "gobuster-results"

    def requires(self):
        """ GobusterScan depends on GatherWebTargets to run.

        GatherWebTargets accepts exempt_list and expects rate, target_file, interface,
        and either ports or top_ports as parameters

        Returns:
            luigi.Task - GatherWebTargets
        """
        args = {
            "results_dir": self.results_dir,
            "rate": self.rate,
            "target_file": self.target_file,
            "top_ports": self.top_ports,
            "interface": self.interface,
            "ports": self.ports,
            "exempt_list": self.exempt_list,
            "db_location": self.db_location,
        }
        return GatherWebTargets(**args)

    def output(self):
        """ Returns the target output for this task.

        Scan results are stored in the ``endpoint`` table of the database, so the
        returned target tracks completion there instead of on the filesystem.

        Returns:
            luigi.contrib.sqla.SQLAlchemyTarget
        """
        return SQLAlchemyTarget(
            connection_string=self.db_mgr.connection_string, target_table="endpoint", update_id=self.task_id
        )
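
    # note: SQLAlchemyTarget records completion in luigi's marker table ("table_updates"
    # by default); parse_results() below calls self.output().touch() to write this
    # task's update_id there once all endpoints are in the database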

    def parse_results(self):
        """ Reads in each individual gobuster file and adds each line to the database as an Endpoint """
        for file in self.results_subfolder.iterdir():
            tgt = None

            for i, line in enumerate(file.read_text().splitlines()):
                url, status = line.split(maxsplit=1)  # http://somewhere/path (Status: 200)

                if i == 0:
                    # parse first entry to determine ip address -> target relationship
                    parsed_url = urlparse(url)
                    tgt = self.db_mgr.get_or_create_target_by_ip_or_hostname(parsed_url.hostname)

                if tgt is not None:
                    status_code = status.split(maxsplit=1)[1]
                    ep = self.db_mgr.get_or_create(Endpoint, url=url, status_code=status_code.replace(")", ""))

                    if ep not in tgt.endpoints:
                        tgt.endpoints.append(ep)

                    self.db_mgr.add(tgt)

        self.output().touch()
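
    # for reference, a gobuster output line such as
    #   http://10.10.10.10/assets (Status: 301)
    # splits into url="http://10.10.10.10/assets" and status="(Status: 301)",
    # from which parse_results() stores "301" as the Endpoint's status_code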

    def run(self):
        """ Builds the ``gobuster`` command for each web target in the database, runs them in parallel, then parses the results. """
        try:
            self.threads = abs(int(self.threads))
        except (TypeError, ValueError):
            return logging.error("The value supplied to --threads must be a non-negative integer.")

        commands = list()

        for target in self.db_mgr.get_all_web_targets():
            if is_ip_address(target) and get_ip_address_version(target) == "6":
                target = f"[{target}]"  # ipv6 addresses must be bracketed in urls

            for url_scheme in ("https://", "http://"):
                if self.recursive:
                    command = [tool_paths.get("recursive-gobuster"), "-s", "-w", self.wordlist, f"{url_scheme}{target}"]
                else:
                    command = [
                        tool_paths.get("gobuster"),
                        "dir",
                        "-q",
                        "-e",
                        "-k",
                        "-u",
                        f"{url_scheme}{target}",
                        "-w",
                        self.wordlist,
                        "-o",
                        self.results_subfolder.joinpath(
                            f"gobuster.{url_scheme.replace('//', '_').replace(':', '')}{target}.txt"
                        ),
                    ]

                if self.extensions:
                    command.extend(["-x", self.extensions])

                if self.proxy:
                    command.extend(["-p", self.proxy])

                commands.append(command)

        self.results_subfolder.mkdir(parents=True, exist_ok=True)

        if self.recursive:
            # workaround for recursive gobuster not accepting an output directory
            cwd = Path().cwd()
            os.chdir(self.results_subfolder)

        with ThreadPoolExecutor(max_workers=self.threads) as executor:
            executor.map(subprocess.run, commands)

        if self.recursive:
            os.chdir(str(cwd))

        self.parse_results()
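
For ad-hoc testing outside the pipeline's shell, the task can also be driven directly with luigi. A minimal sketch, assuming the module path pipeline/recon/web/gobuster.py implied by the relative imports above; every parameter value here is illustrative:

import luigi

from pipeline.recon.web.gobuster import GobusterScan

if __name__ == "__main__":
    luigi.build(
        [
            GobusterScan(
                target_file="tesla",
                results_dir="tesla-results",
                db_location="tesla-results/tesla.sqlite",
                top_ports=1000,
                interface="eth0",
                rate="500",
                wordlist="/usr/share/seclists/Discovery/Web-Content/common.txt",
                extensions="php,html",
                threads="20",
            )
        ],
        local_scheduler=True,
    )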