Completed the "store results in a database" project (#32)

Co-authored-by: Ryan Good <usafaryangood@gmail.com>

* added initial skeleton; restructured project directories

* removed workers directive from luigi; changed input to tko-subs

* changed masscan command to use config.tool_paths

* linted __init__ files and updated docstring for get_scans

* added per-file-ignores for linting

* recon-pipeline linted

* PoC working for amass results -> db; rudimentary db mgmt commands also

* more linting

* added database management commands to the shell

* db_location passes through to all tasks; masscan results added to db

* removed unused imports from masscan.py

* added ParseNmapOutput class to handle parsing for database storage

* cleaned up repeat code

* searchsploit results stored in db

* lint/format

* gobuster scans now stored in database

* fixed test_recon tests to use db_location

* fixed web tests

* tko-subs entries recorded in db

* subjack scan results stored in database

* webanalyze results stored in db

* refactored older commits to use newer helper functions

* refactored older commits to use newer helper functions

* aquatone results stored in database

refactored a few scans to use dbmanager helper functions
refactored db structure wrt headers/screenshots
added 80/443 to web_ports in config.py

* fixed a few queries and re-added webanalyze to FullScan

* view targets/endpoints done

* overhauled nmap parsing

* printing all nmap_results works; filtering is next

* complex nmap filters complete

* nmap printing done

* updated pipfile

* view web-technologies complete

* view searchsploit results complete

* removed filesystem code from amass

* targetlist moved to db only

* targets, amass, masscan all cut over to the full database; added view ports

* nmap fully db compliant

* aquatone and webtargets db compliant

* gobuster uses db now

* webanalyze db compliant

* all scans except corscanner are db compliant

* recon tests passing

* web tests passing

* linted files

* added tests for helpers.py and parsers.py

* refactored some redundant code

* added tests to pre-commit

* updated amass tests and pre-commit version

* updated recon.targets tests

* updated nmap tests

* updated masscan tests

* updated config tests

* updated web targets tests

* added gobuster tests

* added aquatone tests

* added subdomain takeover and webanalyze tests; updated test data

* removed homegrown sqlite target in favor of the sqla implementation

* added tests for recon-pipeline.py

* fixed cluge function to set __package__ globally

* updated amass tests

* updated targets tests

* updated nmap tests

* updated masscan tests

* updated aquatone tests

* updated nmap tests to account for no searchsploit

* updated nmap tests to account for no searchsploit

* updated masscan tests

* updated subjack/tko-subs tests

* updated web targets tests

* updated webanalyze tests

* added corscanner tests

* linted DBManager a bit

* fixed weird cyclic import issue that only happened during docs build; housekeeping

* added models tests, removed test_install dir

* updated docs a bit; sidenav is wonky

* fixed readthedocs requirements.txt

* fixed issue where view results weren't populated directly after scan

* added new tests to pipeline; working on docs

* updated a few overlooked view command items

* updated tests to reflect changes to shell

* incremental push of docs update

* documentation done

* updated exploitdb install

* updated exploitdb install

* updated seclists install

* parseamass updates db in the event of no amass output

* removed corscanner

* added pipenv shell to install instructions per @GreaterGoodest

* added pipenv shell to install instructions per @GreaterGoodest

* added check for chromium-browser during aquatone install; closes #26

* added check for old recon-tools dir; updated Path.resolve calls to Path.expanduser.resolve; fixed very specific import bug due to filesystem location

* added CONTRIBUTING.md; updated pre-commit hooks/README

* added .gitattributes for linguist reporting

* updated tests

* fixed a few weird bugs found during test

* updated README

* updated asciinema links in README

* updated README with view command video

* updated other location for url scheme /status

* add ability to specify single target using --target (#31)

* updated a few items in docs and moved tool-dict to tools-dir

* fixed issue where removing tempfile without --verbose caused scan to fail
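
The common thread in the commits above is that every scan now tracks completion and stores results in the database rather than in flat files. A minimal sketch of the pattern (the task below is hypothetical; the real tasks, such as GobusterScan in the diff that follows, route the connection string through a DBManager wrapper):

import luigi
from luigi.contrib.sqla import SQLAlchemyTarget


class ExampleDbBackedScan(luigi.Task):
    """ Hypothetical task illustrating the database-backed completion pattern. """

    db_location = luigi.Parameter()  # path to the sqlite database

    def output(self):
        # completion is recorded in luigi's marker table rather than as a file on disk
        return SQLAlchemyTarget(
            connection_string=f"sqlite:///{self.db_location}",
            target_table="endpoint",
            update_id=self.task_id,
        )

    def run(self):
        ...  # run the scan and write parsed results to the database
        self.output().touch()  # mark this task complete
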
Commit 6eb3bd8cb0 (parent ff801dfc6b)
Authored by epi052 on 2020-04-17 10:29:16 -05:00, committed by GitHub
4682 changed files with 133470 additions and 7368 deletions

pipeline/recon/web/gobuster.py (new file)
@@ -0,0 +1,181 @@
import os
import logging
import subprocess
from pathlib import Path
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor

import luigi
from luigi.util import inherits
from luigi.contrib.sqla import SQLAlchemyTarget

import pipeline.models.db_manager
from .targets import GatherWebTargets
from ..config import tool_paths, defaults
from ...models.endpoint_model import Endpoint
from ..helpers import get_ip_address_version, is_ip_address


@inherits(GatherWebTargets)
class GobusterScan(luigi.Task):
    """ Use ``gobuster`` to perform forced browsing.

    Install:
        .. code-block:: console

            go get github.com/OJ/gobuster
            git clone https://github.com/epi052/recursive-gobuster.git

    Basic Example:
        .. code-block:: console

            gobuster dir -q -e -k -t 20 -u www.tesla.com -w /usr/share/seclists/Discovery/Web-Content/common.txt -p http://127.0.0.1:8080 -o gobuster.tesla.txt -x php,html

    Luigi Example:
        .. code-block:: console

            PYTHONPATH=$(pwd) luigi --local-scheduler --module recon.web.gobuster GobusterScan --target-file tesla --top-ports 1000 --interface eth0 --proxy http://127.0.0.1:8080 --extensions php,html --wordlist /usr/share/seclists/Discovery/Web-Content/common.txt --threads 20

    Args:
        threads: number of threads for parallel gobuster command execution
        wordlist: wordlist used for forced browsing
        extensions: additional extensions to apply to each item in the wordlist
        recursive: whether or not to recursively gobust the target (may produce a LOT of traffic... quickly)
        proxy: protocol://ip:port proxy specification for gobuster
        exempt_list: Path to a file providing blacklisted subdomains, one per line. *Optional by upstream Task*
        db_location: specifies the path to the database used for storing results *Required by upstream Task*
        top_ports: Scan top N most popular ports *Required by upstream Task*
        ports: specifies the port(s) to be scanned *Required by upstream Task*
        interface: use the named raw network interface, such as "eth0" *Required by upstream Task*
        rate: desired rate for transmitting packets (packets per second) *Required by upstream Task*
        target_file: specifies the file on disk containing a list of ips or domains *Required by upstream Task*
        results_dir: specifies the directory on disk to which all Task results are written *Required by upstream Task*
    """

    recursive = luigi.BoolParameter(default=False)
    proxy = luigi.Parameter(default=defaults.get("proxy"))
    threads = luigi.Parameter(default=defaults.get("threads"))
    wordlist = luigi.Parameter(default=defaults.get("gobuster-wordlist"))
    extensions = luigi.Parameter(default=defaults.get("gobuster-extensions"))

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.db_mgr = pipeline.models.db_manager.DBManager(db_location=self.db_location)
        self.results_subfolder = Path(self.results_dir) / "gobuster-results"

    def requires(self):
        """ GobusterScan depends on GatherWebTargets to run.

        GatherWebTargets accepts exempt_list and expects rate, target_file, interface,
        and either ports or top_ports as parameters

        Returns:
            luigi.Task - GatherWebTargets
        """
        args = {
            "results_dir": self.results_dir,
            "rate": self.rate,
            "target_file": self.target_file,
            "top_ports": self.top_ports,
            "interface": self.interface,
            "ports": self.ports,
            "exempt_list": self.exempt_list,
            "db_location": self.db_location,
        }
        return GatherWebTargets(**args)

    def output(self):
        """ Returns the target output for this task.

        Scan results are stored in the ``endpoint`` table of the database, so the
        returned target tracks completion there instead of on the filesystem.

        Returns:
            luigi.contrib.sqla.SQLAlchemyTarget
        """
        return SQLAlchemyTarget(
            connection_string=self.db_mgr.connection_string, target_table="endpoint", update_id=self.task_id
        )
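
    # note: SQLAlchemyTarget records completion in luigi's marker table ("table_updates"
    # by default); parse_results() below calls self.output().touch() to write this
    # task's update_id there once all endpoints are in the database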

    def parse_results(self):
        """ Reads in each individual gobuster file and adds each line to the database as an Endpoint """
        for file in self.results_subfolder.iterdir():
            tgt = None

            for i, line in enumerate(file.read_text().splitlines()):
                url, status = line.split(maxsplit=1)  # http://somewhere/path (Status: 200)

                if i == 0:
                    # parse first entry to determine ip address -> target relationship
                    parsed_url = urlparse(url)
                    tgt = self.db_mgr.get_or_create_target_by_ip_or_hostname(parsed_url.hostname)

                if tgt is not None:
                    status_code = status.split(maxsplit=1)[1]
                    ep = self.db_mgr.get_or_create(Endpoint, url=url, status_code=status_code.replace(")", ""))

                    if ep not in tgt.endpoints:
                        tgt.endpoints.append(ep)

                    self.db_mgr.add(tgt)

        self.output().touch()
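
    # for reference, a gobuster output line such as
    #   http://10.10.10.10/assets (Status: 301)
    # splits into url="http://10.10.10.10/assets" and status="(Status: 301)",
    # from which parse_results() stores "301" as the Endpoint's status_code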

    def run(self):
        """ Builds the ``gobuster`` command for each web target in the database, runs them in parallel, then parses the results. """
        try:
            self.threads = abs(int(self.threads))
        except (TypeError, ValueError):
            return logging.error("The value supplied to --threads must be a non-negative integer.")

        commands = list()

        for target in self.db_mgr.get_all_web_targets():
            if is_ip_address(target) and get_ip_address_version(target) == "6":
                target = f"[{target}]"  # ipv6 addresses must be bracketed in urls

            for url_scheme in ("https://", "http://"):
                if self.recursive:
                    command = [tool_paths.get("recursive-gobuster"), "-s", "-w", self.wordlist, f"{url_scheme}{target}"]
                else:
                    command = [
                        tool_paths.get("gobuster"),
                        "dir",
                        "-q",
                        "-e",
                        "-k",
                        "-u",
                        f"{url_scheme}{target}",
                        "-w",
                        self.wordlist,
                        "-o",
                        self.results_subfolder.joinpath(
                            f"gobuster.{url_scheme.replace('//', '_').replace(':', '')}{target}.txt"
                        ),
                    ]

                if self.extensions:
                    command.extend(["-x", self.extensions])

                if self.proxy:
                    command.extend(["-p", self.proxy])

                commands.append(command)

        self.results_subfolder.mkdir(parents=True, exist_ok=True)

        if self.recursive:
            # workaround for recursive gobuster not accepting an output directory
            cwd = Path().cwd()
            os.chdir(self.results_subfolder)

        with ThreadPoolExecutor(max_workers=self.threads) as executor:
            executor.map(subprocess.run, commands)

        if self.recursive:
            os.chdir(str(cwd))

        self.parse_results()
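
For ad-hoc testing outside the pipeline's shell, the task can also be driven directly with luigi. A minimal sketch, assuming the module path pipeline/recon/web/gobuster.py implied by the relative imports above; every parameter value here is illustrative:

import luigi

from pipeline.recon.web.gobuster import GobusterScan

if __name__ == "__main__":
    luigi.build(
        [
            GobusterScan(
                target_file="tesla",
                results_dir="tesla-results",
                db_location="tesla-results/tesla.sqlite",
                top_ports=1000,
                interface="eth0",
                rate="500",
                wordlist="/usr/share/seclists/Discovery/Web-Content/common.txt",
                extensions="php,html",
                threads="20",
            )
        ],
        local_scheduler=True,
    )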