Completed "store results in a database" project (#32)

Co-authored-by: Ryan Good <usafaryangood@gmail.com>

* added initial skeleton; restructured project directories

* removed workers directive from luigi; changed input to tko-subs

* changed masscan command to use config.tool_paths

* linted __init__ files and updated docstring for get_scans

* added per-file-ignores for linting

* recon-pipeline linted

* PoC working for amass results -> db; rudimentary db mgmt commands also

* more linting

* added database management commands to the shell

* db_location passes through to all tasks; masscan results added to db

* removed unused imports from masscan.py

* added ParseNmapOutput class to handle parsing for database storage

* cleaned up repeat code

* searchsploit results stored in db

* lint/format

* gobuster scans now stored in database

* fixed test_recon tests to use db_location

* fixed web tests

* tkosub entries recorded in db

* subjack scan results stored in database

* webanalyze results stored in db

* refactored older commits to use newer helper functions

* refactored older commits to use newer helper functions

* aquatone results stored in database

refactored a few scans to use dbmanager helper functions
refactored db structure wrt headers/screenshots
added 80/443 to web_ports in config.py

* fixed a few queries and re-added webanalyze to FullScan

* view targets/endpoints done

* overhauled nmap parsing

* print all nmap_results good, next to focus on filtering

* complex nmap filters complete

* nmap printing done

* updated pipfile

* view web-technologies complete

* view searchsploit results complete

* removed filesystem code from amass

* targetlist moved to db only

* targets, amass, masscan all cut over to full database; added view ports

* nmap fully db compliant

* aquatone and webtargets db compliant

* gobuster uses db now

* webanalyze db compliant

* all scans except corscanner are db compliant

* recon tests passing

* web tests passing

* linted files

* added tests for helpers.py and parsers.py

* refactored some redundant code

* added tests to pre-commit

* updated amass tests and pre-commit version

* updated recon.targets tests

* updated nmap tests

* updated masscan tests

* updated config tests

* updated web targets tests

* added gobuster tests

* added aquatone tests

* added subdomain takeover and webanalyze tests; updated test data

* removed homegrown sqlite target in favor of the sqla implementation

* added tests for recon-pipeline.py

* fixed cluge function to set __package__ globally

* updated amass tests

* updated targets tests

* updated nmap tests

* updated masscan tests

* updated aquatone tests

* updated nmap tests to account for no searchsploit

* updated nmap tests to account for no searchsploit

* updated masscan tests

* updated subjack/tkosub tests

* updated web targets tests

* updated webanalyze tests

* added corscanner tests

* linted DBManager a bit

* fixed weird cyclic import issue that only happened during docs build; housekeeping

* added models tests, removed test_install dir

* updated docs a bit; sidenav is wonky

* fixed readthedocs requirements.txt

* fixed issue where view results weren't populated directly after scan

* added new tests to pipeline; working on docs

* updated a few overlooked view command items

* updated tests to reflect changes to shell

* incremental push of docs update

* documentation done

* updated exploitdb install

* updated exploitdb install

* updated seclists install

* parseamass updates db in the event of no amass output

* removed corscanner

* added pipenv shell to install instructions per @GreaterGoodest

* added pipenv shell to install instructions per @GreaterGoodest

* added check for chromium-browser during aquatone install; closes #26

* added check for old recon-tools dir; updated Path.resolve calls to Path.expanduser.resolve; fixed very specific import bug due to filesystem location

* added CONTRIBUTING.md; updated pre-commit hooks/README

* added .gitattributes for linguist reporting

* updated tests

* fixed a few weird bugs found during test

* updated README

* updated asciinema links in README

* updated README with view command video

* updated other location for url scheme /status

* add ability to specify single target using --target (#31)

* updated a few items in docs and moved tool-dict to tools-dir

* fixed issue where removing tempfile without --verbose caused scan to fail
epi052 authored on 2020-04-17 10:29:16 -05:00; committed by GitHub
parent ff801dfc6b
commit 6eb3bd8cb0
4682 changed files with 133470 additions and 7368 deletions

pipeline/recon/nmap.py (new file, 324 lines)

@@ -0,0 +1,324 @@
import ast
import logging
import subprocess
import concurrent.futures
from pathlib import Path

import luigi
import sqlalchemy
from luigi.util import inherits
from libnmap.parser import NmapParser
from luigi.contrib.sqla import SQLAlchemyTarget

import pipeline.models.db_manager
from .masscan import ParseMasscanOutput
from .config import defaults, tool_paths
from .helpers import get_ip_address_version, is_ip_address
from ..models.port_model import Port
from ..models.nse_model import NSEResult
from ..models.target_model import Target
from ..models.nmap_model import NmapResult
from ..models.ip_address_model import IPAddress
from ..models.searchsploit_model import SearchsploitResult


@inherits(ParseMasscanOutput)
class ThreadedNmapScan(luigi.Task):
    """ Run ``nmap`` against specific targets and ports gained from the ParseMasscanOutput Task.

    Install:
        ``nmap`` is already on your system if you're using kali. If you're not using kali, refer to your own
        distribution's instructions for installing ``nmap``.

    Basic Example:
        .. code-block:: console

            nmap --open -sT -sC -T 4 -sV -Pn -p 43,25,21,53,22 -oA htb-targets-nmap-results/nmap.10.10.10.155-tcp 10.10.10.155

    Luigi Example:
        .. code-block:: console

            PYTHONPATH=$(pwd) luigi --local-scheduler --module pipeline.recon.nmap ThreadedNmapScan --target-file htb-targets --top-ports 5000

    Args:
        threads: number of threads for parallel nmap command execution
        db_location: specifies the path to the database used for storing results *Required by upstream Task*
        rate: desired rate for transmitting packets (packets per second) *Required by upstream Task*
        interface: use the named raw network interface, such as "eth0" *Required by upstream Task*
        top_ports: Scan top N most popular ports *Required by upstream Task*
        ports: specifies the port(s) to be scanned *Required by upstream Task*
        target_file: specifies the file on disk containing a list of ips or domains *Required by upstream Task*
        results_dir: specifies the directory on disk to which all Task results are written *Required by upstream Task*
    """

    threads = luigi.Parameter(default=defaults.get("threads"))

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.db_mgr = pipeline.models.db_manager.DBManager(db_location=self.db_location)
        self.results_subfolder = (Path(self.results_dir) / "nmap-results").expanduser().resolve()

    def requires(self):
        """ ThreadedNmapScan depends on ParseMasscanOutput to run.

        TargetList expects target_file, results_dir, and db_location as parameters.
        Masscan expects rate, target_file, interface, and either ports or top_ports as parameters.

        Returns:
            luigi.Task - ParseMasscanOutput
        """
        args = {
            "results_dir": self.results_dir,
            "rate": self.rate,
            "target_file": self.target_file,
            "top_ports": self.top_ports,
            "interface": self.interface,
            "ports": self.ports,
            "db_location": self.db_location,
        }
        return ParseMasscanOutput(**args)

    def output(self):
        """ Returns the target output for this task.

        Naming convention for the output folder is TARGET_FILE-nmap-results.

        The output folder will be populated with all of the output files generated by
        any nmap commands run.  Because the nmap command uses -oA, there will be three
        files per target scanned: .xml, .nmap, .gnmap.

        Returns:
            dict(str: SQLAlchemyTarget, str: luigi.local_target.LocalTarget)
        """
        return {
            "sqltarget": SQLAlchemyTarget(
                connection_string=self.db_mgr.connection_string, target_table="nmap_result", update_id=self.task_id
            ),
            "localtarget": luigi.LocalTarget(str(self.results_subfolder)),
        }

    def parse_nmap_output(self):
        """ Read nmap .xml results and add entries into specified database """
        for entry in self.results_subfolder.glob("nmap*.xml"):
            # relying on python-libnmap here
            report = NmapParser.parse_fromfile(entry)

            for host in report.hosts:
                for service in host.services:
                    port = self.db_mgr.get_or_create(Port, protocol=service.protocol, port_number=service.port)

                    if is_ip_address(host.address) and get_ip_address_version(host.address) == "4":
                        ip_address = self.db_mgr.get_or_create(IPAddress, ipv4_address=host.address)
                    else:
                        ip_address = self.db_mgr.get_or_create(IPAddress, ipv6_address=host.address)

                    if ip_address.target is None:
                        # account for ip addresses identified that aren't already tied to a target
                        # almost certainly ipv6 addresses
                        tgt = self.db_mgr.get_or_create(Target)
                        tgt.ip_addresses.append(ip_address)
                    else:
                        tgt = ip_address.target

                    try:
                        nmap_result = self.db_mgr.get_or_create(
                            NmapResult, port=port, ip_address=ip_address, target=tgt
                        )
                    except sqlalchemy.exc.StatementError:
                        # one of the three (port/ip/tgt) didn't exist and we're querying on ids that the db doesn't know
                        self.db_mgr.add(port)
                        self.db_mgr.add(ip_address)
                        self.db_mgr.add(tgt)

                        nmap_result = self.db_mgr.get_or_create(
                            NmapResult, port=port, ip_address=ip_address, target=tgt
                        )

                    for nse_result in service.scripts_results:
                        script_id = nse_result.get("id")
                        script_output = nse_result.get("output")

                        nse_obj = self.db_mgr.get_or_create(NSEResult, script_id=script_id, script_output=script_output)
                        nmap_result.nse_results.append(nse_obj)

                    nmap_result.open = service.open()
                    nmap_result.reason = service.reason
                    nmap_result.service = service.service
                    nmap_result.commandline = report.commandline
                    nmap_result.product = service.service_dict.get("product")
                    nmap_result.product_version = service.service_dict.get("version")

                    nmap_result.target.nmap_results.append(nmap_result)
                    self.db_mgr.add(nmap_result)
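
        # SQLAlchemyTarget.touch() writes this task's update_id to luigi's marker
        # table, which is how the sql target in output() registers as complete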
        self.output().get("sqltarget").touch()
        self.db_mgr.close()

    def run(self):
        """ Grabs targets and open ports from the database and runs targeted nmap scans against only the open ports. """
        try:
            self.threads = abs(int(self.threads))
        except (TypeError, ValueError):
            return logging.error("The value supplied to --threads must be a non-negative integer.")

        nmap_command = [  # placeholders will be overwritten with appropriate info in loop below
            "nmap",
            "--open",
            "PLACEHOLDER-IDX-2",
            "-n",
            "-sC",
            "-T",
            "4",
            "-sV",
            "-Pn",
            "-p",
            "PLACEHOLDER-IDX-10",
            "-oA",
        ]
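        # index 2 becomes the scan type (-sT / -sU) and index 10 the comma-separated
        # port list; both get filled in for each target/protocol pair below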

        commands = list()

        for target in self.db_mgr.get_all_targets():
            for protocol in ("tcp", "udp"):
                ports = self.db_mgr.get_ports_by_ip_or_host_and_protocol(target, protocol)

                if ports:
                    tmp_cmd = nmap_command[:]
                    tmp_cmd[2] = "-sT" if protocol == "tcp" else "-sU"
                    tmp_cmd[10] = ",".join(ports)

                    # arg to -oA, will drop into subdir off curdir
                    tmp_cmd.append(str(Path(self.output().get("localtarget").path) / f"nmap.{target}-{protocol}"))

                    if is_ip_address(target) and get_ip_address_version(target) == "6":
                        # got an ipv6 address
                        tmp_cmd.insert(-2, "-6")

                    tmp_cmd.append(target)  # target as final arg to nmap

                    commands.append(tmp_cmd)

        # basically mkdir -p, won't error out if already there
        self.results_subfolder.mkdir(parents=True, exist_ok=True)

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.threads) as executor:
            executor.map(subprocess.run, commands)

        self.parse_nmap_output()


@inherits(ThreadedNmapScan)
class SearchsploitScan(luigi.Task):
    """ Run ``searchsploit`` against each ``nmap*.xml`` file in the **TARGET-nmap-results** directory and record results in the database.

    Install:
        ``searchsploit`` is already on your system if you're using kali. If you're not using kali, refer to your own
        distribution's instructions for installing ``searchsploit``.

    Basic Example:
        .. code-block:: console

            searchsploit --nmap htb-targets-nmap-results/nmap.10.10.10.155-tcp.xml

    Luigi Example:
        .. code-block:: console

            PYTHONPATH=$(pwd) luigi --local-scheduler --module pipeline.recon.nmap SearchsploitScan --target-file htb-targets --top-ports 5000

    Args:
        threads: number of threads for parallel nmap command execution *Required by upstream Task*
        db_location: specifies the path to the database used for storing results *Required by upstream Task*
        rate: desired rate for transmitting packets (packets per second) *Required by upstream Task*
        interface: use the named raw network interface, such as "eth0" *Required by upstream Task*
        top_ports: Scan top N most popular ports *Required by upstream Task*
        ports: specifies the port(s) to be scanned *Required by upstream Task*
        target_file: specifies the file on disk containing a list of ips or domains *Required by upstream Task*
        results_dir: specifies the directory on disk to which all Task results are written *Required by upstream Task*
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.db_mgr = pipeline.models.db_manager.DBManager(db_location=self.db_location)

    def requires(self):
        """ SearchsploitScan depends on ThreadedNmapScan to run.

        TargetList expects target_file, results_dir, and db_location as parameters.
        Masscan expects rate, target_file, interface, and either ports or top_ports as parameters.
        ThreadedNmapScan expects threads as a parameter.

        Returns:
            luigi.Task - ThreadedNmapScan
        """
        args = {
            "rate": self.rate,
            "ports": self.ports,
            "threads": self.threads,
            "top_ports": self.top_ports,
            "interface": self.interface,
            "target_file": self.target_file,
            "results_dir": self.results_dir,
            "db_location": self.db_location,
        }
        return ThreadedNmapScan(**args)

    def output(self):
        """ Returns the target output for this task.

        Searchsploit results are stored in the ``searchsploit_result`` table, so task
        completeness is tracked with a SQLAlchemyTarget rather than a folder on disk.

        Returns:
            luigi.contrib.sqla.SQLAlchemyTarget
        """
        return SQLAlchemyTarget(
            connection_string=self.db_mgr.connection_string, target_table="searchsploit_result", update_id=self.task_id
        )

    def run(self):
        """ Grabs the xml files created by ThreadedNmapScan and runs searchsploit --nmap on each one, saving the output. """
        for entry in Path(self.input().get("localtarget").path).glob("nmap*.xml"):
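            # -j requests json-formatted results from searchsploit; --nmap feeds it the nmap xml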
            proc = subprocess.run(
                [tool_paths.get("searchsploit"), "-j", "-v", "--nmap", str(entry)], stdout=subprocess.PIPE
            )

            if proc.stdout:
                # change wall-searchsploit-results/nmap.10.10.10.157-tcp to 10.10.10.157
                ipaddr = entry.stem.replace("nmap.", "").replace("-tcp", "").replace("-udp", "")

                contents = proc.stdout.decode()

                for line in contents.splitlines():
                    if "Title" in line:
                        # {'Title': "Nginx (Debian Based Distros + Gentoo) ... }

                        # oddity introduced on 15 Apr 2020 from an exploitdb update
                        # entries have two double quotes in a row for no apparent reason
                        # {"Title":"PHP-FPM + Nginx - Remote Code Execution"", ...
                        # seems to affect all entries at the moment. will remove this line if it
                        # ever returns to normal
                        line = line.replace('""', '"')

                        if line.endswith(","):
                            # result would be a tuple if the comma is left on the line; remove it
                            tmp_result = ast.literal_eval(line.strip()[:-1])
                        else:
                            # normal dict
                            tmp_result = ast.literal_eval(line.strip())

                        tgt = self.db_mgr.get_or_create_target_by_ip_or_hostname(ipaddr)

                        ssr_type = tmp_result.get("Type")
                        ssr_title = tmp_result.get("Title")
                        ssr_path = tmp_result.get("Path")

                        ssr = self.db_mgr.get_or_create(
                            SearchsploitResult, type=ssr_type, title=ssr_title, path=ssr_path
                        )

                        tgt.searchsploit_results.append(ssr)
                        self.db_mgr.add(tgt)

        self.output().touch()
        self.db_mgr.close()
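
Note: the ``DBManager.get_or_create`` helper used throughout this file lives in pipeline/models/db_manager.py and is not part of this diff. For orientation only, a minimal sketch of the usual SQLAlchemy pattern behind such a helper, assuming the manager holds an open session (names and structure here are illustrative assumptions, not the project's actual implementation):

from sqlalchemy.orm import Session


class SketchDBManager:
    """ Illustrative stand-in for pipeline.models.db_manager.DBManager; not the real implementation. """

    def __init__(self, session: Session):
        self.session = session

    def get_or_create(self, model, **kwargs):
        """ Return the first row of ``model`` matching ``kwargs``; build a new, unsaved instance if none exists. """
        instance = self.session.query(model).filter_by(**kwargs).first()

        if instance is None:
            # nothing is committed here; callers decide when to persist, e.g. via an add()-style method
            instance = model(**kwargs)

        return instance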