recon-pipeline/pipeline/recon/nmap.py
epi052 6eb3bd8cb0 Completed store results in a database project (#32)
Co-authored-by: Ryan Good <usafaryangood@gmail.com>

* added initial skeleton; restructured project directories

* removed workers directive from luigi; changed input to tko-subs

* changed masscan command to use config.tool_paths

* linted __init__ files and updated docstring for get_scans

* added per-file-ignores for linting

* recon-pipeline linted

* PoC working for amass results -> db; rudimentary db mgmt commands also

* more linting

* added database management commands to the shell

* db_location passes through to all tasks; masscan results added to db

* removed unused imports from masscan.py

* added ParseNmapOutput class to handle parsing for database storage

* cleaned up repeat code

* searchsploit results stored in db

* lint/format

* gobuster scans now stored in database

* fixed test_recon tests to use db_location

* fixed web tests

* tkosub entries recorded in db

* subjack scan results stored in database

* webanalyze results stored in db

* refactored older commits to use newer helper functions

* refactored older commits to use newer helper functions

* aquatone results stored in database

refactored a few scans to use dbmanager helper functions
refactored db structure wrt headers/screenshots
added 80/443 to web_ports in config.py

* fixed a few queries and re-added webanalyze to FullScan

* view targets/endpoints done

* overhauled nmap parsing

* print all nmap_results good, next to focus on filtering

* complex nmap filters complete

* nmap printing done

* updated pipfile

* view web-technologies complete

* view searchsploit results complete

* removed filesystem code from amass

* targetlist moved to db only

* targets, amass, masscan all cut over to full database; added view ports

* nmap fully db compliant

* aquatone and webtargets db compliant

* gobuster uses db now

* webanalyze db compliant

* all scans except corscanner are db compliant

* recon tests passing

* web tests passing

* linted files

* added tests for helpers.py and parsers.py

* refactored some redundant code

* added tests to pre-commit

* updated amass tests and pre-commit version

* updated recon.targets tests

* updated nmap tests

* updated masscan tests

* updated config tests

* updated web targets tests

* added gobuster tests

* added aquatone tests

* added subdomain takeover and webanalyze tests; updated test data

* removed homegrown sqlite target in favor of the sqla implementation

* added tests for recon-pipeline.py

* fixed cluge function to set __package__ globally

* updated amass tests

* updated targets tests

* updated nmap tests

* updated masscan tests

* updated aquatone tests

* updated nmap tests to account for no searchsploit

* updated nmap tests to account for no searchsploit

* updated masscan tests

* updated subjack/tkosub tests

* updated web targets tests

* updated webanalyze tests

* added corscanner tests

* linted DBManager a bit

* fixed weird cyclic import issue that only happened during docs build; housekeeping

* added models tests, removed test_install dir

* updated docs a bit; sidenav is wonky

* fixed readthedocs requirements.txt

* fixed issue where view results weren't populated directly after scan

* added new tests to pipeline; working on docs

* updated a few overlooked view command items

* updated tests to reflect changes to shell

* incremental push of docs update

* documentation done

* updated exploitdb install

* updated exploitdb install

* updated seclists install

* parseamass updates db in the event of no amass output

* removed corscanner

* added pipenv shell to install instructions per @GreaterGoodest

* added pipenv shell to install instructions per @GreaterGoodest

* added check for chromium-browser during aquatone install; closes #26

* added check for old recon-tools dir; updated Path.resolve calls to Path.expanduser.resolve; fixed very specific import bug due to filesystem location

* added CONTRIBUTING.md; updated pre-commit hooks/README

* added .gitattributes for linguist reporting

* updated tests

* fixed a few weird bugs found during test

* updated README

* updated asciinema links in README

* updated README with view command video

* updated other location for url scheme /status

* add ability to specify single target using --target (#31)

* updated a few items in docs and moved tool-dict to tools-dir

* fixed issue where removing tempfile without --verbose caused scan to fail
2020-04-17 10:29:16 -05:00


import ast
import logging
import subprocess
import concurrent.futures
from pathlib import Path

import luigi
import sqlalchemy
from luigi.util import inherits
from libnmap.parser import NmapParser
from luigi.contrib.sqla import SQLAlchemyTarget

import pipeline.models.db_manager
from .masscan import ParseMasscanOutput
from .config import defaults, tool_paths
from .helpers import get_ip_address_version, is_ip_address
from ..models.port_model import Port
from ..models.nse_model import NSEResult
from ..models.target_model import Target
from ..models.nmap_model import NmapResult
from ..models.ip_address_model import IPAddress
from ..models.searchsploit_model import SearchsploitResult


@inherits(ParseMasscanOutput)
class ThreadedNmapScan(luigi.Task):
    """ Run ``nmap`` against specific targets and ports gained from the ParseMasscanOutput Task.

    Install:
        ``nmap`` is already on your system if you're using kali. If you're not using kali, refer to your own
        distribution's instructions for installing ``nmap``.

    Basic Example:
        .. code-block:: console

            nmap --open -sT -sC -T 4 -sV -Pn -p 43,25,21,53,22 -oA htb-targets-nmap-results/nmap.10.10.10.155-tcp 10.10.10.155

    Luigi Example:
        .. code-block:: console

            PYTHONPATH=$(pwd) luigi --local-scheduler --module recon.nmap ThreadedNmapScan --target-file htb-targets --top-ports 5000

    Args:
        threads: number of threads for parallel nmap command execution
        db_location: specifies the path to the database used for storing results *Required by upstream Task*
        rate: desired rate for transmitting packets (packets per second) *Required by upstream Task*
        interface: use the named raw network interface, such as "eth0" *Required by upstream Task*
        top_ports: Scan top N most popular ports *Required by upstream Task*
        ports: specifies the port(s) to be scanned *Required by upstream Task*
        target_file: specifies the file on disk containing a list of ips or domains *Required by upstream Task*
        results_dir: specifies the directory on disk to which all Task results are written *Required by upstream Task*
    """

    threads = luigi.Parameter(default=defaults.get("threads"))

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.db_mgr = pipeline.models.db_manager.DBManager(db_location=self.db_location)
        self.results_subfolder = (Path(self.results_dir) / "nmap-results").expanduser().resolve()

    def requires(self):
        """ ThreadedNmapScan depends on ParseMasscanOutput to run.

        TargetList expects target_file, results_dir, and db_location as parameters.
        Masscan expects rate, target_file, interface, and either ports or top_ports as parameters.

        Returns:
            luigi.Task - ParseMasscanOutput
        """
        args = {
            "results_dir": self.results_dir,
            "rate": self.rate,
            "target_file": self.target_file,
            "top_ports": self.top_ports,
            "interface": self.interface,
            "ports": self.ports,
            "db_location": self.db_location,
        }
        return ParseMasscanOutput(**args)
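
    # luigi flattens the dict returned by output() below and considers this task
    # complete only when every target exists: the marker row written by touch()
    # on the sqlalchemy target and the nmap results folder on disk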

    def output(self):
        """ Returns the target output for this task.

        The output folder is ``nmap-results``, nested under ``results_dir``.

        The output folder will be populated with all of the output files generated by
        any nmap commands run. Because the nmap command uses -oA, there will be three
        files per target scanned: .xml, .nmap, .gnmap.

        Returns:
            dict - SQLAlchemyTarget tracking the nmap_result table and luigi.local_target.LocalTarget for the results folder
        """
        return {
            "sqltarget": SQLAlchemyTarget(
                connection_string=self.db_mgr.connection_string, target_table="nmap_result", update_id=self.task_id
            ),
            "localtarget": luigi.LocalTarget(str(self.results_subfolder)),
        }
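
    # parse_nmap_output is called from run() once all nmap commands have
    # finished; it walks every nmap*.xml file in the results folder and loads
    # each one with python-libnmap's NmapParser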

    def parse_nmap_output(self):
        """ Read nmap .xml results and add entries into specified database """
        for entry in self.results_subfolder.glob("nmap*.xml"):
            # relying on python-libnmap here
            report = NmapParser.parse_fromfile(entry)

            for host in report.hosts:
                for service in host.services:
                    port = self.db_mgr.get_or_create(Port, protocol=service.protocol, port_number=service.port)

                    if is_ip_address(host.address) and get_ip_address_version(host.address) == "4":
                        ip_address = self.db_mgr.get_or_create(IPAddress, ipv4_address=host.address)
                    else:
                        ip_address = self.db_mgr.get_or_create(IPAddress, ipv6_address=host.address)

                    if ip_address.target is None:
                        # account for ip addresses identified that aren't already tied to a target
                        # almost certainly ipv6 addresses
                        tgt = self.db_mgr.get_or_create(Target)
                        tgt.ip_addresses.append(ip_address)
                    else:
                        tgt = ip_address.target

                    try:
                        nmap_result = self.db_mgr.get_or_create(
                            NmapResult, port=port, ip_address=ip_address, target=tgt
                        )
                    except sqlalchemy.exc.StatementError:
                        # one of the three (port/ip/tgt) didn't exist and we're querying on ids that the db doesn't know
                        self.db_mgr.add(port)
                        self.db_mgr.add(ip_address)
                        self.db_mgr.add(tgt)

                        nmap_result = self.db_mgr.get_or_create(
                            NmapResult, port=port, ip_address=ip_address, target=tgt
                        )

                    # attach any NSE script results reported for this service
                    for nse_result in service.scripts_results:
                        script_id = nse_result.get("id")
                        script_output = nse_result.get("output")

                        nse_obj = self.db_mgr.get_or_create(NSEResult, script_id=script_id, script_output=script_output)

                        nmap_result.nse_results.append(nse_obj)

                    nmap_result.open = service.open()
                    nmap_result.reason = service.reason
                    nmap_result.service = service.service
                    nmap_result.commandline = report.commandline
                    nmap_result.product = service.service_dict.get("product")
                    nmap_result.product_version = service.service_dict.get("version")
                    nmap_result.target.nmap_results.append(nmap_result)
                    self.db_mgr.add(nmap_result)

        self.output().get("sqltarget").touch()  # write the marker row that tells luigi this task is complete
        self.db_mgr.close()

    def run(self):
        """ Queries the database for targets and open ports and runs targeted nmap scans against only those ports. """
        try:
            self.threads = abs(int(self.threads))
        except (TypeError, ValueError):
            return logging.error("The value supplied to --threads must be a non-negative integer.")

        nmap_command = [  # placeholders will be overwritten with appropriate info in loop below
            "nmap",
            "--open",
            "PLACEHOLDER-IDX-2",
            "-n",
            "-sC",
            "-T",
            "4",
            "-sV",
            "-Pn",
            "-p",
            "PLACEHOLDER-IDX-10",
            "-oA",
        ]
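
        # index 2 of the template becomes -sT or -sU and index 10 becomes the
        # comma-separated port list; the -oA file prefix and the target itself
        # are appended per-command in the loop below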

        commands = list()

        for target in self.db_mgr.get_all_targets():
            for protocol in ("tcp", "udp"):
                ports = self.db_mgr.get_ports_by_ip_or_host_and_protocol(target, protocol)

                if ports:
                    tmp_cmd = nmap_command[:]
                    tmp_cmd[2] = "-sT" if protocol == "tcp" else "-sU"
                    tmp_cmd[10] = ",".join(ports)

                    # arg to -oA, will drop into subdir off curdir
                    tmp_cmd.append(str(Path(self.output().get("localtarget").path) / f"nmap.{target}-{protocol}"))

                    if is_ip_address(target) and get_ip_address_version(target) == "6":
                        # got an ipv6 address
                        tmp_cmd.insert(-2, "-6")

                    tmp_cmd.append(target)  # target as final arg to nmap

                    commands.append(tmp_cmd)

        # basically mkdir -p, won't error out if already there
        self.results_subfolder.mkdir(parents=True, exist_ok=True)

        # submit every nmap command at once; up to self.threads of them run
        # concurrently, and the with block waits for all of them to finish
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.threads) as executor:
            executor.map(subprocess.run, commands)

        self.parse_nmap_output()


@inherits(ThreadedNmapScan)
class SearchsploitScan(luigi.Task):
    """ Run ``searchsploit`` against each ``nmap*.xml`` file in the nmap results directory and store the results in the database.

    Install:
        ``searchsploit`` is already on your system if you're using kali. If you're not using kali, refer to your own
        distribution's instructions for installing ``searchsploit``.

    Basic Example:
        .. code-block:: console

            searchsploit --nmap htb-targets-nmap-results/nmap.10.10.10.155-tcp.xml

    Luigi Example:
        .. code-block:: console

            PYTHONPATH=$(pwd) luigi --local-scheduler --module recon.nmap SearchsploitScan --target-file htb-targets --top-ports 5000

    Args:
        threads: number of threads for parallel nmap command execution *Required by upstream Task*
        db_location: specifies the path to the database used for storing results *Required by upstream Task*
        rate: desired rate for transmitting packets (packets per second) *Required by upstream Task*
        interface: use the named raw network interface, such as "eth0" *Required by upstream Task*
        top_ports: Scan top N most popular ports *Required by upstream Task*
        ports: specifies the port(s) to be scanned *Required by upstream Task*
        target_file: specifies the file on disk containing a list of ips or domains *Required by upstream Task*
        results_dir: specifies the directory on disk to which all Task results are written *Required by upstream Task*
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.db_mgr = pipeline.models.db_manager.DBManager(db_location=self.db_location)

    def requires(self):
        """ SearchsploitScan depends on ThreadedNmapScan to run.

        TargetList expects target_file, results_dir, and db_location as parameters.
        Masscan expects rate, target_file, interface, and either ports or top_ports as parameters.
        ThreadedNmapScan expects threads as a parameter.

        Returns:
            luigi.Task - ThreadedNmapScan
        """
        args = {
            "rate": self.rate,
            "ports": self.ports,
            "threads": self.threads,
            "top_ports": self.top_ports,
            "interface": self.interface,
            "target_file": self.target_file,
            "results_dir": self.results_dir,
            "db_location": self.db_location,
        }
        return ThreadedNmapScan(**args)

    def output(self):
        """ Returns the target output for this task.

        The output is the ``searchsploit_result`` table in the database, tracked
        by luigi via SQLAlchemyTarget.

        Returns:
            luigi.contrib.sqla.SQLAlchemyTarget
        """
        return SQLAlchemyTarget(
            connection_string=self.db_mgr.connection_string, target_table="searchsploit_result", update_id=self.task_id
        )
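
    # rather than json-decoding searchsploit's -j output wholesale, run() below
    # picks out each line containing "Title" and parses it individually with
    # ast.literal_eval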

    def run(self):
        """ Grabs the xml files created by ThreadedNmapScan and runs searchsploit --nmap on each one, saving results to the database. """
        for entry in Path(self.input().get("localtarget").path).glob("nmap*.xml"):
            # -j requests json output; --nmap tells searchsploit to parse the given nmap xml
            proc = subprocess.run(
                [tool_paths.get("searchsploit"), "-j", "-v", "--nmap", str(entry)], stdout=subprocess.PIPE
            )

            if proc.stdout:
                # change wall-searchsploit-results/nmap.10.10.10.157-tcp to 10.10.10.157
                ipaddr = entry.stem.replace("nmap.", "").replace("-tcp", "").replace("-udp", "")

                contents = proc.stdout.decode()

                for line in contents.splitlines():
                    if "Title" in line:
                        # {'Title': "Nginx (Debian Based Distros + Gentoo) ... }

                        # oddity introduced on 15 Apr 2020 from an exploitdb update
                        # entries have two double quotes in a row for no apparent reason
                        # {"Title":"PHP-FPM + Nginx - Remote Code Execution"", ...
                        # seems to affect all entries at the moment. will remove this line if it
                        # ever returns to normal
                        line = line.replace('""', '"')

                        if line.endswith(","):
                            # result would be a tuple if the comma is left on the line; remove it
                            tmp_result = ast.literal_eval(line.strip()[:-1])
                        else:
                            # normal dict
                            tmp_result = ast.literal_eval(line.strip())

                        tgt = self.db_mgr.get_or_create_target_by_ip_or_hostname(ipaddr)

                        ssr_type = tmp_result.get("Type")
                        ssr_title = tmp_result.get("Title")
                        ssr_path = tmp_result.get("Path")

                        ssr = self.db_mgr.get_or_create(
                            SearchsploitResult, type=ssr_type, title=ssr_title, path=ssr_path
                        )

                        tgt.searchsploit_results.append(ssr)
                        self.db_mgr.add(tgt)

        self.output().touch()  # write the marker row that tells luigi this task is complete
        self.db_mgr.close()