From d1c90a49cd8a07c8f7e1ace2586c534398e0e283 Mon Sep 17 00:00:00 2001 From: Ira Lun Date: Tue, 29 Sep 2020 07:15:15 +0100 Subject: [PATCH 1/3] Improve memory usage in the generation of tasks (to fix issue #119). Switch to generating tasks using iterators, and make other changes such as using netaddr's IPSet to store ranges of IP addresses, in order to reduce the use of memory where possible. --- Interlace/interlace.py | 28 ++- Interlace/lib/core/input.py | 425 ++++++++++++++++++++---------------- Interlace/lib/threader.py | 10 +- 3 files changed, 261 insertions(+), 202 deletions(-) diff --git a/Interlace/interlace.py b/Interlace/interlace.py index 792a098..0c44d7e 100644 --- a/Interlace/interlace.py +++ b/Interlace/interlace.py @@ -7,13 +7,18 @@ from Interlace.lib.core.output import OutputHelper, Level from Interlace.lib.threader import Pool -def build_queue(arguments, output, repeat): - task_list = InputHelper.process_commands(arguments) - for task in task_list: - output.terminal(Level.THREAD, task.name(), "Added to Queue") - print('Generated {} commands in total'.format(len(task_list))) +def task_queue_generator_func(arguments, output, repeat): + tasks_data = InputHelper.process_data_for_tasks_iterator(arguments) + tasks_count = tasks_data["tasks_count"] + yield tasks_count + tasks_generator_func = InputHelper.make_tasks_generator_func(tasks_data) + for i in range(repeat): + tasks_iterator = tasks_generator_func() + for task in tasks_iterator: + output.terminal(Level.THREAD, task.name(), "Added to Queue") + yield task + print('Generated {} commands in total'.format(tasks_count)) print('Repeat set to {}'.format(repeat)) - return task_list * repeat def main(): @@ -27,9 +32,14 @@ def main(): repeat = int(arguments.repeat) else: repeat = 1 - - - pool = Pool(arguments.threads, build_queue(arguments, output, repeat), arguments.timeout, output, arguments.sober) + + pool = Pool( + arguments.threads, + task_queue_generator_func(arguments, output, repeat), + arguments.timeout, + output, + arguments.sober, + ) pool.run() diff --git a/Interlace/lib/core/input.py b/Interlace/lib/core/input.py index f83dd9d..3b965c2 100644 --- a/Interlace/lib/core/input.py +++ b/Interlace/lib/core/input.py @@ -1,11 +1,16 @@ +import functools +import itertools import os.path import sys from io import TextIOWrapper from argparse import ArgumentParser -from math import ceil -from random import sample, choice +from random import choice -from netaddr import IPNetwork, IPRange, IPGlob +from netaddr import ( + IPRange, + IPSet, + glob_to_iprange, +) from Interlace.lib.threader import Task @@ -45,42 +50,6 @@ class InputHelper(object): return files - @staticmethod - def _get_ips_from_range(ip_range): - ips = set() - ip_range = ip_range.split("-") - - # parsing the above structure into an array and then making into an IP address with the end value - end_ip = ".".join(ip_range[0].split(".")[0:-1]) + "." 
+ ip_range[1] - - # creating an IPRange object to get all IPs in between - range_obj = IPRange(ip_range[0], end_ip) - - for ip in range_obj: - ips.add(str(ip)) - - return ips - - @staticmethod - def _get_ips_from_glob(glob_ips): - ip_glob = IPGlob(glob_ips) - - ips = set() - - for ip in ip_glob: - ips.add(str(ip)) - - return ips - - @staticmethod - def _get_cidr_to_ips(cidr_range): - ips = set() - - for ip in IPNetwork(cidr_range): - ips.add(str(ip)) - - return ips - @staticmethod def _process_port(port_type): if "," in port_type: @@ -146,178 +115,256 @@ class InputHelper(object): return task_block @staticmethod - def _pre_process_hosts(host_ranges, destination_set, arguments): - for host in host_ranges: - host = host.replace(" ", "").replace("\n", "") - # check if it is a domain name - if len(host.split(".")[0]) == 0: - destination_set.add(host) - continue + def _replace_target_variables_in_commands(tasks, str_targets, ipset_targets): + TARGET_VAR = "_target_" + HOST_VAR = "_host_" + CLEANTARGET_VAR = "_cleantarget_" + for task in tasks: + command = task.name() + if TARGET_VAR in command or HOST_VAR in command: + for dirty_target in itertools.chain(str_targets, ipset_targets): + yielded_task = task.clone() + dirty_target = str(dirty_target) + yielded_task.replace(TARGET_VAR, dirty_target) + yielded_task.replace(HOST_VAR, dirty_target) + yielded_task.replace( + CLEANTARGET_VAR, + dirty_target.replace("http://", "").replace( + "https://", "").rstrip("/").replace("/", "-"), + ) + yield yielded_task + else: + yield task - if host.split(".")[0][0].isalpha() or host.split(".")[-1][-1].isalpha(): - destination_set.add(host) - continue - for ips in host.split(","): - # checking for CIDR - if not arguments.nocidr and "/" in ips: - destination_set.update(InputHelper._get_cidr_to_ips(ips)) - # checking for IPs in a range - elif "-" in ips: - destination_set.update(InputHelper._get_ips_from_range(ips)) - # checking for glob ranges - elif "*" in ips: - destination_set.update(InputHelper._get_ips_from_glob(ips)) + @staticmethod + def _replace_variable_in_commands(tasks_generator_func, variable, replacements): + for task in tasks_generator_func(): + if variable in task.name(): + for replacement in replacements: + yielded_task = task.clone() + yielded_task.replace(variable, str(replacement)) + yield yielded_task + else: + yield task + + @staticmethod + def _replace_variable_array( + tasks_generator_func, variable, replacements_iterator + ): + for task in tasks_generator_func(): + task.replace(variable, str(next(replacements_iterator))) + yield task + + @staticmethod + def _process_targets(arguments): + def pre_process_target_spec(target_spec): + target_spec = "".join( + filter(lambda char: char not in (" ", "\n"), target_spec) + ) + return target_spec.split(",") + # If ","s not in target_spec, this returns [target_spec], so this + # static method always returns a list + + if arguments.target: + target_specs = pre_process_target_spec(arguments.target) + else: + target_specs_file = arguments.target_list + if not isinstance(target_specs_file, TextIOWrapper): + if not sys.stdin.isatty(): + target_specs_file = sys.stdin + target_specs = ( + target_spec.strip() for target_spec in target_specs_file + ) + target_specs = ( + pre_process_target_spec(target_spec) for target_spec in + target_specs if target_spec + ) + target_specs = itertools.chain(*target_specs) + + def parse_and_group_target_specs(target_specs, nocidr): + str_targets = set() + ipset_targets = IPSet() + for target_spec in target_specs: + if ( + 
target_spec.startswith(".") or + ( + (target_spec[0].isalpha() or target_spec[-1].isalpha()) + and "." in target_spec + ) or + (nocidr and "/" in target_spec) + ): + str_targets.add(target_spec) else: - destination_set.add(ips) + if "-" in target_spec: + start_ip, post_dash_segment = target_spec.split("-") + end_ip = start_ip.rsplit(".", maxsplit=1)[0] + "." + \ + post_dash_segment + target_spec = IPRange(start_ip, end_ip) + elif "*" in target_spec: + target_spec = glob_to_iprange(target_spec) + else: # str IP addresses and str CIDR notations + target_spec = (target_spec,) + ipset_targets.update(IPSet(target_spec)) + return (str_targets, ipset_targets) + + str_targets, ipset_targets = parse_and_group_target_specs( + target_specs=target_specs, + nocidr=arguments.nocidr, + ) + + if arguments.exclusions or arguments.exclusions_list: + if arguments.exclusions: + exclusion_specs = pre_process_target_spec(arguments.exclusions) + elif arguments.exclusions_list: + exclusion_specs = ( + exclusion_spec.strip() for exclusion_spec in + arguments.exclusions_list + ) + exclusion_specs = ( + pre_process_target_spec(exclusion_spec) for exclusion_spec + in exclusion_specs if exclusion_spec + ) + exclusion_specs = itertools.chain(*exclusion_specs) + str_exclusions, ipset_exclusions = parse_and_group_target_specs( + target_specs=exclusion_specs, + nocidr=arguments.nocidr, + ) + str_targets -= str_exclusions + ipset_targets -= ipset_exclusions + + return (str_targets, ipset_targets) @staticmethod - def _process_clean_targets(commands, dirty_targets): - def add_task(t, item_list, my_command_set): - if t not in my_command_set: - my_command_set.add(t) - item_list.append(t) - - variable = '_cleantarget_' - tasks = [] - temp = set() # this helps avoid command duplication and re/deconstructing of temporary set - # changed order to ensure different combinations of commands aren't created - for dirty_target in dirty_targets: - for command in commands: - new_task = command.clone() - if command.name().find(variable) != -1: - new_task.replace("_target_", dirty_target) - - # replace all https:// or https:// with nothing - dirty_target = dirty_target.replace('http://', '') - dirty_target = dirty_target.replace('https://', '') - # chop off all trailing '/', if any. 
- while dirty_target.endswith('/'): - dirty_target = dirty_target.strip('/') - # replace all remaining '/' with '-' and that's enough cleanup for the day - clean_target = dirty_target.replace('/', '-') - new_task.replace(variable, clean_target) - add_task(new_task, tasks, temp) - else: - new_task.replace("_target_", dirty_target) - add_task(new_task, tasks, temp) - - return tasks - - @staticmethod - def _replace_variable_with_commands(commands, variable, replacements): - def add_task(t, item_list, my_set): - if t not in my_set: - my_set.add(t) - item_list.append(t) - - tasks = [] - temp_set = set() # to avoid duplicates - for command in commands: - for replacement in replacements: - if command.name().find(variable) != -1: - new_task = command.clone() - new_task.replace(variable, str(replacement)) - add_task(new_task, tasks, temp_set) - else: - add_task(command, tasks, temp_set) - return tasks - - @staticmethod - def _replace_variable_array(commands, variable, replacement): - if variable not in sample(commands, 1)[0]: - return - - for counter, command in enumerate(commands): - command.replace(variable, str(replacement[counter])) - - @staticmethod - def process_commands(arguments): - commands = list() - ranges = set() - targets = set() - exclusions_ranges = set() - exclusions = set() - + def process_data_for_tasks_iterator(arguments): # removing the trailing slash if any if arguments.output and arguments.output[-1] == "/": arguments.output = arguments.output[:-1] - if arguments.port: - ports = InputHelper._process_port(arguments.port) + ports = InputHelper._process_port(arguments.port) if arguments.port \ + else None - if arguments.realport: - real_ports = InputHelper._process_port(arguments.realport) + real_ports = InputHelper._process_port(arguments.realport) if \ + arguments.realport else None - # process targets first - if arguments.target: - ranges.add(arguments.target) - else: - target_file = arguments.target_list - if not isinstance(target_file, TextIOWrapper): - if not sys.stdin.isatty(): - target_file = sys.stdin - ranges.update([target.strip() for target in target_file if target.strip()]) + str_targets, ipset_targets = InputHelper._process_targets( + arguments=arguments, + ) + targets_count = len(str_targets) + ipset_targets.size - # process exclusions first - if arguments.exclusions: - exclusions_ranges.add(arguments.exclusions) - else: - if arguments.exclusions_list: - for exclusion in arguments.exclusions_list: - exclusion = exclusion.strip() - if exclusion: - exclusions.add(exclusion) - - # removing elements that may have spaces (helpful for easily processing comma notation) - InputHelper._pre_process_hosts(ranges, targets, arguments) - InputHelper._pre_process_hosts(exclusions_ranges, exclusions, arguments) - - # difference operation - targets -= exclusions - - if len(targets) == 0: + if not targets_count: raise Exception("No target provided, or empty target list") if arguments.random: files = InputHelper._get_files_from_directory(arguments.random) random_file = choice(files) - - if arguments.command: - commands.append(Task(arguments.command.rstrip('\n'))) else: - commands = InputHelper._pre_process_commands(arguments.command_list) + random_file = None - # commands = InputHelper._replace_variable_with_commands(commands, "_target_", targets) - commands = InputHelper._process_clean_targets(commands, targets) - commands = InputHelper._replace_variable_with_commands(commands, "_host_", targets) - - if arguments.port: - commands = 
InputHelper._replace_variable_with_commands(commands, "_port_", ports) - - if arguments.realport: - commands = InputHelper._replace_variable_with_commands(commands, "_realport_", real_ports) - - if arguments.random: - commands = InputHelper._replace_variable_with_commands(commands, "_random_", [random_file]) - - if arguments.output: - commands = InputHelper._replace_variable_with_commands(commands, "_output_", [arguments.output]) + tasks = list() + if arguments.command: + tasks.append(Task(arguments.command.rstrip('\n'))) + else: + tasks = InputHelper._pre_process_commands(arguments.command_list) if arguments.proto: - if "," in arguments.proto: - protocols = arguments.proto.split(",") - else: - protocols = arguments.proto - commands = InputHelper._replace_variable_with_commands(commands, "_proto_", protocols) + protocols = arguments.proto.split(",") + # if "," not in arguments.proto, [arguments.proto] is returned by + # .split() + else: + protocols = None - # process proxies - if arguments.proxy_list: - proxy_list = [proxy for proxy in arguments.proxy_list if proxy.strip()] - if len(proxy_list) < len(commands): - proxy_list = ceil(len(commands) / len(proxy_list)) * proxy_list + # Calculate the tasks count, as we will not have access to the len() of + # the tasks iterator + tasks_count = len(tasks) * targets_count + if ports: + tasks_count *= len(ports) + if real_ports: + tasks_count *= len(real_ports) + if protocols: + tasks_count *= len(protocols) - InputHelper._replace_variable_array(commands, "_proxy_", proxy_list) - return commands + return { + "tasks": tasks, + "str_targets": str_targets, + "ipset_targets": ipset_targets, + "ports": ports, + "real_ports": real_ports, + "random_file": random_file, + "output": arguments.output, + "protocols": protocols, + "proxy_list": arguments.proxy_list, + "tasks_count": tasks_count, + } + + @staticmethod + def make_tasks_generator_func(tasks_data): + tasks_generator_func = functools.partial( + InputHelper._replace_target_variables_in_commands, + tasks=tasks_data["tasks"], + str_targets=tasks_data["str_targets"], + ipset_targets=tasks_data["ipset_targets"], + ) + + ports = tasks_data["ports"] + if ports: + tasks_generator_func = functools.partial( + InputHelper._replace_variable_in_commands, + tasks_generator_func=tasks_generator_func, + variable="_port_", + replacements=ports, + ) + + real_ports = tasks_data["real_ports"] + if real_ports: + tasks_generator_func = functools.partial( + InputHelper._replace_variable_in_commands, + tasks_generator_func=tasks_generator_func, + variable="_realport_", + replacements=real_ports, + ) + + random_file = tasks_data["random_file"] + if random_file: + tasks_generator_func = functools.partial( + InputHelper._replace_variable_in_commands, + tasks_generator_func=tasks_generator_func, + variable="_random_", + replacements=[random_file], + ) + + output = tasks_data["output"] + if output: + tasks_generator_func = functools.partial( + InputHelper._replace_variable_in_commands, + tasks_generator_func=tasks_generator_func, + variable="_output_", + replacements=[output], + ) + + protocols = tasks_data["protocols"] + if protocols: + tasks_generator_func = functools.partial( + InputHelper._replace_variable_in_commands, + tasks_generator_func=tasks_generator_func, + variable="_proto_", + replacements=protocols, + ) + + proxy_list = tasks_data["proxy_list"] + if proxy_list: + proxy_list_iterator = itertools.cycle( + proxy for proxy in ( + proxy.strip() for proxy in proxy_list + ) if proxy + ) + tasks_generator_func = 
functools.partial( + InputHelper._replace_variable_array, + tasks_generator_func=tasks_generator_func, + variable="_proxy_", + replacements_iterator=proxy_list_iterator, + ) + + return tasks_generator_func class InputParser(object): diff --git a/Interlace/lib/threader.py b/Interlace/lib/threader.py index e367f06..2bc7af7 100644 --- a/Interlace/lib/threader.py +++ b/Interlace/lib/threader.py @@ -69,7 +69,7 @@ class Worker(object): while True: try: # get task from queue - task = self.queue.pop(0) + task = next(self.queue) if isinstance(self.tqdm, tqdm): self.tqdm.update(1) # run task @@ -90,17 +90,19 @@ class Pool(object): if max_workers <= 0: raise ValueError("Workers must be >= 1") + tasks_count = next(task_queue) + # check if the queue is empty - if not task_queue: + if not tasks_count: raise ValueError("The queue is empty") self.queue = task_queue self.timeout = timeout self.output = output - self.max_workers = min(len(task_queue), max_workers) + self.max_workers = min(tasks_count, max_workers) if not progress_bar: - self.tqdm = tqdm(total=len(task_queue)) + self.tqdm = tqdm(total=tasks_count) else: self.tqdm = True From 14066c608abbf7f7a41595a6cd8057e1fb173537 Mon Sep 17 00:00:00 2001 From: Ira Lun Date: Thu, 1 Oct 2020 20:42:54 +0100 Subject: [PATCH 2/3] Fix the name of the exception when queue is empty. --- Interlace/lib/threader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Interlace/lib/threader.py b/Interlace/lib/threader.py index 2bc7af7..1038eaa 100644 --- a/Interlace/lib/threader.py +++ b/Interlace/lib/threader.py @@ -66,17 +66,17 @@ class Worker(object): self.tqdm = tqdm def __call__(self): + queue = self.queue while True: try: - # get task from queue - task = next(self.queue) + task = next(queue) if isinstance(self.tqdm, tqdm): self.tqdm.update(1) # run task task.run(self.tqdm) else: task.run() - except IndexError: + except StopIteration: break From c8c4ae83c5bb5d2c625a428cb7f3d313ef11f543 Mon Sep 17 00:00:00 2001 From: prodigysml Date: Sat, 3 Oct 2020 13:19:57 +0000 Subject: [PATCH 3/3] Added version increment for the major update --- Interlace/lib/core/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Interlace/lib/core/__version__.py b/Interlace/lib/core/__version__.py index aa1a8c4..e5102d3 100644 --- a/Interlace/lib/core/__version__.py +++ b/Interlace/lib/core/__version__.py @@ -1 +1 @@ -__version__ = '1.8.2' +__version__ = '1.9.0'
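
Note (not part of the patch series above): a minimal standalone sketch of the count-first generator pattern that PATCH 1/3 introduces. The queue generator yields the total task count before the tasks themselves, so the Pool can size its worker count and progress bar without materializing the full task list, and each Worker drains the shared generator with next() until StopIteration (the exception fixed in PATCH 2/3). The names task_queue_generator and worker below are illustrative placeholders, not the project's API.

def task_queue_generator(tasks, repeat=1):
    # The first item yielded is the total count, consumed once by the pool
    # for tqdm/worker sizing; everything yielded after it is a task.
    yield len(tasks) * repeat
    for _ in range(repeat):
        for task in tasks:
            yield task

def worker(queue):
    # Drain the shared generator until it is exhausted.
    done = []
    while True:
        try:
            done.append(next(queue))
        except StopIteration:  # PATCH 2/3: catch StopIteration, not IndexError
            break
    return done

if __name__ == "__main__":
    queue = task_queue_generator(["echo a", "echo b"], repeat=2)
    total = next(queue)   # the count is read exactly once, before any task
    print(total)          # 4
    print(worker(queue))  # ['echo a', 'echo b', 'echo a', 'echo b']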
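
Likewise, a hedged sketch of the target grouping that parse_and_group_target_specs() performs in PATCH 1/3: hostname-like specs stay as plain strings, while IP ranges, globs and CIDRs are folded into a single netaddr IPSet, which stores them as merged CIDR blocks instead of one string per address. parse_targets below is an illustrative helper written for this note, not the function added by the patch, and it assumes netaddr is installed.

from netaddr import IPRange, IPSet, glob_to_iprange

def parse_targets(specs, nocidr=False):
    str_targets = set()   # hostnames and (with --nocidr) raw CIDR strings
    ip_targets = IPSet()  # every numeric target, kept as merged CIDR blocks
    for spec in specs:
        looks_like_hostname = spec.startswith(".") or (
            (spec[0].isalpha() or spec[-1].isalpha()) and "." in spec
        )
        if looks_like_hostname or (nocidr and "/" in spec):
            str_targets.add(spec)
        elif "-" in spec:   # dash range, e.g. 192.168.1.1-3
            start, last_octet = spec.split("-")
            end = start.rsplit(".", 1)[0] + "." + last_octet
            ip_targets.update(IPSet(IPRange(start, end)))
        elif "*" in spec:   # glob range, e.g. 192.168.1.*
            ip_targets.update(IPSet(glob_to_iprange(spec)))
        else:               # single IP address or CIDR notation
            ip_targets.update(IPSet([spec]))
    return str_targets, ip_targets

str_hosts, ips = parse_targets(["example.com", "10.0.0.0/30", "192.168.1.1-3"])
print(str_hosts)  # {'example.com'}
print(ips.size)   # 7 addresses (4 from the /30, 3 from the dash range)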