'''
Created on 26.08.2018

@author: rpickhardt

lib_autopilot is a library which, based on a networkx graph, tries to predict
which channels should be added for a new node on the network. The long term
goal is to generate a lightning network with good topological properties.

This library currently uses 4 heuristics to select channels and supports two
strategies for combining those heuristics:
1.) Diverse: tries to get nodes from every distribution
2.) Merge: builds the mixture distribution of the 4 heuristics

The library also estimates how much of the funds should be used for every
newly added channel. This is achieved by looking at the average channel
capacity of the suggested channel partners. A probability distribution which
is proportional to those capacities is created and smoothed with the uniform
distribution.

The 4 heuristics for channel partner suggestion are:
1.) Random: following the Erdős-Rényi model, nodes are drawn from a uniform
    distribution
2.) Central: nodes are sampled from a distribution proportional to the
    betweenness centrality of the nodes
3.) Decrease Diameter: nodes are sampled from a distribution which favors
    badly connected nodes
4.) Richness: nodes with high liquidity are taken and sampled from a uniform
    distribution over those

The library is supposed to be extended by a simulation framework which can be
used to evaluate which strategies are useful in the long term. For this,
heavy computations (like centrality measures) might have to be reimplemented
in a more dynamic way. It is also important to understand that this program
is not optimized to run efficiently on large scale graphs with more than
100k nodes or on densely connected graphs.

The program needs the following dependencies:
pip install networkx numpy
'''

"""
ideas:
* should we respect our own channel balances?
* respect node life time / uptime? or time of channels?
* include more statistics of the network
* allow autopilots of various nodes to exchange some information
* exchange algorithms if the network grows
* include better handling for duplicates and existing channels
* cap the number of channels for well connected nodes
* channel balance of automatic channels should not be more than 50% of the
  cumulative channel balance of the destination node

next steps:
* test if the rankings from the heuristics are statistically independent
* evaluate / simulate which method produces graphs with desirable properties
"""

from operator import itemgetter
import logging
import math

import networkx as nx
import numpy as np


class Strategy:
    # define constants. Never changed as they are part of the API
    DIVERSE = "diverse"
    MERGE = "merge"
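

# Illustrative helper, not part of the library's API: it builds a tiny toy
# graph in the format this module expects, i.e. a networkx graph whose edges
# carry a "satoshis" capacity attribute and whose nodes may carry an "alias"
# attribute. All node ids and capacities below are made up.
def _example_graph():
    G = nx.Graph()
    G.add_node("alice", alias="alice")
    G.add_node("bob", alias="bob")
    G.add_node("carol", alias="carol")
    G.add_edge("alice", "bob", satoshis=500000)
    G.add_edge("bob", "carol", satoshis=200000)
    return G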


class Autopilot:
    def __init__(self, G):
        self.__add_logger()
        self.G = G

    def __add_logger(self):
        """ initiates the logging service for this class """
        # FIXME: adapt to the settings that are proper for you
        self.__logger = logging.getLogger('lib-autopilot')
        self.__logger.setLevel(logging.INFO)
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        self.__logger.addHandler(ch)

    def __sample_from_pdf(self, pdf, k=21):
        """ helper function to quickly sample k keys from a pdf encoded in a dictionary """
        if not isinstance(k, int):
            raise TypeError("__sample_from: k must be an integer variable")
        if k < 0 or k > 21000:
            raise ValueError("__sample_from: k must be between 0 and 21000")
        keys, v = zip(*list(pdf.items()))
        if k >= len(keys):
            return keys
        res = np.random.choice(keys, k, replace=False, p=v)
        return res

    def __sample_from_percentile(self, pdf, percentile=0.5, num_items=21):
        """ only look at the most likely items and sample from those """
        if not percentile:
            return self.__sample_from_pdf(pdf, num_items)
        if not isinstance(percentile, float):
            raise TypeError("percentile must be a floating point variable")
        if percentile < 0 or percentile > 1:
            raise ValueError("percentile must be between 0 and 1")
        cumsum = 0
        used_pdf = {}
        for n, value in sorted(
                pdf.items(), key=itemgetter(1), reverse=True):
            cumsum += value
            used_pdf[n] = value
            if cumsum > percentile:
                break
        used_pdf = {k: v / cumsum for k, v in used_pdf.items()}
        return self.__sample_from_pdf(used_pdf, num_items)
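
    # Worked example for __sample_from_percentile (numbers are made up):
    # given pdf = {"a": 0.5, "b": 0.3, "c": 0.15, "d": 0.05} and
    # percentile = 0.7, nodes are visited in order of descending probability
    # until the accumulated mass exceeds 0.7, so only "a" and "b" are kept.
    # Renormalizing by the accumulated sum 0.8 yields {"a": 0.625, "b": 0.375},
    # and the candidates are then drawn from this truncated distribution via
    # __sample_from_pdf.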

    def __get_uniform_pdf(self):
        """
        Generates a uniform distribution over all nodes in the graph

        In contrast to the other methods there are no arguments for smoothing
        or skewing since neither would change the uniform distribution
        """
        pdf = {n: 1 for n in self.G.nodes()}
        length = len(pdf)
        return {k: v / length for k, v in pdf.items()}

    def __get_centrality_pdf(self, skew=False, smooth=False):
        """
        produces a probability distribution which is proportional to the
        betweenness centrality scores of the nodes

        the betweenness centrality counts on how many shortest paths a node
        lies. Connecting to those nodes will most likely make them even more
        central; however, it is good for the node running this autopilot,
        because it gets a position in the network which is close to central
        nodes

        this distribution can be skewed and smoothed
        """
        self.__logger.info(
            "CENTRALITY_PDF: Try to generate a PDF proportional to centrality scores")
        pdf = {}
        cumsum = 0
        for n, score in nx.betweenness_centrality(self.G).items():
            pdf[n] = score
            cumsum += score
        # renormalize result
        pdf = {k: v / cumsum for k, v in pdf.items()}
        self.__logger.info("CENTRALITY_PDF: Generated pdf")
        if skew and smooth:
            self.__logger.info(
                "CENTRALITY_PDF: Can't skew and smooth distribution; ignoring both")
            smooth = False
            skew = False
        return self.__manipulate_pdf(pdf, skew, smooth)

    def __get_rich_nodes_pdf(self, skew=False, smooth=False):
        """
        Get a PDF proportional to the cumulative capacity of nodes

        The probability density function is calculated by looking at the
        cumulative capacity of all channels a node is part of. The method can
        skew the pdf by squaring the values after deriving the pdf. If one
        wishes, the pdf can also be smoothed by taking the mixture
        distribution with the uniform distribution.

        Skewing and smoothing are controlled via the arguments skew and smooth
        """
        self.__logger.info(
            "RICH_PDF: Try to retrieve a PDF proportional to capacities")
        rich_nodes = {}
        network_capacity = 0
        for n in self.G.nodes():
            total_capacity = sum(
                self.G.get_edge_data(n, m)["satoshis"]
                for m in self.G.neighbors(n))
            network_capacity += total_capacity
            rich_nodes[n] = total_capacity
        rich_nodes = {k: v / network_capacity for k, v in rich_nodes.items()}
        self.__logger.info(
            "RICH_PDF: Generated a PDF proportional to capacities")
        if skew and smooth:
            self.__logger.info(
                "RICH_PDF: Can't skew and smooth distribution; ignoring both")
            smooth = False
            skew = False
        return self.__manipulate_pdf(rich_nodes, skew, smooth)

    def __get_long_path_pdf(self, skew=True, smooth=False):
        """
        A probability distribution in which badly connected nodes are likely

        This method looks at all pairs shortest paths, takes the sum of all
        path lengths for each node and derives a probability distribution
        from those sums. The idea of this method is to find nodes which are
        increasing the diameter of the network.

        The method will by default skew the pdf by squaring the values after
        deriving the pdf. If one wishes, the pdf can also be smoothed by
        taking the mixture distribution with the uniform distribution.

        Skewing and smoothing are controlled via the arguments skew and smooth
        """
        if skew and smooth:
            self.__logger.info(
                "DECREASE DIAMETER: Can't skew and smooth distribution; ignoring smoothing")
            smooth = False
        path_pdf = {}
        self.__logger.info(
            "DECREASE DIAMETER: Generating probability density function")
        all_pair_shortest_path_lengths = nx.shortest_path_length(self.G)
        for node, paths in all_pair_shortest_path_lengths:
            path_sum = sum(length for _, length in paths.items())
            path_pdf[node] = path_sum
        s = sum(path_pdf.values())
        path_pdf = {k: v / s for k, v in path_pdf.items()}
        self.__logger.info(
            "DECREASE DIAMETER: probability density function created")
        path_pdf = self.__manipulate_pdf(path_pdf, skew, smooth)
        return path_pdf
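
    # Worked example for the skew / smooth helper below (numbers are made up):
    # skewing pdf = {"a": 0.8, "b": 0.2} squares the values to 0.64 and 0.04
    # and renormalizes by their sum 0.68, giving roughly {"a": 0.94, "b": 0.06},
    # i.e. the distribution becomes more peaked. Smoothing the same pdf mixes
    # it 50/50 with the uniform distribution (here 1/2 per node), giving
    # {"a": 0.5 * 0.8 + 0.25 = 0.65, "b": 0.5 * 0.2 + 0.25 = 0.35}.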

    def __manipulate_pdf(self, pdf, skew=True, smooth=False):
        """
        helper function to skew or smooth a probability distribution

        skewing is achieved by squaring the probabilities and renormalizing;
        smoothing is achieved by taking the mixture distribution with the
        uniform distribution

        smoothing and skewing are not inverse to each other but should also
        not happen at the same time. The method will however not prevent this
        """
        if not skew and not smooth:
            # nothing to do
            return pdf
        length = len(pdf)
        if skew:
            self.__logger.info(
                "manipulate_pdf: Skewing the probability density function")
            pdf = {k: v**2 for k, v in pdf.items()}
            s = sum(pdf.values())
            pdf = {k: v / s for k, v in pdf.items()}
        if smooth:
            self.__logger.info(
                "manipulate_pdf: Smoothing the probability density function")
            pdf = {k: 0.5 * v + 0.5 / length for k, v in pdf.items()}
        return pdf

    def __create_pdfs(self):
        res = {}
        res["path"] = self.__get_long_path_pdf()
        res["centrality"] = self.__get_centrality_pdf()
        res["rich"] = self.__get_rich_nodes_pdf()
        res["uniform"] = self.__get_uniform_pdf()
        return res

    def calculate_statistics(self, candidates):
        """
        computes statistics about the connectivity and wealth of the
        candidate set and returns a probability density function (pdf) which
        encodes which percentage of the funds should be used for the channel
        with each candidate node.

        the pdf is proportional to the average channel balance of each
        candidate and smoothed with a uniform distribution. currently the
        smoothing is just a weighted arithmetic mean with a weight of 0.3 for
        the uniform distribution.
        """
        pdf = {}
        for candidate in candidates:
            neighbors = list(self.G.neighbors(candidate))
            capacity = sum([self.G.get_edge_data(candidate, n)["satoshis"]
                            for n in neighbors])
            average = capacity / (1 + len(neighbors))
            pdf[candidate] = average
        cumsum = sum(pdf.values())
        pdf = {k: v / cumsum for k, v in pdf.items()}
        w = 0.7
        print("percentage smoothed percentage capacity numchannels alias")
        print("----------------------------------------------------------------------")
        res_pdf = {}
        for k, v in pdf.items():
            neighbors = list(self.G.neighbors(k))
            capacity = sum([self.G.get_edge_data(k, n)["satoshis"]
                            for n in neighbors])
            name = k
            if "alias" in self.G.nodes[k]:
                name = self.G.nodes[k]["alias"]
            print("{:12.2f} ".format(100 * v),
                  "{:12.2f} ".format(
                      100 * (w * v + (1 - w) / len(candidates))),
                  "{:10} {:10} ".format(capacity, len(neighbors)),
                  name)
            res_pdf[k] = (w * v + (1 - w) / len(candidates))
        return res_pdf
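
    # Worked example for the capacity proposal below (numbers are made up):
    # with pdf = {"a": 0.6, "b": 0.3, "c": 0.1} and a balance of 150000
    # satoshi, the smallest share 0.1 would only get 15000 satoshi, below the
    # 20000 satoshi minimum, so at least ceil(20000 / 0.1) = 200000 satoshi
    # would be needed. Since that exceeds the balance, "c" is dropped and the
    # pdf is renormalized to roughly {"a": 0.67, "b": 0.33}; now
    # ceil(20000 / 0.33) is about 60000 satoshi, which fits the balance.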

    def calculate_proposed_channel_capacities(self, pdf, balance=1000000):
        minimal_channel_balance = 20000  # lnd uses 20k satoshi which seems reasonable

        min_probability = min(pdf.values())
        needed_total_balance = math.ceil(
            minimal_channel_balance / min_probability)
        self.__logger.info(
            "Need at least a balance of {} satoshi to open {} channels".format(
                needed_total_balance, len(pdf)))

        while needed_total_balance > balance and len(pdf) > 1:
            min_val = min(pdf.values())
            k = [k for k, v in pdf.items() if v == min_val][0]
            self.__logger.info(
                "Not enough balance to open {} channels. Removing node {} and rebalancing the pdf for channel balances".format(
                    len(pdf), k))
            del pdf[k]
            s = sum(pdf.values())
            pdf = {k: v / s for k, v in pdf.items()}

            min_probability = min(pdf.values())
            needed_total_balance = math.ceil(
                minimal_channel_balance / min_probability)
            self.__logger.info(
                "Need at least a balance of {} satoshi to open {} channels".format(
                    needed_total_balance, len(pdf)))
        return pdf

    def find_candidates(self, num_items=21, strategy=Strategy.DIVERSE,
                        percentile=None):
        """
        Generates candidate nodes with several strategies
        """
        self.__logger.info(
            "running the autopilot on a graph with {} nodes and {} edges.".format(
                len(self.G.nodes()), len(self.G.edges())))
        sub_k = math.ceil(num_items / 4)
        self.__logger.info(
            "GENERATE CANDIDATES: Try to generate up to {} nodes with 4 heuristics: (random, central, network improvement, liquidity)".format(num_items))
        # FIXME: should remember from which heuristic nodes are known
        res = self.__create_pdfs()
        candidates = set()
        # FIXME: Run simulations to decide the following problem:
        """
        we can either do a global sampling by merging all probability
        distributions and sample once from them, or we can sample from each
        probability distribution and merge the results. These processes are
        obviously not commutative and we need to check which one seems more
        reasonable. My (renepickhardt) gut feeling says that several samples
        which are merged give the best of all worlds, whereas the other
        method would probably result in something that is either pretty
        uniform or dominated by one very skewed distribution. As mentioned,
        this needs to be tested.
        """
        if strategy == Strategy.DIVERSE:
            for heuristic, pdf in res.items():
                tmp = self.__sample_from_percentile(pdf, percentile, sub_k)
                candidates = candidates.union(set(tmp))
        elif strategy == Strategy.MERGE:
            merged = {}
            denominator = len(res)
            for pdf in res.values():
                for k, v in pdf.items():
                    if k not in merged:
                        merged[k] = v / denominator
                    else:
                        merged[k] += v / denominator
            candidates = self.__sample_from_percentile(
                merged, percentile, num_items)

        """
        following code prints a list of candidates for debugging

        for k in res:
            if "alias" in self.G.nodes[key[k]]:
                print(pdf[key[k]], self.G.nodes[key[k]]["alias"])
        """
        if len(candidates) > num_items:
            candidates = np.random.choice(
                list(candidates), num_items, replace=False)
        self.__logger.info(
            "GENERATE CANDIDATES: Found {} nodes with which channel creation is suggested".format(
                len(candidates)))
        return candidates


if __name__ == '__main__':
    print("This lib needs to be given a network graph so you need to create a wrapper")
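
    # Minimal usage sketch on the illustrative toy graph defined above; all
    # numbers are made up. A real wrapper would instead pass the current
    # channel graph of the lightning network to Autopilot.
    autopilot = Autopilot(_example_graph())
    candidates = autopilot.find_candidates(num_items=2)
    funding_pdf = autopilot.calculate_statistics(candidates)
    funding_pdf = autopilot.calculate_proposed_channel_capacities(
        funding_pdf, balance=1000000)
    print(funding_pdf)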