'''
Created on 26.08.2018

@author: rpickhardt

lib_autopilot is a library which, based on a networkx graph, tries to
predict which channels should be added for a new node on the network. The
long term goal is to generate a lightning network with good topological
properties.

This library currently uses 4 heuristics to select channels and supports
two strategies for combining those heuristics:

1.) Diverse: tries to get nodes from every distribution
2.) Merge: builds the mixture distribution of the 4 heuristics

The library also estimates how much of the funds should be used for every
newly added channel. This is achieved by looking at the average channel
capacity of the suggested channel partners. A probability distribution
which is proportional to those capacities is created and smoothed with the
uniform distribution.

The 4 heuristics for channel partner suggestion are:

1.) Random: following the Erdős-Rényi model, nodes are drawn from a
    uniform distribution
2.) Central: nodes are sampled from a distribution proportional to the
    betweenness centrality of nodes
3.) Decrease Diameter: nodes are sampled from a distribution which favors
    badly connected nodes
4.) Richness: the nodes with the highest liquidity are taken and sampling
    is done from a uniform distribution over those

The library is supposed to be extended by a simulation framework which can
be used to evaluate which strategies are useful in the long term. For this,
heavy computations (like centrality measures) might have to be reimplemented
in a more dynamic way.

It is also important to understand that this program is not optimized to
run efficiently on large scale graphs with more than 100k nodes or on
densely connected graphs.

The program needs the following dependencies:

pip install networkx numpy
'''

"""
ideas:
* should we respect our own channel balances?
* respect node life time / uptime? or the age of channels?
* include more statistics of the network
* allow autopilots of various nodes to exchange some information
* exchange algorithms as the network grows
* include better handling for duplicates and existing channels
* cap the number of channels for well connected nodes
* the channel balance of automatic channels should not be more than 50% of
  the cumulative channel balance of the destination node

next steps:
* test if the rankings from the heuristics are statistically independent
* evaluate / simulate which method produces graphs with desirable properties
"""

from operator import itemgetter
import logging
import math
import pickle

import networkx as nx
import numpy as np


class Strategy:
    # Define constants. Never changed as they are part of the API.
    DIVERSE = "diverse"
    MERGE = "merge"


class Autopilot:

    def __init__(self, G):
        self.__add_logger()
        self.G = G

    def __add_logger(self):
        """ initiates the logging service for this class """
        # FIXME: adapt to the settings that are proper for you
        self.__logger = logging.getLogger('lib-autopilot')
        self.__logger.setLevel(logging.INFO)
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        self.__logger.addHandler(ch)

    def __sample_from_pdf(self, pdf, k=21):
        """
        helper function to quickly sample k keys from a pdf encoded in a
        dictionary
        """
        if not isinstance(k, int):
            raise TypeError("__sample_from_pdf: k must be an integer")
        if k < 0 or k > 21000:
            raise ValueError("__sample_from_pdf: k must be between 0 and 21000")

        keys, v = zip(*list(pdf.items()))
        if k >= len(keys):
            return keys
        res = np.random.choice(keys, k, replace=False, p=v)
        return res

    def __sample_from_percentile(self, pdf, percentile=0.5, num_items=21):
        """
        only look at the most likely items and sample from those
        """
        if not percentile:
            return self.__sample_from_pdf(pdf, num_items)

        if not isinstance(percentile, float):
            raise TypeError("percentile must be a floating point variable")
        if percentile < 0 or percentile > 1:
            raise ValueError("percentile must be between 0 and 1")

        cumsum = 0
        used_pdf = {}
        for n, value in sorted(
                pdf.items(), key=itemgetter(1), reverse=True):
            cumsum += value
            used_pdf[n] = value
            if cumsum > percentile:
                break

        used_pdf = {k: v / cumsum for k, v in used_pdf.items()}
        return self.__sample_from_pdf(used_pdf, num_items)

    def __get_uniform_pdf(self):
        """
        Generates a uniform distribution over all nodes in the graph

        Unlike the other methods there are no arguments for smoothing or
        skewing, since neither would change the uniform distribution
        """
        pdf = {n: 1 for n in self.G.nodes()}
        length = len(pdf)
        return {k: v / length for k, v in pdf.items()}

    def __get_centrality_pdf(self, skew=False, smooth=False):
        """
        produces a probability distribution proportional to the nodes'
        betweenness centrality scores

        the betweenness centrality counts on how many shortest paths a node
        lies. Connecting to those nodes will most likely make them even more
        central; however, it is good for the node running this operation, as
        this node itself gets a position in the network which is close to
        the central nodes

        this distribution can be skewed or smoothed
        """
        self.__logger.info(
            "CENTRALITY_PDF: Try to generate a PDF proportional to centrality scores")
        pdf = {}
        cumsum = 0
        for n, score in nx.betweenness_centrality(self.G).items():
            pdf[n] = score
            cumsum += score

        # renormalize result
        pdf = {k: v / cumsum for k, v in pdf.items()}
        self.__logger.info(
            "CENTRALITY_PDF: Generated pdf")

        if skew and smooth:
            self.__logger.info(
                "CENTRALITY_PDF: Won't skew and smooth the distribution; ignoring both")
            smooth = False
            skew = False
        return self.__manipulate_pdf(pdf, skew, smooth)

    def __get_rich_nodes_pdf(self, skew=False, smooth=False):
        """
        Get a PDF proportional to the cumulative capacity of nodes

        The probability density function is calculated by looking at the
        cumulative capacity of all channels a node is part of.

        If requested, the method skews the pdf by squaring the capacity
        shares after deriving the pdf. If one wishes, the pdf can also be
        smoothed by taking the mixture distribution with the uniform
        distribution.

        Skewing and smoothing are controlled via the arguments skew and smooth
        """
        self.__logger.info(
            "RICH_PDF: Try to retrieve a PDF proportional to capacities")

        rich_nodes = {}
        network_capacity = 0
        for n in self.G.nodes():
            total_capacity = sum(
                self.G.get_edge_data(
                    n, m)["satoshis"] for m in self.G.neighbors(n))
            network_capacity += total_capacity
            rich_nodes[n] = total_capacity

        rich_nodes = {k: v / network_capacity for k, v in rich_nodes.items()}

        self.__logger.info(
            "RICH_PDF: Generated a PDF proportional to capacities")

        if skew and smooth:
            self.__logger.info(
                "RICH_PDF: Can't skew and smooth the distribution; ignoring both")
            smooth = False
            skew = False

        return self.__manipulate_pdf(rich_nodes, skew, smooth)

    def __get_long_path_pdf(self, skew=True, smooth=False):
        """
        A probability distribution in which badly connected nodes are likely

        This method looks at all pairs shortest paths, takes the sum of all
        path lengths for each node and derives a probability distribution
        from the sums. The idea of this method is to find nodes which are
        increasing the diameter of the network.

        The method will by default skew the pdf by squaring the sums of path
        lengths before deriving a pdf. If one wishes, the pdf can also be
        smoothed by taking the mixture distribution with the uniform
        distribution.

        Skewing and smoothing are controlled via the arguments skew and smooth
        """
        if skew and smooth:
            self.__logger.info(
                "DECREASE DIAMETER: Can't skew and smooth the distribution; ignoring smoothing")
            smooth = False

        path_pdf = {}
        self.__logger.info(
            "DECREASE DIAMETER: Generating probability density function")

        all_pair_shortest_path_lengths = nx.shortest_path_length(self.G)

        for node, paths in all_pair_shortest_path_lengths:
            path_sum = sum(length for _, length in paths.items())
            path_pdf[node] = path_sum

        s = sum(path_pdf.values())
        path_pdf = {k: v / s for k, v in path_pdf.items()}
        self.__logger.info(
            "DECREASE DIAMETER: probability density function created")

        path_pdf = self.__manipulate_pdf(path_pdf, skew, smooth)

        return path_pdf

    def __manipulate_pdf(self, pdf, skew=True, smooth=False):
        """
        helper function to skew or smooth a probability distribution

        skewing is achieved by squaring the probabilities and renormalizing

        smoothing is achieved by taking the mixture distribution with the
        uniform distribution

        smoothing and skewing are not inverse to each other, but they should
        also not happen at the same time. The method will however not
        prevent this
        """
        if not skew and not smooth:  # nothing to do
            return pdf
        length = len(pdf)
        if skew:
            self.__logger.info(
                "manipulate_pdf: Skewing the probability density function")
            pdf = {k: v**2 for k, v in pdf.items()}
            s = sum(pdf.values())
            pdf = {k: v / s for k, v in pdf.items()}

        if smooth:
            self.__logger.info(
                "manipulate_pdf: Smoothing the probability density function")
            pdf = {k: 0.5 * v + 0.5 / length for k, v in pdf.items()}

        return pdf

    def __create_pdfs(self):
        res = {}
        res["path"] = self.__get_long_path_pdf()
        res["centrality"] = self.__get_centrality_pdf()
        res["rich"] = self.__get_rich_nodes_pdf()
        res["uniform"] = self.__get_uniform_pdf()
        return res

    def calculate_statistics(self, candidates):
        """
        computes statistics of the candidate set about connectivity and
        wealth and returns a probability density function (pdf) which
        encodes which percentage of the funds should be used for the
        channel with each candidate node

        the pdf is proportional to the average channel balance of each
        candidate and smoothed with a uniform distribution. Currently the
        smoothing is just a weighted arithmetic mean with a weight of 0.3
        for the uniform distribution.
        """
        pdf = {}
        for candidate in candidates:
            neighbors = list(self.G.neighbors(candidate))
            capacity = sum([self.G.get_edge_data(candidate, n)
                            ["satoshis"] for n in neighbors])
            average = capacity / (1 + len(neighbors))
            pdf[candidate] = average
        cumsum = sum(pdf.values())
        pdf = {k: v / cumsum for k, v in pdf.items()}
        w = 0.7
        print("percentage smoothed percentage capacity numchannels alias")
        print("----------------------------------------------------------------------")
        res_pdf = {}
        for k, v in pdf.items():
            neighbors = list(self.G.neighbors(k))
            capacity = sum([self.G.get_edge_data(k, n)["satoshis"]
                            for n in neighbors])
            name = k
            if "alias" in self.G.nodes[k]:
                name = self.G.nodes[k]["alias"]
            print("{:12.2f} ".format(100 * v),
                  "{:12.2f} ".format(
                      100 * (w * v + (1 - w) / len(candidates))),
                  "{:10} {:10} ".format(capacity,
                                        len(neighbors)),
                  name)
            res_pdf[k] = (w * v + (1 - w) / len(candidates))
        return res_pdf

    def calculate_proposed_channel_capacities(self, pdf, balance=1000000):
        minimal_channel_balance = 20000  # lnd uses 20k satoshi which seems reasonable
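        # Worked example (illustrative numbers, not from the source): with
        # pdf = {"a": 0.7, "b": 0.2, "c": 0.1} and balance = 150000, node
        # "c" would need 20000 / 0.1 = 200000 satoshi of total balance, so
        # the loop below drops "c" and renormalizes; afterwards the minimum
        # share 2/9 only requires 90000 satoshi, which fits the balance.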

        min_probability = min(pdf.values())
        needed_total_balance = math.ceil(
            minimal_channel_balance / min_probability)
        self.__logger.info(
            "Need at least a balance of {} satoshi to open {} channels".format(
                needed_total_balance, len(pdf)))
        while needed_total_balance > balance and len(pdf) > 1:
            min_val = min(pdf.values())
            k = [k for k, v in pdf.items() if v == min_val][0]
            self.__logger.info(
                "Not enough balance to open {} channels. Remove node: {} and rebalance pdf for channel balances".format(
                    len(pdf), k))
            del pdf[k]

            s = sum(pdf.values())
            pdf = {k: v / s for k, v in pdf.items()}

            min_probability = min(pdf.values())
            needed_total_balance = math.ceil(
                minimal_channel_balance / min_probability)
            self.__logger.info(
                "Need at least a balance of {} satoshi to open {} channels".format(
                    needed_total_balance, len(pdf)))

        return pdf

    def find_candidates(self, num_items=21, strategy=Strategy.DIVERSE,
                        percentile=None):
        """
        Generates candidates with several strategies
        """
        self.__logger.info("running the autopilot on a graph with {} nodes and {} edges.".format(
            len(self.G.nodes()), len(self.G.edges())))
        sub_k = math.ceil(num_items / 4)
        self.__logger.info(
            "GENERATE CANDIDATES: Try to generate up to {} nodes with 4 strategies: (random, central, network improvement, liquidity)".format(num_items))
        # FIXME: should remember from where nodes are known

        res = self.__create_pdfs()

        candidates = set()
        # FIXME: Run simulations to decide the following problem:
        """
        we can either do a global sampling by merging all probability
        distributions and sampling once from the result, or we can sample
        from each probability distribution and merge the results. These
        processes are obviously not commutative and we need to check which
        one seems more reasonable.

        My (renepickhardt) gut feeling says that several samples which are
        merged give the best of all worlds, where the other method would
        probably result in something that is either pretty uniform or
        dominated by one very skewed distribution. As mentioned, this needs
        to be tested.
        """
        if strategy == Strategy.DIVERSE:
            for pdf in res.values():
                tmp = self.__sample_from_percentile(pdf, percentile, sub_k)
                candidates = candidates.union(set(tmp))

        elif strategy == Strategy.MERGE:
            merged = {}
            denominator = len(res)
            for pdf in res.values():
                for k, v in pdf.items():
                    if k not in merged:
                        merged[k] = v / denominator
                    else:
                        merged[k] += v / denominator
            candidates = self.__sample_from_percentile(merged, percentile,
                                                       num_items)
        """
        the following code prints a list of candidates for debugging:
        for k in candidates:
            if "alias" in self.G.nodes[k]:
                print(self.G.nodes[k]["alias"])
        """

        if len(candidates) > num_items:
            candidates = np.random.choice(list(candidates), num_items,
                                          replace=False)

        self.__logger.info(
            "GENERATE CANDIDATES: Found {} nodes with which channel creation is suggested".format(
                len(candidates)))
        return candidates


if __name__ == '__main__':
    print("This lib needs to be given a network graph so you need to create a wrapper")
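
    # A minimal sketch of such a wrapper (the node ids, aliases and
    # capacities below are made-up illustration data; the only requirement
    # from this library is that edges carry the "satoshis" attribute):
    G = nx.Graph()
    G.add_node("alice", alias="alice")
    G.add_edge("alice", "bob", satoshis=100000)
    G.add_edge("bob", "carol", satoshis=50000)
    G.add_edge("carol", "dave", satoshis=250000)
    G.add_edge("dave", "alice", satoshis=70000)

    autopilot = Autopilot(G)
    balance = 1000000
    candidates = autopilot.find_candidates(2, strategy=Strategy.DIVERSE)
    pdf = autopilot.calculate_statistics(candidates)
    pdf = autopilot.calculate_proposed_channel_capacities(pdf, balance)
    for node, share in pdf.items():
        print("open a channel to {} with {} satoshi".format(
            node, math.floor(share * balance)))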