autopilot: Add a direct copy of Rene's original autopilot code
Just copying it over, not pluginizing it yet.
autopilot/lib_autopilot.py (new file, 430 lines)
@@ -0,0 +1,430 @@
'''
Created on 26.08.2018

@author: rpickhardt

lib_autopilot is a library which, based on a networkx graph, tries to
predict which channels should be added for a new node on the network. The
long-term goal is to generate a lightning network with good topological
properties.

This library currently uses 4 heuristics to select channels and supports
two strategies for combining those heuristics:
1.) Diverse: tries to get nodes from every distribution
2.) Merge: builds the mixture distribution of the 4 heuristics

The library also estimates how much funds should be used for every newly
added channel. This is achieved by looking at the average channel capacity
of the suggested channel partners. A probability distribution which is
proportional to those capacities is created and smoothed with the uniform
distribution.

The 4 heuristics for channel partner suggestion are:

1.) Random: following the Erdos-Renyi model, nodes are drawn from a uniform
distribution
2.) Central: nodes are sampled from a distribution proportional to the
betweenness centrality of nodes
3.) Decrease Diameter: nodes are sampled from a distribution which favors
badly connected nodes
4.) Richness: nodes with high liquidity are taken and sampled from a
uniform distribution of those

The library is supposed to be extended by a simulation framework which can
be used to evaluate which strategies are useful in the long term. For this,
heavy computations (like centrality measures) might have to be reimplemented
in a more dynamic way.

It is also important to understand that this program is not optimized to run
efficiently on large-scale graphs with more than 100k nodes or on densely
connected graphs.

The program needs the following dependencies:
pip install networkx numpy
'''
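# A rough illustration of the two combination strategies (the numbers below
# are made up for illustration, not defaults of this library): with
# num_items = 8 and the 4 heuristics above,
#  * "diverse" draws ceil(8 / 4) = 2 candidates from each of the four
#    distributions and takes the union of the samples,
#  * "merge" first averages the four distributions into a single mixture
#    pdf (each weighted 1/4) and then draws all 8 candidates from it.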
"""
ideas:
* should we respect our own channel balances?
* respect node lifetime / uptime? or time of channels?
* include more statistics of the network:
* allow autopilots of various nodes to exchange some information
* exchange algorithms if the network grows.
* include better handling for duplicates and existing channels
* cap number of channels for well-connected nodes.
* channel balance of automatic channels should not be more than 50% of
  cumulative channel balance of destination node


next steps:
* test if the rankings from the heuristics are statistically independent
* evaluate / simulate which method produces graphs with desirable properties
"""

from operator import itemgetter
import logging
import math
import pickle


import networkx as nx
import numpy as np


class Strategy:
    # define constants. Never changed as they are part of the API
    DIVERSE = "diverse"
    MERGE = "merge"


class Autopilot():

    def __init__(self, G):
        self.__add_logger()
        self.G = G

    def __add_logger(self):
        """ initiates the logging service for this class """
        # FIXME: adapt to the settings that are proper for you
        self.__logger = logging.getLogger('lib-autopilot')
        self.__logger.setLevel(logging.INFO)
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        self.__logger.addHandler(ch)

    def __sample_from_pdf(self, pdf, k=21):
        """
        helper function to quickly sample from a pdf encoded in a dictionary
        """
        if type(k) is not int:
            raise TypeError("__sample_from: k must be an integer variable")
        if k < 0 or k > 21000:
            raise ValueError("__sample_from: k must be between 0 and 21000")

        keys, v = zip(*list(pdf.items()))
        if k >= len(keys):
            return keys
        res = np.random.choice(keys, k, replace=False, p=v)
        return res

    def __sample_from_percentile(self, pdf, percentile=0.5, num_items=21):
        """
        only look at the most likely items and sample from those
        """
        if not percentile:
            return self.__sample_from_pdf(pdf, num_items)

        if type(percentile) is not float:
            raise TypeError("percentile must be a floating point variable")
        if percentile < 0 or percentile > 1:
            raise ValueError("percentile must be between 0 and 1")

        cumsum = 0
        used_pdf = {}
        for n, value in sorted(
                pdf.items(), key=itemgetter(1), reverse=True):
            cumsum += value
            used_pdf[n] = value
            if cumsum > percentile:
                break

        used_pdf = {k: v/cumsum for k, v in used_pdf.items()}
        return self.__sample_from_pdf(used_pdf, num_items)
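    # Worked example of the percentile cut-off (made-up numbers): for
    # pdf = {a: 0.5, b: 0.3, c: 0.15, d: 0.05} and percentile = 0.6 the items
    # are visited in descending order of probability; a (0.5) and b (0.3) are
    # kept because the running sum only exceeds 0.6 once b has been added.
    # The kept items are renormalised by that running sum (0.8), giving
    # {a: 0.625, b: 0.375}, and the final sample is drawn from those.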
    def __get_uniform_pdf(self):
        """
        Generates a uniform distribution of all nodes in the graph

        In contrast to the other methods there are no arguments for smoothing
        or skewing since this would not change the uniform distribution
        """
        pdf = {n: 1 for n in self.G.nodes()}
        length = len(pdf)
        return {k: v/length for k, v in pdf.items()}
    def __get_centrality_pdf(self, skew=False, smooth=False):
        """
        produces a probability distribution proportional to the betweenness
        centrality scores of nodes

        the betweenness centrality counts on how many shortest paths a node
        lies. Connecting to those nodes will most likely make them even more
        central; however it is good for the node running this operation, as
        this node itself gets a position in the network which is close to the
        central nodes

        this distribution can be skewed and smoothed
        """
        self.__logger.info(
            "CENTRALITY_PDF: Try to generate a PDF proportional to centrality scores")
        pdf = {}
        cumsum = 0
        for n, score in nx.betweenness_centrality(self.G).items():
            pdf[n] = score
            cumsum += score

        # renormalize result
        pdf = {k: v/cumsum for k, v in pdf.items()}
        self.__logger.info(
            "CENTRALITY_PDF: Generated pdf")

        if skew and smooth:
            self.__logger.info(
                "CENTRALITY_PDF: Won't skew and smooth distribution, ignoring both")
            smooth = False
            skew = False
        return self.__manipulate_pdf(pdf, skew, smooth)
    def __get_rich_nodes_pdf(self, skew=False, smooth=False):
        """
        Get a PDF proportional to the cumulative capacity of nodes

        The probability density function is calculated by looking at the
        cumulative capacity of all channels one node is part of.

        The method can skew the pdf by taking the squares of the sums of
        capacities after deriving a pdf. If one wishes, the method can also
        smooth the pdf by taking the mixture distribution with the uniform
        distribution.

        Skewing and smoothing is controlled via the arguments skew and smooth
        """
        self.__logger.info(
            "RICH_PDF: Try to retrieve a PDF proportional to capacities")

        rich_nodes = {}
        network_capacity = 0
        for n in self.G.nodes():
            total_capacity = sum(
                self.G.get_edge_data(
                    n, m)["satoshis"] for m in self.G.neighbors(n))
            network_capacity += total_capacity
            rich_nodes[n] = total_capacity

        rich_nodes = {k: v/network_capacity for k, v in rich_nodes.items()}

        self.__logger.info(
            "RICH_PDF: Generated a PDF proportional to capacities")

        if skew and smooth:
            self.__logger.info(
                "RICH_PDF: Can't skew and smooth distribution, ignoring both")
            smooth = False
            skew = False

        return self.__manipulate_pdf(rich_nodes, skew, smooth)
    def __get_long_path_pdf(self, skew=True, smooth=False):
        """
        A probability distribution in which badly connected nodes are likely

        This method looks at all pairs shortest paths and takes the sum of all
        path lengths for each node and derives a probability distribution
        from the sums. The idea of this method is to find nodes which are
        increasing the diameter of the network.

        The method will by default skew the pdf by taking the squares of the
        sums of path lengths before deriving a pdf. If one wishes, the method
        can also be smoothed by taking the mixture distribution with the
        uniform distribution.

        Skewing and smoothing is controlled via the arguments skew and smooth
        """
        if skew and smooth:
            self.__logger.info(
                "DECREASE DIAMETER: Can't skew and smooth distribution, ignoring smoothing")
            smooth = False

        path_pdf = {}
        self.__logger.info(
            "DECREASE DIAMETER: Generating probability density function")

        all_pair_shortest_path_lengths = nx.shortest_path_length(self.G)

        for node, paths in all_pair_shortest_path_lengths:
            path_sum = sum(length for _, length in paths.items())
            path_pdf[node] = path_sum

        s = sum(path_pdf.values())
        path_pdf = {k: v/s for k, v in path_pdf.items()}
        self.__logger.info(
            "DECREASE DIAMETER: probability density function created")

        path_pdf = self.__manipulate_pdf(path_pdf, skew, smooth)

        return path_pdf
    def __manipulate_pdf(self, pdf, skew=True, smooth=False):
        """
        helper function to skew or smooth a probability distribution

        skewing is achieved by taking the squares of the probabilities and
        renormalizing

        smoothing is achieved by taking the mixture distribution with the
        uniform distribution

        smoothing and skewing are not inverse to each other but should also
        not happen at the same time. The method will however not prevent this
        """
        if not skew and not smooth:  # nothing to do
            return pdf
        length = len(pdf)
        if skew:
            self.__logger.info(
                "manipulate_pdf: Skewing the probability density function")
            pdf = {k: v**2 for k, v in pdf.items()}
            s = sum(pdf.values())
            pdf = {k: v/s for k, v in pdf.items()}

        if smooth:
            self.__logger.info(
                "manipulate_pdf: Smoothing the probability density function")
            pdf = {k: 0.5*v + 0.5/length for k, v in pdf.items()}

        return pdf
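    # Worked example of skewing and smoothing (made-up numbers) for a
    # two-node pdf {a: 0.8, b: 0.2}:
    #  * skewing squares the values (0.64 and 0.04) and renormalises by their
    #    sum 0.68, giving roughly {a: 0.94, b: 0.06}, i.e. a more peaked pdf,
    #  * smoothing mixes with the uniform pdf (0.5 per node here):
    #    0.5*0.8 + 0.5*0.5 = 0.65 and 0.5*0.2 + 0.5*0.5 = 0.35, i.e. flatter.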
    def __create_pdfs(self):
        res = {}
        res["path"] = self.__get_long_path_pdf()
        res["centrality"] = self.__get_centrality_pdf()
        res["rich"] = self.__get_rich_nodes_pdf()
        res["uniform"] = self.__get_uniform_pdf()
        return res
    def calculate_statistics(self, candidates):
        """
        computes statistics of the candidate set about connectivity and wealth
        and returns a probability density function (pdf) which encodes which
        percentage of the funds should be used for each channel with each
        candidate node

        the pdf is proportional to the average balance of each candidate and
        smoothed with a uniform distribution. Currently the smoothing is just
        a weighted arithmetic mean with a weight of 0.3 for the uniform
        distribution.
        """
        pdf = {}
        for candidate in candidates:
            neighbors = list(self.G.neighbors(candidate))
            capacity = sum([self.G.get_edge_data(candidate, n)
                            ["satoshis"] for n in neighbors])
            average = capacity / (1 + len(neighbors))
            pdf[candidate] = average
        cumsum = sum(pdf.values())
        pdf = {k: v / cumsum for k, v in pdf.items()}
        w = 0.7
        print("percentage smoothed percentage capacity numchannels alias")
        print("----------------------------------------------------------------------")
        res_pdf = {}
        for k, v in pdf.items():
            neighbors = list(self.G.neighbors(k))
            capacity = sum([self.G.get_edge_data(k, n)["satoshis"]
                            for n in neighbors])
            name = k
            if "alias" in self.G.node[k]:
                name = self.G.node[k]["alias"]
            print("{:12.2f} ".format(100 * v),
                  "{:12.2f} ".format(
                      100 * (w * v + (1 - w) / len(candidates))),
                  "{:10} {:10} ".format(capacity,
                                        len(neighbors)),
                  name)
            res_pdf[k] = (w * v + (1 - w) / len(candidates))
        return res_pdf
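    # Worked example of the smoothing above (made-up numbers): for two
    # candidates whose normalised average capacities are 0.8 and 0.2, the
    # returned shares are 0.7*0.8 + 0.3*(1/2) = 0.71 and
    # 0.7*0.2 + 0.3*(1/2) = 0.29, i.e. the capacity-proportional pdf pulled
    # towards the uniform distribution with a weight of 0.3.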
    def calculate_proposed_channel_capacities(self, pdf, balance=1000000):
        minimal_channel_balance = 20000  # lnd uses 20k satoshi which seems reasonable

        min_probability = min(pdf.values())
        needed_total_balance = math.ceil(
            minimal_channel_balance / min_probability)
        self.__logger.info(
            "Need at least a balance of {} satoshi to open {} channels".format(
                needed_total_balance, len(pdf)))
        while needed_total_balance > balance and len(pdf) > 1:
            min_val = min(pdf.values())
            k = [k for k, v in pdf.items() if v == min_val][0]
            self.__logger.info(
                "Not enough balance to open {} channels. Remove node: {} and rebalance pdf for channel balances".format(
                    len(pdf), k))
            del pdf[k]

            s = sum(pdf.values())
            pdf = {k: v / s for k, v in pdf.items()}

            min_probability = min(pdf.values())
            needed_total_balance = math.ceil(
                minimal_channel_balance / min_probability)
            self.__logger.info(
                "Need at least a balance of {} satoshi to open {} channels".format(
                    needed_total_balance, len(pdf)))

        return pdf
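    # Worked example (made-up numbers): for pdf = {a: 0.5, b: 0.4, c: 0.1} the
    # smallest share is 0.1, so at least ceil(20000 / 0.1) = 200000 satoshi
    # are needed to keep channel c above the 20k minimum. With a balance of
    # only 100000 satoshi, c is dropped and the pdf is renormalised to
    # {a: 0.5/0.9, b: 0.4/0.9}; then ceil(20000 / (0.4/0.9)) = 45000 fits the
    # available balance and the loop stops.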
    def find_candidates(self, num_items=21, strategy=Strategy.DIVERSE,
                        percentile=None):
        """
        Generates candidates with several strategies
        """
        self.__logger.info("running the autopilot on a graph with {} nodes and {} edges.".format(
            len(self.G.nodes()), len(self.G.edges())))
        sub_k = math.ceil(num_items / 4)
        self.__logger.info(
            "GENERATE CANDIDATES: Try to generate up to {} nodes with 4 strategies: (random, central, network improvement, liquidity)".format(num_items))
        # FIXME: should remember from where nodes are known

        res = self.__create_pdfs()

        candidats = set()
        # FIXME: Run simulations to decide the following problem:
        """
        we can either do a global sampling by merging all probability
        distributions and sample once from them or we can sample from
        each probability distribution and merge the results. These processes
        are obviously not commutative and we need to check which one seems
        more reasonable.
        My (renepickhardt) gut feeling says that several samples which are
        merged give the best of all worlds, whereas the other method would
        probably result in something that is either pretty uniform or
        dominated by one very skewed distribution. As mentioned, this needs
        to be tested.
        """
        if strategy == Strategy.DIVERSE:
            for heuristic, pdf in res.items():
                tmp = self.__sample_from_percentile(pdf, percentile, sub_k)
                candidats = candidats.union(set(tmp))

        elif strategy == Strategy.MERGE:
            merged = {}
            denominator = len(res)
            for pdf in res.values():
                for k, v in pdf.items():
                    if k not in merged:
                        merged[k] = v/denominator
                    else:
                        merged[k] += v/denominator
            candidats = self.__sample_from_percentile(merged, percentile,
                                                      num_items)
        """
        following code prints a list of candidates for debugging
        for k in res:
            if "alias" in self.G.node[key[k]]:
                print(pdf[key[k]], self.G.node[key[k]]["alias"])
        """

        if len(candidats) > num_items:
            candidats = np.random.choice(list(candidats), num_items, replace=False)

        self.__logger.info(
            "GENERATE CANDIDATES: Found {} nodes with which channel creation is suggested".format(
                len(candidats)))
        return candidats

if __name__ == '__main__':
    print("This lib needs to be given a network graph so you need to create a wrapper")
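    # --- Illustrative sketch only (not part of the original library) ---
    # The toy graph, capacities and parameters below are made up; they show
    # how a wrapper could feed a networkx graph (with a "satoshis" attribute
    # on every edge) into the Autopilot class. A real wrapper would build the
    # graph from the node's own view of the lightning network instead.
    import random

    toy_graph = nx.barbell_graph(10, 5)
    for u, v in toy_graph.edges():
        toy_graph[u][v]["satoshis"] = random.randint(50000, 5000000)

    toy_autopilot = Autopilot(toy_graph)
    toy_candidates = toy_autopilot.find_candidates(
        num_items=4, strategy=Strategy.DIVERSE)
    print("suggested channel partners of the toy run:", list(toy_candidates))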