From 4f31f5971a96afda96394e3c1caa59dd1a6685fd Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Mon, 11 Feb 2019 13:53:03 +0100 Subject: [PATCH] probe: Added a very simple probe plugin This is a plugin that regularly probes the network by sending a random payment to a random destination. These do not succeed, since we randomly selected the `payment_hash`, but can still give us a lot of insight into the network health. The plugin stores its results in a `sqlite3` database for later inspection. Signed-off-by: Christian Decker --- probe/README.md | 32 +++++++++ probe/probe.py | 183 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 probe/README.md create mode 100755 probe/probe.py diff --git a/probe/README.md b/probe/README.md new file mode 100644 index 0000000..629a8f2 --- /dev/null +++ b/probe/README.md @@ -0,0 +1,32 @@ +# Network Probe plugin + +This plugin regularly performs a random probe of the network by sending a +payment to a random node in the network, with a random `payment_hash`, and +observing how the network reacts. The random `payment_hash` results in the +payments being rejected at the destination, so no funds are actually +transferred. The error messages however allow us to gather some information +about the success probability of a payment, and the stability of the channels. + +The random selection of destination nodes is a worst case scenario, since it's +likely that most of the nodes in the network are leaf nodes that are not +well-connected and often offline at any point in time. Expect to see a lot of +errors about being unable to route these payments as a result of this. + +The probe data is stored in a sqlite3 database for later inspection and to be +able to eventually draw pretty plots about how the network stability changes +over time. For now you can inspect the results using the `sqlite3` command +line utility: + +```bash +sqlite3 ~/.lightning/probes.db "select destination, erring_channel, failcode from probes" +``` + +Failcode -1 and 16399 are special: + + - -1 indicates that we were unable to find a route to the destination. This + usually indicates that this is a leaf node that is currently offline. + + - 16399 is the code for unknown payment details and indicates a successful + probe. The destination received the incoming payment but could not find a + matching `payment_key`, which is expected since we generated the + `payment_hash` at random :-) diff --git a/probe/probe.py b/probe/probe.py new file mode 100755 index 0000000..92f3c22 --- /dev/null +++ b/probe/probe.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +"""Plugin that probes the network for failed channels. + +This plugin regularly performs a random probe of the network by sending a +payment to a random node in the network, with a random `payment_hash`, and +observing how the network reacts. The random `payment_hash` results in the +payments being rejected at the destination, so no funds are actually +transferred. The error messages however allow us to gather some information +about the success probability of a payment, and the stability of the channels. + +The random selection of destination nodes is a worst case scenario, since it's +likely that most of the nodes in the network are leaf nodes that are not +well-connected and often offline at any point in time. Expect to see a lot of +errors about being unable to route these payments as a result of this. + +The probe data is stored in a sqlite3 database for later inspection and to be +able to eventually draw pretty plots about how the network stability changes +over time. For now you can inspect the results using the `sqlite3` command +line utility: + +```bash +sqlite3 ~/.lightning/probes.db "select destination, erring_channel, failcode from probes" +``` + +Failcode -1 and 16399 are special: + + - -1 indicates that we were unable to find a route to the destination. This + usually indicates that this is a leaf node that is currently offline. + + - 16399 is the code for unknown payment details and indicates a successful + probe. The destination received the incoming payment but could not find a + matching `payment_key`, which is expected since we generated the + `payment_hash` at random :-) + +""" +from datetime import datetime +from lightning import Plugin, RpcError +from random import choice +from sqlalchemy import Column, Integer, String, DateTime +from sqlalchemy import create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from time import sleep, time +import heapq +import json +import os +import random +import string +import threading + + +Base = declarative_base() +plugin = Plugin() + +exclusions = [] +temporary_exclusions = {} + + +class Probe(Base): + __tablename__ = "probes" + id = Column(Integer, primary_key=True) + destination = Column(String) + route = Column(String) + error = Column(String) + erring_channel = Column(String) + failcode = Column(Integer) + payment_hash = Column(String) + started_at = Column(DateTime) + finished_at = Column(DateTime) + + +@plugin.method('probe') +def start_probe(plugin): + nodes = plugin.rpc.listnodes()['nodes'] + dst = choice(nodes) + + s = plugin.Session() + p = Probe(destination=dst['nodeid'], started_at=datetime.now()) + s.add(p) + try: + route = plugin.rpc.getroute( + dst['nodeid'], + msatoshi=10000, + riskfactor=1, + exclude=exclusions + list(temporary_exclusions.keys()) + )['route'] + p.route = ','.join([r['channel'] for r in route]) + p.payment_hash = ''.join(choice(string.hexdigits) for _ in range(64)) + except RpcError: + p.failcode = -1 + s.commit() + return + s.commit() + + try: + plugin.rpc.sendpay(route, p.payment_hash) + plugin.rpc.waitsendpay(p.payment_hash) + except RpcError as e: + error = e.error['data'] + p.erring_channel = e.error['data']['erring_channel'] + p.failcode = e.error['data']['failcode'] + p.error = json.dumps(error) + + if p.failcode in [16392, 16394]: + exclusion = "{erring_channel}/{erring_direction}".format(**error) + print('Adding exclusion for channel {} ({} total))'.format( + exclusion, len(exclusions)) + ) + exclusions.append(exclusion) + + if p.failcode == 4103: + exclusion = "{erring_channel}/{erring_direction}".format(**error) + print('Adding temporary exclusion for channel {} ({} total))'.format( + exclusion, len(temporary_exclusions)) + ) + expiry = time() + plugin.probe_exclusion_duration + temporary_exclusions[exclusion] = expiry + + p.finished_at = datetime.now() + s.commit() + s.close() + + +def clear_temporary_exclusion(plugin): + timed_out = [k for k, v in temporary_exclusions.items() if v < time()] + for k in timed_out: + del temporary_exclusions[k] + + print("Removed {}/{} temporary exclusions.".format( + len(timed_out), len(temporary_exclusions)) + ) + + +def schedule(plugin): + # List of scheduled calls with next runtime, function and interval + next_runs = [ + (time() + 300, clear_temporary_exclusion, 300), + (time() + plugin.probe_interval, start_probe, plugin.probe_interval) + ] + heapq.heapify(next_runs) + + while True: + n = heapq.heappop(next_runs) + t = n[0] - time() + if t > 0: + sleep(t) + # Call the function + n[1](plugin) + + # Schedule the next run + heapq.heappush(next_runs, (time() + n[2], n[1], n[2])) + + +@plugin.init() +def init(configuration, options, plugin): + plugin.probe_interval = int(options['probe-interval']) + plugin.probe_exclusion_duration = int(options['probe-exclusion-duration']) + + db_filename = 'sqlite:///' + os.path.join( + configuration['lightning-dir'], + 'probes.db' + ) + + engine = create_engine(db_filename, echo=True) + Base.metadata.create_all(engine) + plugin.Session = sessionmaker() + plugin.Session.configure(bind=engine) + t = threading.Thread(target=schedule, args=[plugin]) + t.daemon = True + t.start() + + +plugin.add_option( + 'probe-interval', + '3600', + 'How many seconds should we wait between probes?' +) +plugin.add_option( + 'probe-exclusion-duration', + '1800', + 'How many seconds should temporarily failed channels be excluded?' +) +plugin.run()