From be2622a4ff69b93d775d2dc0b129497c93265e34 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Sat, 9 Oct 2021 16:23:27 +1030
Subject: [PATCH] channeld: perform regular keepalive pings.

Send a ping every 15-45 seconds. If we try to send another one and
we haven't got a reply, hang up.

Signed-off-by: Rusty Russell
Changelog-Changed: Protocol: Send regular pings to detect dead connections (particularly for Tor).
---
 channeld/channeld.c | 41 +++++++++++++++++++++++++++++++++-------
 tests/test_connection.py | 14 ++++++++++++++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/channeld/channeld.c b/channeld/channeld.c
index 368c84776..93c0c775b 100644
--- a/channeld/channeld.c
+++ b/channeld/channeld.c
@@ -100,6 +100,9 @@ struct peer {
 	u64 commit_timer_attempts;
 	u32 commit_msec;
 
+	/* Random ping timer, to detect dead connections. */
+	struct oneshot *ping_timer;
+
 	/* Are we expecting a pong? */
 	bool expecting_pong;
 
@@ -1084,6 +1087,29 @@ static struct bitcoin_signature *calc_commitsigs(const tal_t *ctx,
 	return htlc_sigs;
 }
 
+/* Mutual recursion: send_ping() re-arms the timer that invokes it. */
+static void send_ping(struct peer *peer);
+
+static void set_ping_timer(struct peer *peer)
+{
+	peer->ping_timer = new_reltimer(&peer->timers, peer,
+					time_from_sec(15 + pseudorand(30)),
+					send_ping, peer);
+}
+
+static void send_ping(struct peer *peer)
+{
+	/* Previous ping never answered?  Give up on this connection. */
+	if (peer->expecting_pong) {
+		status_debug("Last ping unreturned: hanging up");
+		exit(0);
+	}
+
+	sync_crypto_write_no_delay(peer->pps, take(make_ping(NULL, 1, 0)));
+	peer->expecting_pong = true;
+	set_ping_timer(peer);
+}
+
 /* Peer protocol doesn't want sighash flags. */
 static secp256k1_ecdsa_signature *raw_sigs(const tal_t *ctx,
 					   const struct bitcoin_signature *sigs)
@@ -2121,12 +2147,6 @@ static void peer_in(struct peer *peer, const u8 *msg)
 	 */
 	bool soft_error = peer->funding_locked[REMOTE] || peer->funding_locked[LOCAL];
 
-	/* Catch our own ping replies. */
-	if (type == WIRE_PONG && peer->expecting_pong) {
-		peer->expecting_pong = false;
-		return;
-	}
-
 	if (channeld_handle_custommsg(msg))
 		return;
 
@@ -2210,6 +2230,13 @@ static void peer_in(struct peer *peer, const u8 *msg)
 	case WIRE_INIT_RBF:
 	case WIRE_ACK_RBF:
 		break;
+	case WIRE_PONG:
+		if (peer->expecting_pong) {
+			peer->expecting_pong = false;
+			return;
+		}
+		status_debug("Unexpected pong?");
+		return;
 
 	case WIRE_CHANNEL_REESTABLISH:
 		handle_unexpected_reestablish(peer, msg);
@@ -2225,7 +2252,6 @@ static void peer_in(struct peer *peer, const u8 *msg)
 	case WIRE_GOSSIP_TIMESTAMP_FILTER:
 	case WIRE_REPLY_SHORT_CHANNEL_IDS_END:
 	case WIRE_PING:
-	case WIRE_PONG:
 	case WIRE_WARNING:
 	case WIRE_ERROR:
 	case WIRE_ONION_MESSAGE:
@@ -3856,6 +3882,7 @@ int main(int argc, char *argv[])
 	peer->expecting_pong = false;
 	timers_init(&peer->timers, time_mono());
 	peer->commit_timer = NULL;
+	set_ping_timer(peer);
 	peer->have_sigs[LOCAL] = peer->have_sigs[REMOTE] = false;
 	peer->announce_depth_reached = false;
 	peer->channel_local_active = false;
diff --git a/tests/test_connection.py b/tests/test_connection.py
index 0bc0f95c2..891a66123 100644
--- a/tests/test_connection.py
+++ b/tests/test_connection.py
@@ -3734,3 +3734,17 @@ def test_old_feerate(node_factory):
 
     # This will timeout if l2 didn't accept fee.
     l1.pay(l2, 1000)
+
+
+@pytest.mark.developer("dev-disconnect required")
+def test_ping_timeout(node_factory):
+    # Disconnects after this, but doesn't know it.
+    l1_disconnects = ['xWIRE_PING']
+
+    l1, l2 = node_factory.line_graph(2, opts=[{'dev-no-reconnect': None,
+                                               'disconnect': l1_disconnects},
+                                              {}])
+    # First ping is due after 15-45 seconds; the hangup happens when the next one is due
+    l1.daemon.wait_for_log('Last ping unreturned: hanging up',
+                           timeout=45 + 45 + 5)
+    wait_for(lambda: l1.rpc.getpeer(l2.info['id'])['connected'] is False)