diff --git a/connectd/connectd.c b/connectd/connectd.c index 07a952732..48d027e54 100644 --- a/connectd/connectd.c +++ b/connectd/connectd.c @@ -310,6 +310,7 @@ static struct peer *new_peer(struct daemon *daemon, peer->ready_to_die = false; peer->active = false; peer->peer_outq = msg_queue_new(peer, false); + peer->last_recv_time = time_now(); #if DEVELOPER peer->dev_writes_enabled = NULL; diff --git a/connectd/connectd.h b/connectd/connectd.h index 9af5f815d..db5ec2022 100644 --- a/connectd/connectd.h +++ b/connectd/connectd.h @@ -87,6 +87,9 @@ struct peer { /* Random ping timer, to detect dead connections. */ struct oneshot *ping_timer; + /* Last time we received traffic */ + struct timeabs last_recv_time; + #if DEVELOPER bool dev_read_enabled; /* If non-NULL, this counts down; 0 means disable */ diff --git a/connectd/multiplex.c b/connectd/multiplex.c index d3d189a4a..0db695990 100644 --- a/connectd/multiplex.c +++ b/connectd/multiplex.c @@ -425,16 +425,23 @@ static void set_ping_timer(struct peer *peer) static void send_ping(struct peer *peer) { - /* Already have a ping in flight? */ - if (peer->expecting_pong != PONG_UNEXPECTED) { - status_peer_debug(&peer->id, "Last ping unreturned: hanging up"); - if (peer->to_peer) - io_close(peer->to_peer); - return; + /* If it's still sending us traffic, maybe ping reply is backed up? + * That's OK, ping is just to make sure it's still alive, and clearly + * it is. */ + if (time_before(peer->last_recv_time, + timeabs_sub(time_now(), time_from_sec(60)))) { + /* Already have a ping in flight? */ + if (peer->expecting_pong != PONG_UNEXPECTED) { + status_peer_debug(&peer->id, "Last ping unreturned: hanging up"); + if (peer->to_peer) + io_close(peer->to_peer); + return; + } + + inject_peer_msg(peer, take(make_ping(NULL, 1, 0))); + peer->expecting_pong = PONG_EXPECTED_PROBING; } - inject_peer_msg(peer, take(make_ping(NULL, 1, 0))); - peer->expecting_pong = PONG_EXPECTED_PROBING; set_ping_timer(peer); } @@ -993,6 +1000,9 @@ static struct io_plan *read_body_from_peer_done(struct io_conn *peer_conn, if (!IFDEV(peer->dev_read_enabled, true)) return read_hdr_from_peer(peer_conn, peer); + /* We got something! */ + peer->last_recv_time = time_now(); + /* Don't process packets while we're closing */ if (peer->ready_to_die) return read_hdr_from_peer(peer_conn, peer); diff --git a/tests/test_connection.py b/tests/test_connection.py index a4379627b..198556242 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -3892,15 +3892,18 @@ def test_ping_timeout(node_factory): l1, l2 = node_factory.get_nodes(2, opts=[{'dev-no-reconnect': None, 'disconnect': l1_disconnects}, - {}]) + {'dev-no-ping-timer': None}]) l1.rpc.connect(l2.info['id'], 'localhost', l2.port) - # Takes 15-45 seconds, then another to try second ping - # Because of ping timer randomness we don't know which side hangs up first - wait_for(lambda: l1.rpc.listpeers(l2.info['id'])['peers'] == [], - timeout=45 + 45 + 5) - wait_for(lambda: (l1.daemon.is_in_log('Last ping unreturned: hanging up') - or l2.daemon.is_in_log('Last ping unreturned: hanging up'))) + # This can take 10 seconds (dev-fast-gossip means timer fires every 5 seconds) + l1.daemon.wait_for_log('seeker: startup peer finished', timeout=15) + # Ping timers runs at 15-45 seconds, *but* only fires if also 60 seconds + # after previous traffic. + l1.daemon.wait_for_log('dev_disconnect: xWIRE_PING', timeout=60 + 45 + 5) + + # Next pign will cause hangup + l1.daemon.wait_for_log('Last ping unreturned: hanging up', timeout=45 + 5) + wait_for(lambda: l1.rpc.listpeers(l2.info['id'])['peers'] == []) @pytest.mark.openchannel('v1')