mirror of
https://github.com/aljazceru/lightning.git
synced 2026-02-04 13:44:22 +01:00
connectd: don't insist on ping replies when other traffic is flowing.
Got complaints about us hanging up on some nodes because they don't respond to pings in a timely manner (e.g. ACINQ?), but that turned out to be something else. Nonetheless, we've had reports in the past of LND badly prioritizing gossip traffic, and thus important messages can get queued behind gossip dumps! Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Changelog-Changed: connectd: give busy peers more time to respond to pings.
This commit is contained in:
@@ -310,6 +310,7 @@ static struct peer *new_peer(struct daemon *daemon,
|
||||
peer->ready_to_die = false;
|
||||
peer->active = false;
|
||||
peer->peer_outq = msg_queue_new(peer, false);
|
||||
peer->last_recv_time = time_now();
|
||||
|
||||
#if DEVELOPER
|
||||
peer->dev_writes_enabled = NULL;
|
||||
|
||||
@@ -87,6 +87,9 @@ struct peer {
|
||||
/* Random ping timer, to detect dead connections. */
|
||||
struct oneshot *ping_timer;
|
||||
|
||||
/* Last time we received traffic */
|
||||
struct timeabs last_recv_time;
|
||||
|
||||
#if DEVELOPER
|
||||
bool dev_read_enabled;
|
||||
/* If non-NULL, this counts down; 0 means disable */
|
||||
|
||||
@@ -425,16 +425,23 @@ static void set_ping_timer(struct peer *peer)
|
||||
|
||||
static void send_ping(struct peer *peer)
|
||||
{
|
||||
/* Already have a ping in flight? */
|
||||
if (peer->expecting_pong != PONG_UNEXPECTED) {
|
||||
status_peer_debug(&peer->id, "Last ping unreturned: hanging up");
|
||||
if (peer->to_peer)
|
||||
io_close(peer->to_peer);
|
||||
return;
|
||||
/* If it's still sending us traffic, maybe ping reply is backed up?
|
||||
* That's OK, ping is just to make sure it's still alive, and clearly
|
||||
* it is. */
|
||||
if (time_before(peer->last_recv_time,
|
||||
timeabs_sub(time_now(), time_from_sec(60)))) {
|
||||
/* Already have a ping in flight? */
|
||||
if (peer->expecting_pong != PONG_UNEXPECTED) {
|
||||
status_peer_debug(&peer->id, "Last ping unreturned: hanging up");
|
||||
if (peer->to_peer)
|
||||
io_close(peer->to_peer);
|
||||
return;
|
||||
}
|
||||
|
||||
inject_peer_msg(peer, take(make_ping(NULL, 1, 0)));
|
||||
peer->expecting_pong = PONG_EXPECTED_PROBING;
|
||||
}
|
||||
|
||||
inject_peer_msg(peer, take(make_ping(NULL, 1, 0)));
|
||||
peer->expecting_pong = PONG_EXPECTED_PROBING;
|
||||
set_ping_timer(peer);
|
||||
}
|
||||
|
||||
@@ -993,6 +1000,9 @@ static struct io_plan *read_body_from_peer_done(struct io_conn *peer_conn,
|
||||
if (!IFDEV(peer->dev_read_enabled, true))
|
||||
return read_hdr_from_peer(peer_conn, peer);
|
||||
|
||||
/* We got something! */
|
||||
peer->last_recv_time = time_now();
|
||||
|
||||
/* Don't process packets while we're closing */
|
||||
if (peer->ready_to_die)
|
||||
return read_hdr_from_peer(peer_conn, peer);
|
||||
|
||||
@@ -3892,15 +3892,18 @@ def test_ping_timeout(node_factory):
|
||||
|
||||
l1, l2 = node_factory.get_nodes(2, opts=[{'dev-no-reconnect': None,
|
||||
'disconnect': l1_disconnects},
|
||||
{}])
|
||||
{'dev-no-ping-timer': None}])
|
||||
l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
|
||||
|
||||
# Takes 15-45 seconds, then another to try second ping
|
||||
# Because of ping timer randomness we don't know which side hangs up first
|
||||
wait_for(lambda: l1.rpc.listpeers(l2.info['id'])['peers'] == [],
|
||||
timeout=45 + 45 + 5)
|
||||
wait_for(lambda: (l1.daemon.is_in_log('Last ping unreturned: hanging up')
|
||||
or l2.daemon.is_in_log('Last ping unreturned: hanging up')))
|
||||
# This can take 10 seconds (dev-fast-gossip means timer fires every 5 seconds)
|
||||
l1.daemon.wait_for_log('seeker: startup peer finished', timeout=15)
|
||||
# Ping timers runs at 15-45 seconds, *but* only fires if also 60 seconds
|
||||
# after previous traffic.
|
||||
l1.daemon.wait_for_log('dev_disconnect: xWIRE_PING', timeout=60 + 45 + 5)
|
||||
|
||||
# Next pign will cause hangup
|
||||
l1.daemon.wait_for_log('Last ping unreturned: hanging up', timeout=45 + 5)
|
||||
wait_for(lambda: l1.rpc.listpeers(l2.info['id'])['peers'] == [])
|
||||
|
||||
|
||||
@pytest.mark.openchannel('v1')
|
||||
|
||||
Reference in New Issue
Block a user