From c98734e0a4011cd1e5b6f93f00a175c06200fe10 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 12 Jan 2022 06:05:12 +1030 Subject: [PATCH] connectd: don't ignore requests to connect if we're shutting down. We used to shut down peers atomically, but now that we flush the connections, there's a delay. If we are asked to connect in that time, we ignore it, as we are already connected, but that's wrong: we need to remember that we were told to connect, and reconnect afterwards. This should solve a few weird test failures where "connect" would hang indefinitely. Signed-off-by: Rusty Russell --- connectd/connectd.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/connectd/connectd.c b/connectd/connectd.c index e8897c7d3..01be81414 100644 --- a/connectd/connectd.c +++ b/connectd/connectd.c @@ -1733,10 +1733,17 @@ static void try_connect_peer(struct daemon *daemon, struct wireaddr_internal *addrs; bool use_proxy = daemon->always_use_proxy; struct connecting *connect; + struct peer *existing; - /* Already done? May happen with timer. */ - if (peer_htable_get(&daemon->peers, id)) - return; + /* Already existing? */ + existing = peer_htable_get(&daemon->peers, id); + if (existing) { + /* If it's exiting now, we've raced: reconnect after */ + if (existing->to_subd + && existing->to_peer + && !existing->told_to_close) + return; + } /* If we're trying to connect it right now, that's OK. */ if ((connect = find_connecting(daemon, id))) { @@ -1807,7 +1814,8 @@ static void try_connect_peer(struct daemon *daemon, tal_add_destructor(connect, destroy_connecting); /* Now we kick it off by recursively trying connect->addrs[connect->addrnum] */ - try_connect_one_addr(connect); + if (!existing) + try_connect_one_addr(connect); } /* lightningd tells us to connect to a peer by id, with optional addr hint. 
*/ @@ -1828,6 +1836,8 @@ static void connect_to_peer(struct daemon *daemon, const u8 *msg) void peer_conn_closed(struct peer *peer) { + struct connecting *connect = find_connecting(peer->daemon, &peer->id); + /* These should be closed already! */ assert(!peer->to_subd); assert(!peer->to_peer); @@ -1841,6 +1851,10 @@ void peer_conn_closed(struct peer *peer) * a destructor attached to peer (called destroy_peer by * convention). */ tal_free(peer); + + /* If we wanted to connect to it, but found it was exiting, try again */ + if (connect) + try_connect_one_addr(connect); } /* A peer is gone: clean things up. */