lightningd: don't run more than one reconnect timer at once.

In various circumstances we can start a reconnection while one is
already in progress.  These attempts can pile up if the node really is unreachable.

Reported-by: @whitslack
Fixes: #5654
Changelog-Fixed: lightningd: we no longer stack multiple reconnection attempts if connections fail.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell
2023-02-01 14:49:29 +10:30
parent ff1d537b87
commit 6347ee7308
2 changed files with 38 additions and 0 deletions

View File

@@ -252,6 +252,22 @@ struct delayed_reconnect {
bool dns_fallback;
};
/* Key accessor for the delayed_reconnect_map hash table: hands back the
 * address of the node id stored inside entry @d. */
static const struct node_id *delayed_reconnect_keyof(const struct delayed_reconnect *d)
{
	const struct node_id *key = &d->id;

	return key;
}
/* Equality callback for the delayed_reconnect_map hash table: returns
 * whatever node_id_eq() reports for the lookup key @node_id and the id
 * stored in entry @d. */
static bool node_id_delayed_reconnect_eq(const struct delayed_reconnect *d,
					 const struct node_id *node_id)
{
	const struct node_id *entry_id = &d->id;

	return node_id_eq(node_id, entry_id);
}
/* Instantiate the delayed_reconnect_map hash table type, holding
 * struct delayed_reconnect entries.  delayed_reconnect_keyof extracts
 * each entry's key, node_id_hash hashes it, and
 * node_id_delayed_reconnect_eq compares a key against an entry.  This
 * file uses the resulting delayed_reconnect_map_init/_add/_get/_del
 * helpers. */
HTABLE_DEFINE_TYPE(struct delayed_reconnect,
delayed_reconnect_keyof,
node_id_hash, node_id_delayed_reconnect_eq,
delayed_reconnect_map);
static void gossipd_got_addrs(struct subd *subd,
const u8 *msg,
const int *fds,
@@ -281,6 +297,11 @@ static void do_connect(struct delayed_reconnect *d)
subd_req(d, d->ld->gossip, take(msg), -1, 0, gossipd_got_addrs, d);
}
/* Destructor attached to each delayed reconnect via tal_add_destructor()
 * in try_connect(): removes @d from the lightningd-wide
 * delayed_reconnect_map. */
static void destroy_delayed_reconnect(struct delayed_reconnect *d)
{
	struct delayed_reconnect_map *map = d->ld->delayed_reconnect_map;

	delayed_reconnect_map_del(map, d);
}
static void try_connect(const tal_t *ctx,
struct lightningd *ld,
const struct node_id *id,
@@ -291,11 +312,23 @@ static void try_connect(const tal_t *ctx,
struct delayed_reconnect *d;
struct peer *peer;
/* Don't stack, unless this is an instant reconnect */
d = delayed_reconnect_map_get(ld->delayed_reconnect_map, id);
if (d) {
if (seconds_delay) {
log_peer_debug(ld->log, id, "Already reconnecting");
return;
}
tal_free(d);
}
d = tal(ctx, struct delayed_reconnect);
d->ld = ld;
d->id = *id;
d->addrhint = tal_dup_or_null(d, struct wireaddr_internal, addrhint);
d->dns_fallback = dns_fallback;
delayed_reconnect_map_add(ld->delayed_reconnect_map, d);
tal_add_destructor(d, destroy_delayed_reconnect);
if (!seconds_delay) {
do_connect(d);
@@ -577,6 +610,9 @@ int connectd_init(struct lightningd *ld)
const char *websocket_helper_path;
void *ret;
ld->delayed_reconnect_map = tal(ld, struct delayed_reconnect_map);
delayed_reconnect_map_init(ld->delayed_reconnect_map);
websocket_helper_path = subdaemon_path(tmpctx, ld,
"lightning_websocketd");

View File

@@ -183,6 +183,8 @@ struct lightningd {
/* Daemon looking after peers during init / before channel. */
struct subd *connectd;
/* Reconnection attempts */
struct delayed_reconnect_map *delayed_reconnect_map;
/* All peers we're tracking (by node_id) */
struct peer_node_id_map *peers;