From 6dbd99ddc6673c0886835bd5a4c61324fed4c8b8 Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Mon, 14 Aug 2017 22:44:44 +0200 Subject: [PATCH] gossip: Fix a race condition between release_peer and fail_peer There was a race condition that would trigger an assertion failure, crashing the daemon, if a call to release_peer was interleaved with a fail_peer. The release_peer was making the peer non-local, which then caused the assert(peer->local) in fail_peer to fail. fail_peer now handles three cases explicitly: peer not found, local, and non-local. Signed-off-by: Christian Decker --- lightningd/gossip/gossip.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lightningd/gossip/gossip.c b/lightningd/gossip/gossip.c index a58fa5505..de067d0dd 100644 --- a/lightningd/gossip/gossip.c +++ b/lightningd/gossip/gossip.c @@ -442,9 +442,11 @@ static struct io_plan *release_peer(struct io_conn *conn, struct daemon *daemon, "%s", tal_hex(trc, msg)); peer = find_peer(daemon, unique_id); - if (!peer) { + if (!peer || !peer->local) { /* This can happen with a reconnect vs connect race. - * See gossip_peer_released in master daemon. */ + * See gossip_peer_released in master daemon. It may + * also happen if we asked to release just before + * failing the peer*/ daemon_conn_send(&daemon->master, take(towire_gossipctl_release_peer_replyfail(msg))); } else { @@ -470,11 +472,13 @@ static struct io_plan *fail_peer(struct io_conn *conn, struct daemon *daemon, peer = find_peer(daemon, unique_id); if (!peer) status_trace("Unknown fail_peer %"PRIu64, unique_id); - else { - assert(peer->local); + else if (peer->local) { status_trace("fail_peer %"PRIu64, unique_id); /* This owns the peer, so we can free it */ io_close(peer->conn); + } else { + status_trace("Could not fail_peer %"PRIu64", it's not local", + unique_id); } return daemon_conn_read_next(conn, &daemon->master);