gossipd: don't close non-local channels immediately, add 12 block delay.

This adds a new "chan_dying" message to the gossip_store, but since we
already changed the minor version in this PR, we don't bump it again.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Changelog-Added: Protocol: We now delay forgetting funding-spent channels for 12 blocks (as per latest BOLTs, to support splicing in future).
Rusty Russell
2022-09-14 13:20:32 +09:30
parent f0fa42bd73
commit a1f62ba0e7
12 changed files with 204 additions and 35 deletions

View File

@@ -23,7 +23,7 @@ CCANDIR := ccan
# Where we keep the BOLT RFCs
BOLTDIR := ../bolts/
-DEFAULT_BOLTVERSION := 6fee63fc342736a2eb7f6e856ae0d85807cc50ec
+DEFAULT_BOLTVERSION := 47d325c6ac50587d9974651a7b2a692f85c9a068
# Can be overridden on cmdline.
BOLTVERSION := $(DEFAULT_BOLTVERSION)
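(Presumably the BOLT pin moves forward so that the 12-block requirement quoted from BOLT #7 in routing.c below exists in the spec text the quote checker compares against.)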

View File

@@ -68,6 +68,7 @@ int main(int argc, char *argv[])
u32 msglen = be32_to_cpu(hdr.len);
u8 *msg, *inner;
bool deleted, push, ratelimit;
u32 blockheight;
deleted = (msglen & GOSSIP_STORE_LEN_DELETED_BIT);
push = (msglen & GOSSIP_STORE_LEN_PUSH_BIT);
@@ -121,6 +122,11 @@ int main(int argc, char *argv[])
printf("delete channel: %s\n",
type_to_string(tmpctx, struct short_channel_id,
&scid));
} else if (fromwire_gossip_store_chan_dying(msg, &scid, &blockheight)) {
printf("dying channel: %s (deadline %u)\n",
type_to_string(tmpctx, struct short_channel_id,
&scid),
blockheight);
} else {
warnx("Unknown message %u",
fromwire_peektype(msg));
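For illustration only: with an assumed channel 103x1x0 and an assumed deadline height of 115, the new branch would print a line like

    dying channel: 103x1x0 (deadline 115)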

View File

@@ -802,6 +802,17 @@ u32 gossip_store_load(struct routing_state *rstate, struct gossip_store *gs)
chan_ann = tal_steal(gs, msg);
chan_ann_off = gs->len;
break;
case WIRE_GOSSIP_STORE_CHAN_DYING: {
struct short_channel_id scid;
u32 deadline;
if (!fromwire_gossip_store_chan_dying(msg, &scid, &deadline)) {
bad = "Bad gossip_store_chan_dying";
goto badmsg;
}
remember_chan_dying(rstate, &scid, deadline, gs->len);
break;
}
case WIRE_GOSSIP_STORE_PRIVATE_UPDATE:
if (!fromwire_gossip_store_private_update(tmpctx, msg, &msg)) {
bad = "invalid gossip_store_private_update";

View File

@@ -23,3 +23,7 @@ msgdata,gossip_store_delete_chan,scid,short_channel_id,
msgtype,gossip_store_ended,4105
msgdata,gossip_store_ended,equivalent_offset,u64,
msgtype,gossip_store_chan_dying,4106
msgdata,gossip_store_chan_dying,scid,short_channel_id,
msgdata,gossip_store_chan_dying,blockheight,u32,
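From these three CSV lines the wire generator emits towire/fromwire helpers for the new record type (4106). Judging by the call sites elsewhere in this commit, the generated prototypes look roughly like this (a sketch, not the generated header itself):

    u8 *towire_gossip_store_chan_dying(const tal_t *ctx,
                                       const struct short_channel_id *scid,
                                       u32 blockheight);
    bool fromwire_gossip_store_chan_dying(const void *p,
                                          struct short_channel_id *scid,
                                          u32 *blockheight);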

View File

@@ -797,6 +797,8 @@ static void new_blockheight(struct daemon *daemon, const u8 *msg)
i--;
}
routing_expire_channels(daemon->rstate, daemon->current_blockheight);
daemon_conn_send(daemon->master,
take(towire_gossipd_new_blockheight_reply(NULL)));
}
@@ -931,28 +933,19 @@ static void handle_outpoints_spent(struct daemon *daemon, const u8 *msg)
{
struct short_channel_id *scids;
u32 blockheight;
-struct routing_state *rstate = daemon->rstate;
if (!fromwire_gossipd_outpoints_spent(msg, msg, &blockheight, &scids))
master_badmsg(WIRE_GOSSIPD_OUTPOINTS_SPENT, msg);
for (size_t i = 0; i < tal_count(scids); i++) {
-struct chan *chan = get_channel(rstate, &scids[i]);
+struct chan *chan = get_channel(daemon->rstate, &scids[i]);
if (!chan)
continue;
-status_debug(
-	"Deleting channel %s due to the funding outpoint being "
-	"spent",
-	type_to_string(msg, struct short_channel_id, &scids[i]));
-/* Suppress any now-obsolete updates/announcements */
-add_to_txout_failures(rstate, &scids[i]);
-remove_channel_from_store(rstate, chan);
-/* Freeing is sufficient since everything else is allocated off
- * of the channel and this takes care of unregistering
- * the channel */
-free_chan(rstate, chan);
+/* We have a current_blockheight, but it's not necessarily
+ * updated first. */
+routing_channel_spent(daemon->rstate, blockheight, chan);
}
}
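For reference, the sending side in lightningd uses the generated counterpart; based on the field order read back above, its call would look roughly like this (an assumption about the generated helper, not code from this diff):

    u8 *msg = towire_gossipd_outpoints_spent(NULL, blockheight, scids);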

View File

@@ -16,6 +16,7 @@ struct channel_update_timestamps;
struct broadcastable;
struct lease_rates;
struct seeker;
struct dying_channel;
/*~ The core daemon structure: */
struct daemon {

View File

@@ -33,6 +33,16 @@ struct pending_node_announce {
struct peer *peer_softref;
};
/* As per the below BOLT #7 quote, we delay forgetting a channel until 12
* blocks after we see it close. This gives time for splicing (or even other
* opens) to replace the channel, and broadcast it after 6 blocks. */
struct dying_channel {
struct short_channel_id scid;
u32 deadline_blockheight;
/* Where the dying_channel marker is in the store. */
struct broadcastable marker;
};
/* We consider a reasonable gossip rate to be 2 per day, with burst of
* 4 per day. So we use a granularity of one hour. */
#define TOKENS_PER_MSG 12
@@ -249,8 +259,8 @@ static void txout_failure_age(struct routing_state *rstate)
txout_failure_age, rstate);
}
-void add_to_txout_failures(struct routing_state *rstate,
-                           const struct short_channel_id *scid)
+static void add_to_txout_failures(struct routing_state *rstate,
+                                  const struct short_channel_id *scid)
{
if (uintmap_add(&rstate->txout_failures, scid->u64, true)
&& ++rstate->num_txout_failures == 10000) {
@@ -288,6 +298,7 @@ struct routing_state *new_routing_state(const tal_t *ctx,
rstate->gs = gossip_store_new(rstate, peers);
rstate->local_channel_announced = false;
rstate->last_timestamp = 0;
rstate->dying_channels = tal_arr(rstate, struct dying_channel, 0);
pending_cannouncement_map_init(&rstate->pending_cannouncements);
@@ -852,8 +863,8 @@ static void delete_chan_messages_from_store(struct routing_state *rstate,
&chan->half[1].bcast, update_type);
}
-void remove_channel_from_store(struct routing_state *rstate,
-                               struct chan *chan)
+static void remove_channel_from_store(struct routing_state *rstate,
+                                      struct chan *chan)
{
/* Put in tombstone marker */
gossip_store_mark_channel_deleted(rstate->gs, &chan->scid);
@@ -2087,3 +2098,84 @@ void remove_all_gossip(struct routing_state *rstate)
/* Freeing unupdated chanmaps should empty this */
assert(pending_node_map_first(rstate->pending_node_map, &pnait) == NULL);
}
static void channel_spent(struct routing_state *rstate,
struct chan *chan STEALS)
{
status_debug("Deleting channel %s due to the funding outpoint being "
"spent",
type_to_string(tmpctx, struct short_channel_id,
&chan->scid));
/* Suppress any now-obsolete updates/announcements */
add_to_txout_failures(rstate, &chan->scid);
remove_channel_from_store(rstate, chan);
/* Freeing is sufficient since everything else is allocated off
* of the channel and this takes care of unregistering
* the channel */
free_chan(rstate, chan);
}
void routing_expire_channels(struct routing_state *rstate, u32 blockheight)
{
struct chan *chan;
for (size_t i = 0; i < tal_count(rstate->dying_channels); i++) {
struct dying_channel *d = rstate->dying_channels + i;
if (blockheight < d->deadline_blockheight)
continue;
chan = get_channel(rstate, &d->scid);
if (chan)
channel_spent(rstate, chan);
/* Delete dying marker itself */
gossip_store_delete(rstate->gs,
&d->marker, WIRE_GOSSIP_STORE_CHAN_DYING);
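/* tal_arr_remove() shifts the later entries down one slot, so step
 * back and re-examine this index on the next iteration. */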
tal_arr_remove(&rstate->dying_channels, i);
i--;
}
}
void remember_chan_dying(struct routing_state *rstate,
const struct short_channel_id *scid,
u32 deadline_blockheight,
u64 index)
{
struct dying_channel d;
d.scid = *scid;
d.deadline_blockheight = deadline_blockheight;
d.marker.index = index;
tal_arr_expand(&rstate->dying_channels, d);
}
void routing_channel_spent(struct routing_state *rstate,
u32 current_blockheight,
struct chan *chan)
{
u64 index;
u32 deadline;
u8 *msg;
/* FIXME: We should note that delay is not necessary (or even
* sensible) for local channels! */
if (local_direction(rstate, chan, NULL)) {
channel_spent(rstate, chan);
return;
}
/* BOLT #7:
* - once its funding output has been spent OR reorganized out:
* - SHOULD forget a channel after a 12-block delay.
*/
deadline = current_blockheight + 12;
/* Save to gossip_store in case we restart */
msg = towire_gossip_store_chan_dying(tmpctx, &chan->scid, deadline);
index = gossip_store_add(rstate->gs, msg, 0, false, false, NULL);
/* Remember locally so we can kill it in 12 blocks */
status_debug("channel %s closing soon due"
" to the funding outpoint being spent",
type_to_string(msg, struct short_channel_id, &chan->scid));
remember_chan_dying(rstate, &chan->scid, deadline, index);
}
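To make the resulting off-by-one concrete, here is a minimal, self-contained sketch of the deadline arithmetic (the spend height of 100 is assumed; the +12 is the constant from routing_channel_spent above):

    #include <assert.h>
    #include <stdint.h>

    typedef uint32_t u32;

    int main(void)
    {
    	/* Assumed example: the funding spend confirms in block 100. */
    	u32 spend_height = 100;
    	/* As in routing_channel_spent(): deadline = height + 12. */
    	u32 deadline = spend_height + 12;	/* 112 */

    	/* routing_expire_channels() skips the entry while
    	 * blockheight < deadline. */
    	assert(111 < deadline);		/* height 111: spend 12 deep, still seen */
    	assert(112 >= deadline);	/* height 112: spend 13 deep, forgotten */
    	return 0;
    }

This matches the new test below: ten extra blocks (12 deep) leave the channel visible, one more removes it.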

View File

@@ -230,6 +230,9 @@ struct routing_state {
/* Highest timestamp of gossip we accepted (before now) */
u32 last_timestamp;
/* Channels which are closed, but we're waiting 12 blocks */
struct dying_channel *dying_channels;
#if DEVELOPER
/* Override local time for gossip messages */
struct timeabs *gossip_time;
@@ -399,21 +402,37 @@ bool routing_add_private_channel(struct routing_state *rstate,
*/
struct timeabs gossip_time_now(const struct routing_state *rstate);
/**
* Add to rstate->dying_channels
*
* Exposed here for when we load the gossip_store.
*/
void remember_chan_dying(struct routing_state *rstate,
const struct short_channel_id *scid,
u32 deadline_blockheight,
u64 index);
/**
* When a channel's funding has been spent.
*/
void routing_channel_spent(struct routing_state *rstate,
u32 current_blockheight,
struct chan *chan);
/**
* Clean up any dying channels.
*
 * This finally deletes channels past their deadline.
*/
void routing_expire_channels(struct routing_state *rstate, u32 blockheight);
/* Would we ratelimit a channel_update with this timestamp? */
bool would_ratelimit_cupdate(struct routing_state *rstate,
const struct half_chan *hc,
u32 timestamp);
-/* Remove channel from store: announcement and any updates. */
-void remove_channel_from_store(struct routing_state *rstate,
-                               struct chan *chan);
/* Returns an error string if there are unfinalized entries after load */
const char *unfinalized_entries(const tal_t *ctx, struct routing_state *rstate);
void remove_all_gossip(struct routing_state *rstate);
-/* This scid is dead to us. */
-void add_to_txout_failures(struct routing_state *rstate,
-                           const struct short_channel_id *scid);
#endif /* LIGHTNING_GOSSIPD_ROUTING_H */
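To summarize how the three entry points interact (drawn from the call sites in this commit):

    /* Call-order summary (names as used in the gossipd changes above):
     *
     *   handle_outpoints_spent(): routing_channel_spent(rstate, blockheight, chan)
     *       -> local channels die immediately; others get a chan_dying
     *          record with deadline = blockheight + 12.
     *
     *   new_blockheight():        routing_expire_channels(rstate, current_blockheight)
     *       -> reaps channels past their deadline and deletes their markers.
     *
     *   gossip_store_load():      remember_chan_dying(rstate, &scid, deadline, gs->len)
     *       -> re-populates rstate->dying_channels after a restart.
     */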

View File

@@ -4085,7 +4085,7 @@ def test_multichan(node_factory, executor, bitcoind):
l2.rpc.close(l3.info['id'])
l2.rpc.close(scid23b)
-bitcoind.generate_block(1, wait_for_mempool=1)
+bitcoind.generate_block(13, wait_for_mempool=1)
sync_blockheight(bitcoind, [l1, l2, l3])
# Gossip works as expected.
@@ -4130,14 +4130,14 @@ def test_multichan(node_factory, executor, bitcoind):
"state": "RCVD_REMOVE_ACK_REVOCATION"},
{"short_channel_id": scid12,
"id": 2,
"expiry": 123,
"expiry": 135,
"direction": "out",
"amount_msat": Millisatoshi(100001001),
"payment_hash": inv3['payment_hash'],
"state": "RCVD_REMOVE_ACK_REVOCATION"},
{"short_channel_id": scid12,
"id": 3,
"expiry": 123,
"expiry": 135,
"direction": "out",
"amount_msat": Millisatoshi(100001001),
"payment_hash": inv4['payment_hash'],

View File

@@ -96,7 +96,7 @@ def test_block_backfill(node_factory, bitcoind, chainparams):
# Now close the channel and make sure `l3` cleans up correctly:
txid = l1.rpc.close(l2.info['id'])['txid']
-bitcoind.generate_block(1, wait_for_mempool=txid)
+bitcoind.generate_block(13, wait_for_mempool=txid)
wait_for(lambda: len(l3.rpc.listchannels()['channels']) == 0)

View File

@@ -559,8 +559,9 @@ def test_gossip_persistence(node_factory, bitcoind):
# channel from their network view
l1.rpc.dev_fail(l2.info['id'])
-# We need to wait for the unilateral close to hit the mempool
-bitcoind.generate_block(1, wait_for_mempool=1)
+# We need to wait for the unilateral close to hit the mempool,
+# and 12 blocks for nodes to actually forget it.
+bitcoind.generate_block(13, wait_for_mempool=1)
wait_for(lambda: active(l1) == [scid23, scid23])
wait_for(lambda: active(l2) == [scid23, scid23])
@@ -1391,7 +1392,7 @@ def test_gossip_notices_close(node_factory, bitcoind):
txid = l2.rpc.close(l3.info['id'])['txid']
wait_for(lambda: only_one(l2.rpc.listpeers(l3.info['id'])['peers'])['channels'][0]['state'] == 'CLOSINGD_COMPLETE')
-bitcoind.generate_block(1, txid)
+bitcoind.generate_block(13, txid)
wait_for(lambda: l1.rpc.listchannels()['channels'] == [])
wait_for(lambda: l1.rpc.listnodes()['nodes'] == [])
@@ -2097,7 +2098,7 @@ def test_topology_leak(node_factory, bitcoind):
# Close and wait for gossip to catchup.
txid = l2.rpc.close(l3.info['id'])['txid']
-bitcoind.generate_block(1, txid)
+bitcoind.generate_block(13, txid)
wait_for(lambda: len(l1.rpc.listchannels()['channels']) == 2)
@@ -2122,3 +2123,45 @@ def test_parms_listforwards(node_factory):
assert len(forwards_new) == 0
assert len(forwards_dep) == 0
@pytest.mark.developer("gossip without DEVELOPER=1 is slow")
def test_close_12_block_delay(node_factory, bitcoind):
l1, l2, l3, l4 = node_factory.line_graph(4, wait_for_announce=True)
# Close l1-l2
txid = l1.rpc.close(l2.info['id'])['txid']
bitcoind.generate_block(1, txid)
# But l4 doesn't believe it immediately.
l4.daemon.wait_for_log("channel .* closing soon due to the funding outpoint being spent")
# Close l2-l3 one block later.
txid = l2.rpc.close(l3.info['id'])['txid']
bitcoind.generate_block(1, txid)
l4.daemon.wait_for_log("channel .* closing soon due to the funding outpoint being spent")
# BOLT #7:
# - once its funding output has been spent OR reorganized out:
# - SHOULD forget a channel after a 12-block delay.
# That implies 12 blocks *after* spending, i.e. 13 blocks deep!
# 12 blocks deep, l4 still sees it
bitcoind.generate_block(10)
sync_blockheight(bitcoind, [l4])
assert len(l4.rpc.listchannels(source=l1.info['id'])['channels']) == 1
# 13 blocks deep does it.
bitcoind.generate_block(1)
wait_for(lambda: l4.rpc.listchannels(source=l1.info['id'])['channels'] == [])
# Other channel still visible.
assert len(l4.rpc.listchannels(source=l2.info['id'])['channels']) == 1
# Restart: it remembers channel is dying.
l4.restart()
# One more block, it's forgotten too.
bitcoind.generate_block(1)
wait_for(lambda: l4.rpc.listchannels(source=l2.info['id'])['channels'] == [])

View File

@@ -363,7 +363,7 @@ def test_invoice_routeboost_private(node_factory, bitcoind):
# It will use an explicit exposeprivatechannels even if it thinks its a dead-end
l0.rpc.close(l1.info['id'])
l0.wait_for_channel_onchain(l1.info['id'])
-bitcoind.generate_block(1)
+bitcoind.generate_block(13)
wait_for(lambda: l2.rpc.listchannels(scid_dummy)['channels'] == [])
inv = l2.rpc.invoice(amount_msat=123456, label="inv7", description="?", exposeprivatechannels=scid)