From 0b7f78929176a2483d8cba7c6e48fa10f5db3ee6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 30 Mar 2022 14:13:12 +1030 Subject: [PATCH] lightningd: extra sanity checks and rescue attempts for missing HTLCs. These trip when anything weird happens; turns out that we tell onchaind about old htlcs (e.g. for penalties), so in that case we can actually have it tell us about missing HTLCs which we no longer have in memory. Signed-off-by: Rusty Russell --- lightningd/onchain_control.c | 20 ++++++- lightningd/peer_htlcs.c | 101 +++++++++++++++++++++++++++++++++-- lightningd/peer_htlcs.h | 3 +- 3 files changed, 117 insertions(+), 7 deletions(-) diff --git a/lightningd/onchain_control.c b/lightningd/onchain_control.c index d859b9006..1c22cc1b9 100644 --- a/lightningd/onchain_control.c +++ b/lightningd/onchain_control.c @@ -390,6 +390,14 @@ static void handle_missing_htlc_output(struct channel *channel, const u8 *msg) return; } + /* We only set tell_if_missing on LOCAL htlcs */ + if (htlc.owner != LOCAL) { + channel_internal_error(channel, + "onchaind_missing_htlc_output: htlc %"PRIu64" is not local!", + htlc.id); + return; + } + /* BOLT #5: * * - for any committed HTLC that does NOT have an output in this @@ -400,7 +408,7 @@ static void handle_missing_htlc_output(struct channel *channel, const u8 *msg) * corresponding to the HTLC. * - MAY fail the corresponding incoming HTLC sooner. */ - onchain_failed_our_htlc(channel, &htlc, "missing in commitment tx"); + onchain_failed_our_htlc(channel, &htlc, "missing in commitment tx", false); } static void handle_onchain_htlc_timeout(struct channel *channel, const u8 *msg) @@ -412,6 +420,14 @@ static void handle_onchain_htlc_timeout(struct channel *channel, const u8 *msg) return; } + /* It should tell us about timeouts on our LOCAL htlcs */ + if (htlc.owner != LOCAL) { + channel_internal_error(channel, + "onchaind_htlc_timeout: htlc %"PRIu64" is not local!", + htlc.id); + return; + } + /* BOLT #5: * * - if the commitment transaction HTLC output has *timed out* and @@ -419,7 +435,7 @@ static void handle_onchain_htlc_timeout(struct channel *channel, const u8 *msg) * - MUST *resolve* the output by spending it using the HTLC-timeout * transaction. */ - onchain_failed_our_htlc(channel, &htlc, "timed out"); + onchain_failed_our_htlc(channel, &htlc, "timed out", true); } static void handle_irrevocably_resolved(struct channel *channel, const u8 *msg UNUSED) diff --git a/lightningd/peer_htlcs.c b/lightningd/peer_htlcs.c index b17eadf0e..ea859ebeb 100644 --- a/lightningd/peer_htlcs.c +++ b/lightningd/peer_htlcs.c @@ -1411,9 +1411,89 @@ static bool peer_failed_our_htlc(struct channel *channel, return true; } +/* We've had a bug report about not failing incoming HTLCs. This does a sanity + * check, if we think we've already failed this HTLC */ +static void check_already_failed(const struct channel *channel, struct htlc_out *hout) +{ + htlc_out_check(hout, __func__); + + if (!hout->in) + return; + + /* in should have been failed/succeeded already */ + if (hout->in->badonion != 0 + || hout->in->failonion + || hout->in->preimage) + return; + + /* Print out what we think this htlc already has (failed/succeeded) */ + log_broken(channel->log, "HTLC id %"PRIu64" already complete, but ->in not resolved!" + " failonion = %s, failmsg = %s, preimage = %s", + hout->key.id, + hout->failonion ? tal_hex(tmpctx, hout->failonion->contents) : "(null)", + hout->failmsg ? tal_hex(tmpctx, hout->failmsg) : "(null)", + hout->preimage ? type_to_string(tmpctx, struct preimage, hout->preimage) : "(null)"); + + if (hout->preimage) { + /* Log on both ours and theirs! */ + log_broken(channel->log, + "MISSING incoming success for %"PRIu64": succeeding incoming now", + hout->key.id); + log_broken(hout->in->key.channel->log, + "MISSED incoming success for %"PRIu64": succeeding now", + hout->in->key.id); + fulfill_htlc(hout->in, hout->preimage); + } else { + log_broken(channel->log, + "MISSING incoming fail for %"PRIu64": failing incoming now", + hout->key.id); + log_broken(hout->in->key.channel->log, + "MISSED incoming fail for %"PRIu64": failing now", + hout->in->key.id); + local_fail_in_htlc(hout->in, + take(towire_permanent_channel_failure(NULL))); + } +} + +/* This case searches harder to see if there are any incoming HTLCs */ +static void fail_dangling_htlc_in(struct lightningd *ld, + const struct sha256 *payment_hash) +{ + struct htlc_in *hin; + struct htlc_in_map_iter ini; + + for (hin = htlc_in_map_first(&ld->htlcs_in, &ini); + hin; + hin = htlc_in_map_next(&ld->htlcs_in, &ini)) { + if (!sha256_eq(&hin->payment_hash, payment_hash)) + continue; + if (hin->badonion) { + log_broken(hin->key.channel->log, + "htlc %"PRIu64" already failed with badonion", + hin->key.id); + } else if (hin->preimage) { + log_broken(hin->key.channel->log, + "htlc %"PRIu64" already succeeded with preimage", + hin->key.id); + } else if (hin->failonion) { + log_broken(hin->key.channel->log, + "htlc %"PRIu64" already failed with failonion %s", + hin->key.id, + tal_hex(tmpctx, hin->failonion->contents)); + } else { + log_broken(hin->key.channel->log, + "htlc %"PRIu64" has matching hash: failing", + hin->key.id); + local_fail_in_htlc(hin, + take(towire_permanent_channel_failure(NULL))); + } + } +} + void onchain_failed_our_htlc(const struct channel *channel, const struct htlc_stub *htlc, - const char *why) + const char *why, + bool should_exist) { struct lightningd *ld = channel->peer->ld; struct htlc_out *hout; @@ -1421,8 +1501,14 @@ void onchain_failed_our_htlc(const struct channel *channel, log_debug(channel->log, "onchain_failed_our_htlc"); hout = find_htlc_out(&ld->htlcs_out, channel, htlc->id); if (!hout) { - log_broken(channel->log, "HTLC id %"PRIu64" not found!", - htlc->id); + /* For penalty transactions, tell onchaind about all possible + * HTLCs: they may not all exist any more. */ + if (should_exist) + log_broken(channel->log, "HTLC id %"PRIu64" not found!", + htlc->id); + /* Immediate corruption sanity check if this happens */ + htable_check(&ld->htlcs_out.raw, "onchain_failed_our_htlc out"); + htable_check(&ld->htlcs_in.raw, "onchain_failed_our_htlc in"); return; } @@ -1433,6 +1519,7 @@ void onchain_failed_our_htlc(const struct channel *channel, hout->failonion, hout->failmsg, hout->preimage); + check_already_failed(channel, hout); return; } @@ -1470,9 +1557,15 @@ void onchain_failed_our_htlc(const struct channel *channel, hout->failmsg ? fromwire_peektype(hout->failmsg) : 0); - } else + } else { log_broken(channel->log, "HTLC id %"PRIu64" is from nowhere?", htlc->id); + + /* Immediate corruption sanity check if this happens */ + htable_check(&ld->htlcs_out.raw, "onchain_failed_our_htlc out"); + htable_check(&ld->htlcs_in.raw, "onchain_failed_our_htlc in"); + fail_dangling_htlc_in(ld, &hout->payment_hash); + } } static void remove_htlc_in(struct channel *channel, struct htlc_in *hin) diff --git a/lightningd/peer_htlcs.h b/lightningd/peer_htlcs.h index 3cdf6476e..1bcef7bb3 100644 --- a/lightningd/peer_htlcs.h +++ b/lightningd/peer_htlcs.h @@ -54,7 +54,8 @@ const u8 *send_htlc_out(const tal_t *ctx, void onchain_failed_our_htlc(const struct channel *channel, const struct htlc_stub *htlc, - const char *why); + const char *why, + bool should_exist); void onchain_fulfilled_htlc(struct channel *channel, const struct preimage *preimage);