mirror of
https://github.com/aljazceru/lightning.git
synced 2025-12-19 23:24:27 +01:00
pay: handle returned errors more gracefully.
The code had incorrect assertions, partially because it didn't clearly distinguish errors from the final node (which, barring blockheight issues, mean a complete failre) and intermediate nodes. In particular, we can't trust the *values*, so we need to distinguish these by the *sender*. If a route is of length 2 (A, B): - erring_index == 0 means us complaining about channel A. - erring_index == 1 means A.node complaining about channel B. - erring_index == 2 means the final destination node B.node. This is particularly of note because Travis does NOT run test_pay_routeboost! Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
@@ -592,7 +592,8 @@ fail:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void channel_hints_update(struct payment *root,
|
static void channel_hints_update(struct payment *root,
|
||||||
struct short_channel_id *scid, int direction,
|
const struct short_channel_id *scid,
|
||||||
|
int direction,
|
||||||
bool enabled,
|
bool enabled,
|
||||||
struct amount_msat estimated_capacity)
|
struct amount_msat estimated_capacity)
|
||||||
{
|
{
|
||||||
@@ -660,12 +661,255 @@ static void payment_result_infer(struct route_hop *route,
|
|||||||
r->erring_direction = &route[i].direction;
|
r->erring_direction = &route[i].direction;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If a node takes too much fee or cltv, the next one reports it. We don't
|
||||||
|
* know who to believe, but log it */
|
||||||
|
static void report_tampering(struct payment *p,
|
||||||
|
size_t report_pos,
|
||||||
|
const char *style)
|
||||||
|
{
|
||||||
|
const struct node_id *id = &p->route[report_pos].nodeid;
|
||||||
|
|
||||||
|
if (report_pos == 0) {
|
||||||
|
plugin_log(p->plugin, LOG_UNUSUAL,
|
||||||
|
"Node #%zu (%s) claimed we sent them invalid %s",
|
||||||
|
report_pos + 1,
|
||||||
|
type_to_string(tmpctx, struct node_id, id),
|
||||||
|
style);
|
||||||
|
} else {
|
||||||
|
plugin_log(p->plugin, LOG_UNUSUAL,
|
||||||
|
"Node #%zu (%s) claimed #%zu (%s) sent them invalid %s",
|
||||||
|
report_pos + 1,
|
||||||
|
type_to_string(tmpctx, struct node_id, id),
|
||||||
|
report_pos,
|
||||||
|
type_to_string(tmpctx, struct node_id,
|
||||||
|
&p->route[report_pos-1].nodeid),
|
||||||
|
style);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct command_result *
|
||||||
|
handle_final_failure(struct command *cmd,
|
||||||
|
struct payment *p,
|
||||||
|
const struct node_id *final_id,
|
||||||
|
enum onion_type failcode)
|
||||||
|
{
|
||||||
|
/* We use an exhaustive switch statement here so you get a compile
|
||||||
|
* warning when new ones are added, and can think about where they go */
|
||||||
|
switch (failcode) {
|
||||||
|
case WIRE_FINAL_INCORRECT_CLTV_EXPIRY:
|
||||||
|
report_tampering(p, tal_count(p->route)-1, "cltv");
|
||||||
|
goto error;
|
||||||
|
case WIRE_FINAL_INCORRECT_HTLC_AMOUNT:
|
||||||
|
report_tampering(p, tal_count(p->route)-1, "amount");
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
/* BOLT #4:
|
||||||
|
*
|
||||||
|
* A _forwarding node_ MAY, but a _final node_ MUST NOT:
|
||||||
|
*...
|
||||||
|
* - return an `invalid_onion_version` error.
|
||||||
|
*...
|
||||||
|
* - return an `invalid_onion_hmac` error.
|
||||||
|
*...
|
||||||
|
* - return an `invalid_onion_key` error.
|
||||||
|
*...
|
||||||
|
* - return a `temporary_channel_failure` error.
|
||||||
|
*...
|
||||||
|
* - return a `permanent_channel_failure` error.
|
||||||
|
*...
|
||||||
|
* - return a `required_channel_feature_missing` error.
|
||||||
|
*...
|
||||||
|
* - return an `unknown_next_peer` error.
|
||||||
|
*...
|
||||||
|
* - return an `amount_below_minimum` error.
|
||||||
|
*...
|
||||||
|
* - return a `fee_insufficient` error.
|
||||||
|
*...
|
||||||
|
* - return an `incorrect_cltv_expiry` error.
|
||||||
|
*...
|
||||||
|
* - return an `expiry_too_soon` error.
|
||||||
|
*...
|
||||||
|
* - return an `expiry_too_far` error.
|
||||||
|
*...
|
||||||
|
* - return a `channel_disabled` error.
|
||||||
|
*/
|
||||||
|
case WIRE_INVALID_ONION_VERSION:
|
||||||
|
case WIRE_INVALID_ONION_HMAC:
|
||||||
|
case WIRE_INVALID_ONION_KEY:
|
||||||
|
case WIRE_TEMPORARY_CHANNEL_FAILURE:
|
||||||
|
case WIRE_PERMANENT_CHANNEL_FAILURE:
|
||||||
|
case WIRE_REQUIRED_CHANNEL_FEATURE_MISSING:
|
||||||
|
case WIRE_UNKNOWN_NEXT_PEER:
|
||||||
|
case WIRE_AMOUNT_BELOW_MINIMUM:
|
||||||
|
case WIRE_FEE_INSUFFICIENT:
|
||||||
|
case WIRE_INCORRECT_CLTV_EXPIRY:
|
||||||
|
case WIRE_EXPIRY_TOO_FAR:
|
||||||
|
case WIRE_EXPIRY_TOO_SOON:
|
||||||
|
case WIRE_CHANNEL_DISABLED:
|
||||||
|
goto strange_error;
|
||||||
|
|
||||||
|
case WIRE_INVALID_ONION_PAYLOAD:
|
||||||
|
case WIRE_INVALID_REALM:
|
||||||
|
case WIRE_PERMANENT_NODE_FAILURE:
|
||||||
|
case WIRE_TEMPORARY_NODE_FAILURE:
|
||||||
|
case WIRE_REQUIRED_NODE_FEATURE_MISSING:
|
||||||
|
#if EXPERIMENTAL_FEATURES
|
||||||
|
case WIRE_INVALID_ONION_BLINDING:
|
||||||
|
#endif
|
||||||
|
case WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS:
|
||||||
|
case WIRE_MPP_TIMEOUT:
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
strange_error:
|
||||||
|
plugin_log(p->plugin, LOG_UNUSUAL,
|
||||||
|
"Final node %s reported strange error code %u",
|
||||||
|
type_to_string(tmpctx, struct node_id, final_id),
|
||||||
|
failcode);
|
||||||
|
|
||||||
|
error:
|
||||||
|
p->result->code = PAY_DESTINATION_PERM_FAIL;
|
||||||
|
payment_root(p)->abort = true;
|
||||||
|
|
||||||
|
payment_fail(p, "%s", p->result->message);
|
||||||
|
return command_still_pending(cmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static struct command_result *
|
||||||
|
handle_intermediate_failure(struct command *cmd,
|
||||||
|
struct payment *p,
|
||||||
|
const struct node_id *errnode,
|
||||||
|
const struct route_hop *errchan,
|
||||||
|
enum onion_type failcode)
|
||||||
|
{
|
||||||
|
struct payment *root = payment_root(p);
|
||||||
|
|
||||||
|
/* We use an exhaustive switch statement here so you get a compile
|
||||||
|
* warning when new ones are added, and can think about where they go */
|
||||||
|
switch (failcode) {
|
||||||
|
/* BOLT #4:
|
||||||
|
*
|
||||||
|
* An _intermediate hop_ MUST NOT, but the _final node_:
|
||||||
|
*...
|
||||||
|
* - MUST return an `incorrect_or_unknown_payment_details` error.
|
||||||
|
*...
|
||||||
|
* - MUST return `final_incorrect_cltv_expiry` error.
|
||||||
|
*...
|
||||||
|
* - MUST return a `final_incorrect_htlc_amount` error.
|
||||||
|
*/
|
||||||
|
case WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS:
|
||||||
|
case WIRE_FINAL_INCORRECT_CLTV_EXPIRY:
|
||||||
|
case WIRE_FINAL_INCORRECT_HTLC_AMOUNT:
|
||||||
|
/* FIXME: Document in BOLT that intermediates must not return this! */
|
||||||
|
case WIRE_MPP_TIMEOUT:
|
||||||
|
goto strange_error;
|
||||||
|
|
||||||
|
case WIRE_PERMANENT_CHANNEL_FAILURE:
|
||||||
|
case WIRE_CHANNEL_DISABLED:
|
||||||
|
case WIRE_UNKNOWN_NEXT_PEER:
|
||||||
|
case WIRE_REQUIRED_CHANNEL_FEATURE_MISSING:
|
||||||
|
/* All of these result in the channel being marked as disabled. */
|
||||||
|
channel_hints_update(root,
|
||||||
|
&errchan->channel_id, errchan->direction,
|
||||||
|
false, AMOUNT_MSAT(0));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case WIRE_TEMPORARY_CHANNEL_FAILURE: {
|
||||||
|
/* These are an indication that the capacity was insufficient,
|
||||||
|
* remember the amount we tried as an estimate. */
|
||||||
|
struct amount_msat est = errchan->amount;
|
||||||
|
est.millisatoshis *= 0.75; /* Raw: Multiplication */
|
||||||
|
channel_hints_update(root,
|
||||||
|
&errchan->channel_id, errchan->direction,
|
||||||
|
true, est);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
case WIRE_INCORRECT_CLTV_EXPIRY:
|
||||||
|
report_tampering(p, errchan - p->route, "cltv");
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
case WIRE_INVALID_ONION_VERSION:
|
||||||
|
case WIRE_INVALID_ONION_HMAC:
|
||||||
|
case WIRE_INVALID_ONION_KEY:
|
||||||
|
case WIRE_PERMANENT_NODE_FAILURE:
|
||||||
|
case WIRE_TEMPORARY_NODE_FAILURE:
|
||||||
|
case WIRE_REQUIRED_NODE_FEATURE_MISSING:
|
||||||
|
case WIRE_INVALID_ONION_PAYLOAD:
|
||||||
|
case WIRE_INVALID_REALM:
|
||||||
|
#if EXPERIMENTAL_FEATURES
|
||||||
|
case WIRE_INVALID_ONION_BLINDING:
|
||||||
|
#endif
|
||||||
|
tal_arr_expand(&root->excluded_nodes, *errnode);
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
case WIRE_AMOUNT_BELOW_MINIMUM:
|
||||||
|
case WIRE_FEE_INSUFFICIENT:
|
||||||
|
case WIRE_EXPIRY_TOO_FAR:
|
||||||
|
case WIRE_EXPIRY_TOO_SOON:
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
strange_error:
|
||||||
|
plugin_log(p->plugin, LOG_UNUSUAL,
|
||||||
|
"Intermediate node %s reported strange error code %u",
|
||||||
|
type_to_string(tmpctx, struct node_id, errnode),
|
||||||
|
failcode);
|
||||||
|
|
||||||
|
error:
|
||||||
|
payment_fail(p, "%s", p->result->message);
|
||||||
|
return command_still_pending(cmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* From the docs:
|
||||||
|
*
|
||||||
|
* - *erring_index*: The index of the node along the route that
|
||||||
|
* reported the error. 0 for the local node, 1 for the first hop,
|
||||||
|
* and so on.
|
||||||
|
*
|
||||||
|
* The only difficulty is mapping the erring_index to the correct hop.
|
||||||
|
* We split into the erring node, and the error channel, since they're
|
||||||
|
* used in different contexts. NULL error_channel means it's the final
|
||||||
|
* node, whose errors are treated differently.
|
||||||
|
*/
|
||||||
|
static bool assign_blame(const struct payment *p,
|
||||||
|
const struct node_id **errnode,
|
||||||
|
const struct route_hop **errchan)
|
||||||
|
{
|
||||||
|
int index;
|
||||||
|
|
||||||
|
if (p->result->erring_index == NULL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
index = *p->result->erring_index;
|
||||||
|
|
||||||
|
/* BADONION errors are reported on behalf of the next node. */
|
||||||
|
if (p->result->failcode & BADONION)
|
||||||
|
index++;
|
||||||
|
|
||||||
|
/* Final node *shouldn't* report BADONION, but don't assume. */
|
||||||
|
if (index >= tal_count(p->route)) {
|
||||||
|
*errchan = NULL;
|
||||||
|
*errnode = &p->route[tal_count(p->route) - 1].nodeid;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
*errchan = &p->route[index];
|
||||||
|
if (index == 0)
|
||||||
|
*errnode = p->local_id;
|
||||||
|
else
|
||||||
|
*errnode = &p->route[index - 1].nodeid;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static struct command_result *
|
static struct command_result *
|
||||||
payment_waitsendpay_finished(struct command *cmd, const char *buffer,
|
payment_waitsendpay_finished(struct command *cmd, const char *buffer,
|
||||||
const jsmntok_t *toks, struct payment *p)
|
const jsmntok_t *toks, struct payment *p)
|
||||||
{
|
{
|
||||||
struct payment *root;
|
const struct node_id *errnode;
|
||||||
struct route_hop *hop;
|
const struct route_hop *errchan;
|
||||||
|
|
||||||
assert(p->route != NULL);
|
assert(p->route != NULL);
|
||||||
|
|
||||||
p->end_time = time_now();
|
p->end_time = time_now();
|
||||||
@@ -689,98 +933,25 @@ payment_waitsendpay_finished(struct command *cmd, const char *buffer,
|
|||||||
return command_still_pending(cmd);
|
return command_still_pending(cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
root = payment_root(p);
|
|
||||||
payment_chanhints_apply_route(p, true);
|
payment_chanhints_apply_route(p, true);
|
||||||
|
|
||||||
/* Either we have no erring_index, or it must be a correct index into
|
if (!assign_blame(p, &errnode, &errchan)) {
|
||||||
* p->route. From the docs:
|
plugin_log(p->plugin, LOG_UNUSUAL,
|
||||||
*
|
"No erring_index set in `waitsendpay` result: %.*s",
|
||||||
* - *erring_index*: The index of the node along the route that
|
json_tok_full_len(toks),
|
||||||
* reported the error. 0 for the local node, 1 for the first hop,
|
json_tok_full(buffer, toks));
|
||||||
* and so on.
|
/* FIXME: Pick a random channel to fail? */
|
||||||
*
|
payment_set_step(p, PAYMENT_STEP_FAILED);
|
||||||
* The only difficulty is mapping the erring_index to the correct hop,
|
payment_continue(p);
|
||||||
* since the hop consists of the processing node, but the payload for
|
return command_still_pending(cmd);
|
||||||
* the next hop. In addition there is a class of onion-related errors
|
|
||||||
* that are reported by the previous hop to the one erring, since the
|
|
||||||
* erring node couldn't read the onion in the first place.
|
|
||||||
*/
|
|
||||||
assert(p->result->erring_index == NULL ||
|
|
||||||
*p->result->erring_index - 1 < tal_count(p->route));
|
|
||||||
|
|
||||||
switch (p->result->failcode) {
|
|
||||||
case WIRE_PERMANENT_CHANNEL_FAILURE:
|
|
||||||
case WIRE_CHANNEL_DISABLED:
|
|
||||||
case WIRE_UNKNOWN_NEXT_PEER:
|
|
||||||
case WIRE_REQUIRED_CHANNEL_FEATURE_MISSING:
|
|
||||||
/* All of these result in the channel being marked as disabled. */
|
|
||||||
assert(*p->result->erring_index < tal_count(p->route));
|
|
||||||
hop = &p->route[*p->result->erring_index];
|
|
||||||
channel_hints_update(root, &hop->channel_id, hop->direction,
|
|
||||||
false, AMOUNT_MSAT(0));
|
|
||||||
break;
|
|
||||||
case WIRE_TEMPORARY_CHANNEL_FAILURE:
|
|
||||||
/* These are an indication that the capacity was insufficient,
|
|
||||||
* remember the amount we tried as an estimate. */
|
|
||||||
assert(*p->result->erring_index < tal_count(p->route));
|
|
||||||
hop = &p->route[*p->result->erring_index];
|
|
||||||
struct amount_msat est = {
|
|
||||||
.millisatoshis = hop->amount.millisatoshis * 0.75}; /* Raw: Multiplication */
|
|
||||||
channel_hints_update(root, &hop->channel_id, hop->direction,
|
|
||||||
true, est);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case WIRE_INVALID_ONION_PAYLOAD:
|
|
||||||
case WIRE_INVALID_REALM:
|
|
||||||
case WIRE_PERMANENT_NODE_FAILURE:
|
|
||||||
case WIRE_TEMPORARY_NODE_FAILURE:
|
|
||||||
case WIRE_REQUIRED_NODE_FEATURE_MISSING:
|
|
||||||
case WIRE_INVALID_ONION_VERSION:
|
|
||||||
case WIRE_INVALID_ONION_HMAC:
|
|
||||||
case WIRE_INVALID_ONION_KEY:
|
|
||||||
#if EXPERIMENTAL_FEATURES
|
|
||||||
case WIRE_INVALID_ONION_BLINDING:
|
|
||||||
#endif
|
|
||||||
/* These are reported by the last hop, i.e., the destination of hop i-1. */
|
|
||||||
assert(*p->result->erring_index - 1 < tal_count(p->route));
|
|
||||||
hop = &p->route[*p->result->erring_index - 1];
|
|
||||||
tal_arr_expand(&root->excluded_nodes, hop->nodeid);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS:
|
|
||||||
p->result->code = PAY_DESTINATION_PERM_FAIL;
|
|
||||||
root->abort = true;
|
|
||||||
case WIRE_MPP_TIMEOUT:
|
|
||||||
/* These are permanent failures that should abort all of our
|
|
||||||
* attempts right away. We'll still track pending partial
|
|
||||||
* payments correctly, just not start new ones. */
|
|
||||||
root->abort = true;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case WIRE_AMOUNT_BELOW_MINIMUM:
|
|
||||||
case WIRE_EXPIRY_TOO_FAR:
|
|
||||||
case WIRE_EXPIRY_TOO_SOON:
|
|
||||||
case WIRE_FEE_INSUFFICIENT:
|
|
||||||
case WIRE_INCORRECT_CLTV_EXPIRY:
|
|
||||||
case WIRE_FINAL_INCORRECT_CLTV_EXPIRY:
|
|
||||||
/* These are issues that are due to gossipd being out of date,
|
|
||||||
* we ignore them here, and wait for gossipd to adjust
|
|
||||||
* instead. */
|
|
||||||
break;
|
|
||||||
case WIRE_FINAL_INCORRECT_HTLC_AMOUNT:
|
|
||||||
/* These are symptoms of intermediate hops tampering with the
|
|
||||||
* payment. */
|
|
||||||
hop = &p->route[*p->result->erring_index];
|
|
||||||
plugin_log(
|
|
||||||
p->plugin, LOG_UNUSUAL,
|
|
||||||
"Node %s reported an incorrect HTLC amount, this could be "
|
|
||||||
"a prior hop messing with the amounts.",
|
|
||||||
type_to_string(tmpctx, struct node_id, &hop->nodeid));
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
payment_fail(p, "%s", p->result->message);
|
if (!errchan)
|
||||||
return command_still_pending(cmd);
|
return handle_final_failure(cmd, p, errnode,
|
||||||
|
p->result->failcode);
|
||||||
|
|
||||||
|
return handle_intermediate_failure(cmd, p, errnode, errchan,
|
||||||
|
p->result->failcode);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct command_result *payment_sendonion_success(struct command *cmd,
|
static struct command_result *payment_sendonion_success(struct command *cmd,
|
||||||
|
|||||||
Reference in New Issue
Block a user