From 573f2f065a473eb93cb51680349d668db1031432 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 20 Sep 2018 14:01:09 +0930 Subject: [PATCH] hsmd: document as part II of our journey. Thanks greatly to the four people who I *know* have read this: @wythe, @ZmnSCPxj, @SimonVrouwe, and @cdecker Your feedback will help future developers seeking enlightenment! Signed-off-by: Rusty Russell --- hsmd/hsmd.c | 425 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 391 insertions(+), 34 deletions(-) diff --git a/hsmd/hsmd.c b/hsmd/hsmd.c index 2c637176f..21cb1cc97 100644 --- a/hsmd/hsmd.c +++ b/hsmd/hsmd.c @@ -1,3 +1,11 @@ +/*~ Welcome to the hsm daemon: keeper of our secrets! + * + * This is a separate daemon which keeps a root secret from which all others + * are generated. It starts with one client: lightningd, which can ask for + * new sockets for other clients. Each client has a simple capability map + * which indicates what it's allowed to ask for. We're entirely driven + * by request, response. + */ #include #include #include @@ -31,6 +39,7 @@ #include #include #include +/*~ All gen_ files are autogenerated; in this case by tools/generate-wire.py */ #include #include #include @@ -43,35 +52,55 @@ #include #include +/*~ Each subdaemon is started with stdin connected to lightningd (for status + * messages), and stderr untouched (for emergency printing). File descriptors + * 3 and beyond are set up on other sockets: for hsmd, fd 3 is the request + * stream from lightningd. */ #define REQ_FD 3 -/* Nobody will ever find it here! */ +/*~ Nobody will ever find it here! hsm_secret is our root secret, the bip32 + * tree is derived from that, and cached here. */ static struct { struct secret hsm_secret; struct ext_key bip32; } secretstuff; +/*~ We keep track of clients, but there's not much to keep. */ struct client { struct daemon_conn dc; struct daemon_conn *master; + /* ~Useful for logging, but also used to derive the per-channel seed. 
*/ struct pubkey id; + + /*~ This is a unique value handed to us from lightningd, used for + * per-channel seed generation (a single id may have multiple channels + * over time). + * + * It's actually zero for the initial lightningd client connection and + * the ones for gossipd and connectd, which don't have channels + * associated. */ u64 dbid; /* What is this client allowed to ask for? */ u64 capabilities; }; -/* We keep a map of nonzero dbid -> clients */ +/*~ We keep a map of nonzero dbid -> clients, mainly for leak detection. + * This is ccan/uintmap, which maps u64 to some (non-NULL) pointer. + * I really dislike these kinds of declaration-via-magic macro things, as + * tags can't find them without special hacks, but the payoff here is that + * the map is typesafe: the compiler won't let you put anything in but a + * struct client pointer. */ static UINTMAP(struct client *) clients; -/* We get three zero-dbid clients: master, gossipd and connnectd. */ +/*~ Plus the three zero-dbid clients: master, gossipd and connectd. */ static struct client *dbid_zero_clients[3]; static size_t num_dbid_zero_clients; -/* For reporting issues. */ +/*~ We need this deep inside bad_req_fmt, so we make it a global. */ static struct daemon_conn *status_conn; -/* FIXME: This is used by debug.c, but doesn't apply to us. */ +/*~ FIXME: This is used by debug.c. Doesn't apply to us, but lets us link. */ extern void dev_disconnect_init(int fd); void dev_disconnect_init(int fd UNUSED) { } @@ -82,6 +111,13 @@ static struct client *new_client(struct daemon_conn *master, const u64 capabilities, int fd); +/*~ ccan/compiler.h defines PRINTF_FMT as the gcc compiler hint so it will + * check that fmt and other trailing arguments really are the correct type. + * + * This is a convenient helper to tell lightningd we've received a bad request + * and close the client connection. This should never happen, of course, but + * we definitely want to log if it does. 
+ */ static PRINTF_FMT(4,5) struct io_plan *bad_req_fmt(struct io_conn *conn, struct client *c, @@ -95,20 +131,32 @@ static PRINTF_FMT(4,5) str = tal_fmt(tmpctx, fmt, ap); va_end(ap); - /* If the client was actually lightningd, it's Game Over. */ + /*~ If the client was actually lightningd, it's Game Over; we actually + * fail in this case, and it will too. */ if (&c->dc == c->master) { status_broken("%s", str); master_badmsg(fromwire_peektype(msg_in), msg_in); } + /*~ Note the use of NULL as the ctx arg to towire_hsmstatus_: only + * use NULL as the allocation when we're about to immediately free it + * or hand it off with take(), as here. That makes it clear we don't + * expect it to linger, and in fact our memleak detection will + * complain if it does (unlike using the deliberately-transient + * tmpctx). */ daemon_conn_send(status_conn, take(towire_hsmstatus_client_bad_request(NULL, &c->id, str, msg_in))); + + /*~ The way ccan/io works is that you return the "plan" for what to do + * next (eg. io_read). io_close() is special: it means to close the + * connection. */ return io_close(conn); } +/* Convenience wrapper for when we simply can't parse. */ static struct io_plan *bad_req(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -116,6 +164,7 @@ static struct io_plan *bad_req(struct io_conn *conn, return bad_req_fmt(conn, c, msg_in, "could not parse request"); } +/* This is the common pattern for the tail of each handler in this file. */ static struct io_plan *req_reply(struct io_conn *conn, struct client *c, const u8 *msg_out TAKES) @@ -124,18 +173,25 @@ static struct io_plan *req_reply(struct io_conn *conn, return daemon_conn_read_next(conn, &c->dc); } +/*~ This returns the secret and/or public key for this node. */ static void node_key(struct privkey *node_privkey, struct pubkey *node_id) { u32 salt = 0; struct privkey unused_s; struct pubkey unused_k; + /* If caller specifies NULL, they don't want the results. 
*/ if (node_privkey == NULL) node_privkey = &unused_s; else if (node_id == NULL) node_id = &unused_k; + /*~ So, there is apparently a 1 in 2^127 chance that a random value is + * not a valid private key, so this never actually loops. */ do { + /*~ ccan/crypto/hkdf_sha256 implements RFC5869 "HMAC-based + * Extract-and-Expand Key Derivation Function" (HKDF). That means + * that if a derived key leaks somehow, the other keys are not compromised. */ hkdf_sha256(node_privkey, sizeof(*node_privkey), &salt, sizeof(salt), &secretstuff.hsm_secret, @@ -146,28 +202,40 @@ static void node_key(struct privkey *node_privkey, struct pubkey *node_id) node_privkey->secret.data)); } -/** - * hsm_channel_secret_base -- Derive the base secret seed for per-channel seeds - * - * This secret is the basis for all per-channel secrets: the per-channel seeds - * will be generated mixing in the channel_id and the peer node_id. - */ +/*~ This secret is the basis for all per-channel secrets: the per-channel seeds + * will be generated by mixing in the dbid and the peer node_id. */ static void hsm_channel_secret_base(struct secret *channel_seed_base) { hkdf_sha256(channel_seed_base, sizeof(struct secret), NULL, 0, &secretstuff.hsm_secret, sizeof(secretstuff.hsm_secret), + /*~ Initially, we didn't support multiple channels per + * peer at all: a channel had to be completely forgotten + * before another could exist. That was slightly relaxed, + * but the phrase "peer seed" is wired into the seed + * generation here, so we need to keep it that way for + * existing clients, rather than using "channel seed". */ "peer seed", strlen("peer seed")); } +/*~ This gets the seed for this particular channel. 
*/ static void get_channel_seed(const struct pubkey *peer_id, u64 dbid, struct secret *channel_seed) { struct secret channel_base; u8 input[PUBKEY_DER_LEN + sizeof(dbid)]; + /*~ Again, "per-peer" should be "per-channel", but Hysterical Raisins */ const char *info = "per-peer seed"; + /*~ We use the DER encoding of the pubkey, because it's platform + * independent. Since the dbid is unique, however, it's completely + * unnecessary, but again, existing users can't be broken. */ + /* FIXME: lnd has a nicer BIP32 method for deriving secrets which we + * should migrate to. */ hsm_channel_secret_base(&channel_base); pubkey_to_der(input, peer_id); + /*~ For all that talk about platform-independence, note that this + * field is endian-dependent! But let's face it, little-endian won. + * In related news, we don't support EBCDIC or middle-endian. */ memcpy(input + PUBKEY_DER_LEN, &dbid, sizeof(dbid)); hkdf_sha256(channel_seed, sizeof(*channel_seed), @@ -176,6 +244,7 @@ static void get_channel_seed(const struct pubkey *peer_id, u64 dbid, info, strlen(info)); } +/*~ Called at startup to derive the bip32 field. */ static void populate_secretstuff(void) { u8 bip32_seed[BIP32_ENTROPY_LEN_256]; @@ -216,6 +285,9 @@ static void populate_secretstuff(void) /* Hence child 0, then child 0 again to get extkey to derive from. */ if (bip32_key_from_parent(&master_extkey, 0, BIP32_FLAG_KEY_PRIVATE, &child_extkey) != WALLY_OK) + /*~ status_failed() is a helper which exits and sends lightningd + * a message about what happened. For hsmd, that's fatal to + * lightningd. */ status_failed(STATUS_FAIL_INTERNAL_ERROR, "Can't derive child bip32 key"); @@ -225,7 +297,8 @@ static void populate_secretstuff(void) "Can't derive private bip32 key"); } -/* If privkey is NULL, we don't fill it in */ +/*~ Get the keys for this given BIP32 index: if privkey is NULL, we + * don't fill it in. 
*/ static void bitcoin_key(struct privkey *privkey, struct pubkey *pubkey, u32 index) { @@ -239,12 +312,15 @@ static void bitcoin_key(struct privkey *privkey, struct pubkey *pubkey, status_failed(STATUS_FAIL_MASTER_IO, "Index %u too great", index); + /*~ This uses libwally, which doesn't dovetail directly with + * libsecp256k1 even though it, too, uses it internally. */ if (bip32_key_from_parent(&secretstuff.bip32, index, BIP32_FLAG_KEY_PRIVATE, &ext) != WALLY_OK) status_failed(STATUS_FAIL_INTERNAL_ERROR, "BIP32 of %u failed", index); - /* libwally says: The private key with prefix byte 0 */ + /* libwally says: The private key with prefix byte 0; remove it + * for libsecp256k1. */ memcpy(privkey->secret.data, ext.priv_key+1, 32); if (!secp256k1_ec_pubkey_create(secp256k1_ctx, &pubkey->pubkey, privkey->secret.data)) @@ -252,32 +328,50 @@ static void bitcoin_key(struct privkey *privkey, struct pubkey *pubkey, "BIP32 pubkey %u create failed", index); } +/*~ We store our root secret in a "hsm_secret" file (like all of c-lightning, + * we run in the user's .lightningd directory). */ static void maybe_create_new_hsm(void) { + /*~ Note that this is opened for write-only, even though the permissions + * are set to read-only. That's perfectly valid! */ int fd = open("hsm_secret", O_CREAT|O_EXCL|O_WRONLY, 0400); if (fd < 0) { + /* If this is not the first time we've run, it will exist. */ if (errno == EEXIST) return; status_failed(STATUS_FAIL_INTERNAL_ERROR, "creating: %s", strerror(errno)); } + /*~ This is libsodium's cryptographic randomness routine: we assume + * it's doing a good job. */ randombytes_buf(&secretstuff.hsm_secret, sizeof(secretstuff.hsm_secret)); + /*~ ccan/read_write_all has a more convenient return than write() where + * we'd have to check the return value == the length we gave: write() + * can return short on normal files if we run out of disk space. 
*/ if (!write_all(fd, &secretstuff.hsm_secret, sizeof(secretstuff.hsm_secret))) { + /* ccan/noerr contains useful routines like this, which don't + * clobber errno, so we can use it in our error report. */ unlink_noerr("hsm_secret"); status_failed(STATUS_FAIL_INTERNAL_ERROR, "writing: %s", strerror(errno)); } + /*~ fsync (mostly!) ensures that the file has reached the disk. */ if (fsync(fd) != 0) { unlink_noerr("hsm_secret"); status_failed(STATUS_FAIL_INTERNAL_ERROR, "fsync: %s", strerror(errno)); } + /*~ This should never fail if fsync succeeded. But paranoia good, and + * bugs exist. */ if (close(fd) != 0) { unlink_noerr("hsm_secret"); status_failed(STATUS_FAIL_INTERNAL_ERROR, "closing: %s", strerror(errno)); } + /*~ We actually need to sync the *directory itself* to make sure the + * file exists! You're only allowed to open directories read-only in + * modern Unix though. */ fd = open(".", O_RDONLY); if (fd < 0) { status_failed(STATUS_FAIL_INTERNAL_ERROR, @@ -289,9 +383,16 @@ static void maybe_create_new_hsm(void) "fsyncdir: %s", strerror(errno)); } close(fd); + /*~ status_unusual() is good for things which are interesting and + * definitely won't spam the logs. Only status_broken() is higher; + * status_info() is lower, then status_debug() and finally + * status_io(). */ status_unusual("HSM: created new hsm_secret file"); } +/*~ We always load the HSM file, even if we just created it above. This + * both unifies the code paths, and provides a nice sanity check that the + * file contents are as they will be for future invocations. */ static void load_hsm(void) { int fd = open("hsm_secret", O_RDONLY); @@ -306,6 +407,8 @@ static void load_hsm(void) populate_secretstuff(); } +/*~ This is the response to lightningd's HSM_INIT request, which is the first + * thing it sends. */ static struct io_plan *init_hsm(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -315,18 +418,31 @@ static struct io_plan *init_hsm(struct io_conn *conn, /* This must be the master. 
*/ assert(&c->dc == c->master); + /*~ The fromwire_* routines are autogenerated, based on the message + * definitions in hsm_client_wire.csv. The format of those files is + * an extension of the simple comma-separated format output by the + * BOLT tools/extract-formats.py tool. */ if (!fromwire_hsm_init(msg_in)) return bad_req(conn, c, msg_in); maybe_create_new_hsm(); load_hsm(); + /*~ We tell lightning our node id and (public) bip32 seed. */ node_key(NULL, &node_id); + + /*~ Note: marshalling a bip32 tree only marshals the public side, + * not the secrets! So we're not actually handing them out here! + */ return req_reply(conn, c, take(towire_hsm_init_reply(NULL, &node_id, &secretstuff.bip32))); } +/*~ The client has asked us to extract the shared secret from an EC Diffie + * Hellman token. This doesn't leak any information, but requires the private + * key, so the hsmd performs it. It's used to set up an encryption key for the + * connection handshaking (BOLT #8) and for the onion wrapping (BOLT #4). */ static struct io_plan *handle_ecdh(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -338,21 +454,44 @@ static struct io_plan *handle_ecdh(struct io_conn *conn, if (!fromwire_hsm_ecdh_req(msg_in, &point)) return bad_req(conn, c, msg_in); + /*~ We simply use the secp256k1_ecdh function, which really shouldn't + * fail (iff the point is invalid). */ node_key(&privkey, NULL); if (secp256k1_ecdh(secp256k1_ctx, ss.data, &point.pubkey, privkey.secret.data) != 1) { return bad_req_fmt(conn, c, msg_in, "secp256k1_ecdh fail"); } + /*~ In the normal case, we return the shared secret, and then read + * the next msg. */ return req_reply(conn, c, take(towire_hsm_ecdh_resp(NULL, &ss))); } +/*~ The specific routine to sign the channel_announcement message. 
This is + * defined in BOLT #7, and requires *two* signatures: one from this node's key + * (to prove it's from us), and one from the bitcoin key used to create the + * funding transaction (to prove we own the output). */ static struct io_plan *handle_cannouncement_sig(struct io_conn *conn, struct client *c, const u8 *msg_in) { - /* First 2 + 256 byte are the signatures and msg type, skip them */ - size_t offset = 258; + /*~ Our autogeneration code doesn't define field offsets, so we just + * copy this from the spec itself. + * + * Note that 'check-source' will actually find and check this quote + * against the spec (if available); whitespace is ignored and + * ... means some content is skipped, but it works remarkably well to + * track spec changes. */ + + /* BOLT #7: + * + * - MUST compute the double-SHA256 hash `h` of the message, beginning + * at offset 256, up to the end of the message. + * - Note: the hash skips the 4 signatures but hashes the rest of the + * message, including any future fields appended to the end. + */ + /* First two bytes are the msg type */ + size_t offset = 2 + 256; struct privkey node_pkey; secp256k1_ecdsa_signature node_sig, bitcoin_sig; struct sha256_double hash; @@ -362,10 +501,18 @@ static struct io_plan *handle_cannouncement_sig(struct io_conn *conn, struct privkey funding_privkey; struct secret channel_seed; + /*~ You'll find FIXMEs like this scattered through the code. + * Sometimes they suggest simple improvements which someone like + * yourself should go ahead and implement. Sometimes they're deceptive + * quagmires which will cause you nothing but grief. You decide! */ + /* FIXME: We should cache these. */ get_channel_seed(&c->id, c->dbid, &channel_seed); derive_funding_key(&channel_seed, &funding_pubkey, &funding_privkey); + /*~ fromwire_ routines which need to do allocation take a tal context + * as their first field; tmpctx is good here since we won't need it + * after this function. 
*/ if (!fromwire_hsm_cannouncement_sig_req(tmpctx, msg_in, &ca)) return bad_req(conn, c, msg_in); @@ -374,6 +521,8 @@ static struct io_plan *handle_cannouncement_sig(struct io_conn *conn, "bad cannounce length %zu", tal_count(ca)); + /*~ Christian uses TODO(cdecker), but I'm sure he won't mind if you fix + * this for him! */ /* TODO(cdecker) Check that this is actually a valid * channel_announcement */ node_key(&node_pkey, NULL); @@ -387,10 +536,17 @@ static struct io_plan *handle_cannouncement_sig(struct io_conn *conn, return req_reply(conn, c, take(reply)); } +/*~ The specific routine to sign the channel_update message. */ static struct io_plan *handle_channel_update_sig(struct io_conn *conn, struct client *c, const u8 *msg_in) { + /* BOLT #7: + * + * - MUST set `signature` to the signature of the double-SHA256 of the + * entire remaining packet after `signature`, using its own + * `node_id`. + */ /* 2 bytes msg type + 64 bytes signature */ size_t offset = 66; struct privkey node_pkey; @@ -428,6 +584,12 @@ static struct io_plan *handle_channel_update_sig(struct io_conn *conn, return req_reply(conn, c, take(towire_hsm_cupdate_sig_reply(NULL, cu))); } +/*~ This gets the basepoints for a channel; it's not private information really + * (we tell the peer this to establish a channel, as it sets up the keys used + * for each transaction). + * + * Note that this is asked by lightningd, so it tells us what channels it wants. + */ static struct io_plan *handle_get_channel_basepoints(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -450,6 +612,12 @@ static struct io_plan *handle_get_channel_basepoints(struct io_conn *conn, &funding_pubkey))); } +/*~ This is another lightningd-only interface; signing a commit transaction. + * This is dangerous, since if we sign a revoked commitment tx we'll lose + * funds, thus it's only available to lightningd. + * + * + * Oh look, another FIXME! */ /* FIXME: Ensure HSM never does this twice for same dbid! 
*/ static struct io_plan *handle_sign_commitment_tx(struct io_conn *conn, struct client *c, @@ -474,10 +642,18 @@ static struct io_plan *handle_sign_commitment_tx(struct io_conn *conn, derive_basepoints(&channel_seed, &local_funding_pubkey, NULL, &secrets, NULL); + /*~ Bitcoin signatures cover the (part of) the script they're + * executing; the rules are a bit complex in general, but for + * Segregated Witness it's simply the current script. */ funding_wscript = bitcoin_redeem_2of2(tmpctx, &local_funding_pubkey, &remote_funding_pubkey); - /* Need input amount for signing */ + /*~ Segregated Witness also added the input amount to the signing + * algorithm; it's only part of the input implicitly (it's part of the + * output it's spending), so in our 'bitcoin_tx' structure it's a + * pointer, as we don't always know it (and zero is a valid amount, so + * NULL is better to mean 'unknown' and has the nice property that + * you'll crash if you assume it's there and you're wrong. */ tx->input[0].amount = tal_dup(tx->input, u64, &funding_amount); sign_tx_input(tx, 0, NULL, funding_wscript, &secrets.funding_privkey, @@ -488,6 +664,13 @@ static struct io_plan *handle_sign_commitment_tx(struct io_conn *conn, take(towire_hsm_sign_commitment_tx_reply(NULL, &sig))); } +/*~ This is used by channeld to create signatures for the remote peer's + * commitment transaction. It's functionally identical to signing our own, + * but we expect to do this repeatedly as commitment transactions are + * updated. + * + * The HSM almost certainly *should* do more checks before signing! + */ /* FIXME: make sure it meets some criteria? */ static struct io_plan *handle_sign_remote_commitment_tx(struct io_conn *conn, struct client *c, @@ -524,6 +707,8 @@ static struct io_plan *handle_sign_remote_commitment_tx(struct io_conn *conn, return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig))); } +/*~ This is used by channeld to create signatures for the remote peer's + * HTLC transactions. 
*/ static struct io_plan *handle_sign_remote_htlc_tx(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -567,6 +752,8 @@ static struct io_plan *handle_sign_remote_htlc_tx(struct io_conn *conn, return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig))); } +/*~ This covers several cases where onchaind is creating a transaction which + * sends funds to our internal wallet. */ /* FIXME: Derive output address for this client, and check it here! */ static struct io_plan *handle_sign_to_us_tx(struct io_conn *conn, struct client *c, @@ -591,6 +778,10 @@ static struct io_plan *handle_sign_to_us_tx(struct io_conn *conn, return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig))); } +/*~ When we send a commitment transaction onchain (unilateral close), there's + * a delay before we can spend it. onchaind does an explicit transaction to + * transfer it to the wallet so that the wallet doesn't need to remember how to spend + * this complex transaction. */ static struct io_plan *handle_sign_delayed_payment_to_us(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -604,6 +795,7 @@ static struct io_plan *handle_sign_delayed_payment_to_us(struct io_conn *conn, struct privkey privkey; u8 *wscript; + /*~ We don't derive the wscript ourselves, but perhaps we should? */ if (!fromwire_hsm_sign_delayed_payment_to_us(tmpctx, msg_in, &commit_num, &tx, &wscript, @@ -612,14 +804,22 @@ static struct io_plan *handle_sign_delayed_payment_to_us(struct io_conn *conn, get_channel_seed(&c->id, c->dbid, &channel_seed); + /*~ ccan/crypto/shachain is how we efficiently derive 2^48 ordered + * preimages from a single seed; the twist is that as the preimages + * are revealed, you can generate the previous ones yourself, needing + * to only keep log(N) of them at any time. */ if (!derive_shaseed(&channel_seed, &shaseed)) return bad_req_fmt(conn, c, msg_in, "bad derive_shaseed"); + /*~ BOLT #3 describes exactly how this is used to generate the Nth + * per-commitment point. 
*/ if (!per_commit_point(&shaseed, &per_commitment_point, commit_num)) return bad_req_fmt(conn, c, msg_in, "bad per_commitment_point %"PRIu64, commit_num); + /*~ ... which is combined with the basepoint to generate the N'th key. + */ if (!derive_delayed_payment_basepoint(&channel_seed, &basepoint, &basepoint_secret)) @@ -635,6 +835,9 @@ static struct io_plan *handle_sign_delayed_payment_to_us(struct io_conn *conn, tx, &privkey, wscript, input_amount); } +/*~ This is used when a commitment transaction is onchain, and has an HTLC + * output paying to us (because we have the preimage); this signs that + * transaction, which lightningd will broadcast to collect the funds. */ static struct io_plan *handle_sign_remote_htlc_to_us(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -671,6 +874,9 @@ static struct io_plan *handle_sign_remote_htlc_to_us(struct io_conn *conn, tx, &privkey, wscript, input_amount); } +/*~ This is used when the remote peer's commitment transaction is revoked; + * we can use the revocation secret to spend the outputs. For simplicity, + * we do them one at a time, though. */ static struct io_plan *handle_sign_penalty_to_us(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -711,6 +917,9 @@ static struct io_plan *handle_sign_penalty_to_us(struct io_conn *conn, tx, &privkey, wscript, input_amount); } +/*~ This is used when a commitment transaction is onchain, and has an HTLC + * output paying to them, which has timed out; this signs that transaction, + * which lightningd will broadcast to collect the funds. */ static struct io_plan *handle_sign_local_htlc_tx(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -766,6 +975,11 @@ static struct io_plan *handle_sign_local_htlc_tx(struct io_conn *conn, return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig))); } +/*~ This gets the Nth per-commitment point, and for N > 2, returns the + * grandparent per-commitment secret. 
This pattern is because after + * negotiating commitment N-1, we send them the next per-commitment point, + * and reveal the previous per-commitment secret as a promise not to spend + * the previous commitment transaction. */ static struct io_plan *handle_get_per_commitment_point(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -797,12 +1011,19 @@ static struct io_plan *handle_get_per_commitment_point(struct io_conn *conn, } else old_secret = NULL; + /*~ hsm_client_wire.csv marks the secret field here optional, so it only + * gets included if the parameter is non-NULL. We violate 80 columns + * pretty badly here, but it's a recommendation not a religion. */ return req_reply(conn, c, take(towire_hsm_get_per_commitment_point_reply(NULL, &per_commitment_point, old_secret))); } +/*~ This is used when the remote peer claims to have knowledge of future + * commitment states (option_data_loss_protect in the spec) which means we've + * been restored from backup or something, and may have already revealed + * secrets. We carefully check that this is true, here. */ static struct io_plan *handle_check_future_secret(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -823,11 +1044,16 @@ static struct io_plan *handle_check_future_secret(struct io_conn *conn, return bad_req_fmt(conn, c, msg_in, "bad commit secret #%"PRIu64, n); + /*~ Note the special secret_eq_consttime: we generate foo_eq for many + * types using ccan/structeq, but not 'struct secret' because any + * comparison risks leaking information about the secret if it is + * timing dependent. */ return req_reply(conn, c, take(towire_hsm_check_future_secret_reply(NULL, secret_eq_consttime(&secret, &suggested)))); } +/* This is used by closingd to sign off on a mutual close tx. 
*/ static struct io_plan *handle_sign_mutual_close_tx(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -866,6 +1092,8 @@ static struct io_plan *handle_sign_mutual_close_tx(struct io_conn *conn, return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig))); } +/* This is used by the master to create a new client connection (which + * becomes the HSM_FD for the subdaemon after forking). */ static struct io_plan *pass_client_hsmfd(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -880,15 +1108,30 @@ static struct io_plan *pass_client_hsmfd(struct io_conn *conn, if (!fromwire_hsm_client_hsmfd(msg_in, &id, &dbid, &capabilities)) return bad_req(conn, c, msg_in); + /* socketpair is a bi-directional pipe, which is what we want. */ if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0) - status_failed(STATUS_FAIL_INTERNAL_ERROR, "creating fds: %s", strerror(errno)); + status_failed(STATUS_FAIL_INTERNAL_ERROR, "creating fds: %s", + strerror(errno)); new_client(&c->dc, &id, dbid, capabilities, fds[0]); daemon_conn_send(&c->dc, take(towire_hsm_client_hsmfd_reply(NULL))); + /* There's arcane UNIX magic to send an open file descriptor over a + * UNIX domain socket. There's no great way to autogenerate this + * though; especially for the receive side, so we always pass these + * manually immediately following the message. */ daemon_conn_send_fd(&c->dc, fds[1]); return daemon_conn_read_next(conn, &c->dc); } +/*~ For almost every wallet tx we use the BIP32 seed, but not for onchain + * unilateral closes from a peer: they (may) have an output to us using a + * public key based on the channel basepoints. It's a bit spammy to spend + * those immediately just to make the wallet simpler, and we didn't appreciate + * the problem when we designed the protocol for commitment transaction keys. + * + * So we store just enough about the channel it came from (which may be + * long-gone) to regenerate the keys here. 
That has the added advantage that + * the secrets themselves stay within the HSM. */ static void hsm_unilateral_close_privkey(struct privkey *dst, struct unilateral_close_info *info) { @@ -907,9 +1150,7 @@ static void hsm_unilateral_close_privkey(struct privkey *dst, } } -/** - * hsm_key_for_utxo - generate the keypair matching the utxo - */ +/* This gets the bitcoin private key needed to spend from our wallet. */ static void hsm_key_for_utxo(struct privkey *privkey, struct pubkey *pubkey, const struct utxo *utxo) { @@ -919,17 +1160,32 @@ static void hsm_key_for_utxo(struct privkey *privkey, struct pubkey *pubkey, status_debug("Unilateral close output, deriving secrets"); hsm_unilateral_close_privkey(privkey, utxo->close_info); pubkey_from_privkey(privkey, pubkey); - status_debug("Derived public key %s from unilateral close", type_to_string(tmpctx, struct pubkey, pubkey)); + status_debug("Derived public key %s from unilateral close", + type_to_string(tmpctx, struct pubkey, pubkey)); } else { /* Simple case: just get derive via HD-derivation */ bitcoin_key(privkey, pubkey, utxo->keyindex); } } +/* This completes the tx by filling in the input scripts with signatures. */ static void sign_all_inputs(struct bitcoin_tx *tx, struct utxo **utxos) { + /* FIXME: sign_tx_input is dumb and needs all input->script to be + * NULL, so we gather these here and assign them at the end */ u8 **scriptSigs = tal_arr(tmpctx, u8 *, tal_count(utxos)); + /*~ Deep in my mind there's a continuous battle: should arrays be + * named as singular or plural? Is consistency the sign of a weak + * mind? + * + * ZmnSCPxj answers thusly: One must make peace with the fact, that + * the array itself is singular, yet its contents are plural. Do you + * name the array, or do you name its contents? Is the array itself + * the thing and the whole of the thing, or is it its contents that + * define what it is? + * + *... I'm not sure that helps! 
*/ assert(tal_count(tx->input) == tal_count(utxos)); for (size_t i = 0; i < tal_count(utxos); i++) { struct pubkey inkey; @@ -938,29 +1194,38 @@ static void sign_all_inputs(struct bitcoin_tx *tx, struct utxo **utxos) u8 *subscript, *wscript; secp256k1_ecdsa_signature sig; + /* Figure out keys to spend this. */ hsm_key_for_utxo(&inprivkey, &inkey, in); + /* It's either a p2wpkh or p2sh (we support that so people from + * the last bitcoin era can put funds into the wallet) */ wscript = p2wpkh_scriptcode(tmpctx, &inkey); if (in->is_p2sh) { + /* For P2SH-wrapped Segwit, the (implied) redeemScript + * is defined in BIP141 */ subscript = bitcoin_redeem_p2sh_p2wpkh(tmpctx, &inkey); scriptSigs[i] = bitcoin_scriptsig_p2sh_p2wpkh(tx, &inkey); } else { + /* Pure segwit uses an empty inputScript; NULL has + * tal_count() == 0, so it works great here. */ subscript = NULL; scriptSigs[i] = NULL; } + /* This is the core crypto magic. */ sign_tx_input(tx, i, subscript, wscript, &inprivkey, &inkey, &sig); + /* The witness is [sig] [key] */ tx->input[i].witness = bitcoin_witness_p2wpkh(tx, &sig, &inkey); } - /* Now complete the transaction by attaching the scriptSigs where necessary */ + /* Now complete the transaction by attaching the scriptSigs */ for (size_t i = 0; i < tal_count(utxos); i++) tx->input[i].script = scriptSigs[i]; } -/* Note that it's the main daemon that asks for the funding signature so it - * can broadcast it. */ +/*~ lightningd asks us to sign the transaction to fund a channel; it feeds us + * the set of inputs and the local and remote pubkeys, and we sign it. */ static struct io_plan *handle_sign_funding_tx(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -987,6 +1252,11 @@ static struct io_plan *handle_sign_funding_tx(struct io_conn *conn, changekey = NULL; tx = funding_tx(tmpctx, &outnum, + /*~ For simplicity, our generated code is not const + * correct. 
The C rules around const and + * pointer-to-pointer are a bit weird, so we use + * ccan/cast which ensures the type is correct and + * we're not casting something random */ cast_const2(const struct utxo **, utxos), satoshi_out, &local_pubkey, &remote_pubkey, change_out, changekey, @@ -996,9 +1266,8 @@ static struct io_plan *handle_sign_funding_tx(struct io_conn *conn, return req_reply(conn, c, take(towire_hsm_sign_funding_reply(NULL, tx))); } -/** - * sign_withdrawal_tx - Generate and sign a withdrawal transaction from the master - */ +/*~ lightningd asks us to sign a withdrawal; same as above, but in theory + * we can do more to check the previous case is valid. */ static struct io_plan *handle_sign_withdrawal_tx(struct io_conn *conn, struct client *c, const u8 *msg_in) @@ -1032,13 +1301,18 @@ static struct io_plan *handle_sign_withdrawal_tx(struct io_conn *conn, take(towire_hsm_sign_withdrawal_reply(NULL, tx))); } -/** - * sign_invoice - Sign an invoice with our key. - */ +/*~ Lightning invoices, defined by BOLT 11, are signed. This has been + * surprisingly controversial; it means a node needs to be online to create + * invoices. However, it seems clear to me that in a world without + * intermediaries you need proof that you have received an offer (the + * signature), as well as proof that you've paid it (the preimage). */ static struct io_plan *handle_sign_invoice(struct io_conn *conn, struct client *c, const u8 *msg_in) { + /*~ We make up a 'u5' type to represent BOLT11's 5-bits-per-byte + * format: it's only for human consumption, as typedefs are almost + * entirely transparent to the C compiler. 
*/ u5 *u5bytes; u8 *hrpu8; char *hrp; @@ -1050,8 +1324,25 @@ static struct io_plan *handle_sign_invoice(struct io_conn *conn, if (!fromwire_hsm_sign_invoice(tmpctx, msg_in, &u5bytes, &hrpu8)) return bad_req(conn, c, msg_in); + /* BOLT #11: + * + * A writer MUST set `signature` to a valid 512-bit secp256k1 + * signature of the SHA2 256-bit hash of the human-readable part, + * represented as UTF-8 bytes, concatenated with the data part + * (excluding the signature) with zero bits appended to pad the data + * to the next byte boundary, with a trailing byte containing the + * recovery ID (0, 1, 2 or 3). + */ + /* FIXME: Check invoice! */ + /* tal_dup_arr() does what you'd expect: allocate an array by copying + * another; the cast is needed because the hrp is a 'char' array, not + * a 'u8' (unsigned char) as it's the "human readable" part. + * + * The final arg of tal_dup_arr() is how many extra bytes to allocate: + * it's so often zero that I've thought about dropping the argument, but + * in cases like this (adding a NUL terminator) it's perfect. */ hrp = tal_dup_arr(tmpctx, char, (char *)hrpu8, tal_count(hrpu8), 1); hrp[tal_count(hrpu8)] = '\0'; @@ -1060,6 +1351,8 @@ static struct io_plan *handle_sign_invoice(struct io_conn *conn, hash_u5_done(&hu5, &sha); node_key(&node_pkey, NULL); + /*~ By no small coincidence, this libsecp routine uses the exact + * recovery signature format mandated by BOLT 11. */ if (!secp256k1_ecdsa_sign_recoverable(secp256k1_ctx, &rsig, (const u8 *)&sha, node_pkey.secret.data, @@ -1071,10 +1364,21 @@ static struct io_plan *handle_sign_invoice(struct io_conn *conn, take(towire_hsm_sign_invoice_reply(NULL, &rsig))); } +/*~ It's optional for nodes to send node_announcement, but it lets us set our + * favourite color and cool alias! Plus other minor details like how to + * connect to us. */ static struct io_plan *handle_sign_node_announcement(struct io_conn *conn, struct client *c, const u8 *msg_in) { + /* BOLT #7: + * + * The origin node: + *... 
+ * - MUST set `signature` to the signature of the double-SHA256 of the + * entire remaining packet after `signature` (using the key given by + * `node_id`). + */ /* 2 bytes msg type + 64 bytes signature */ size_t offset = 66; struct sha256_double hash; @@ -1100,9 +1404,24 @@ static struct io_plan *handle_sign_node_announcement(struct io_conn *conn, return req_reply(conn, c, take(reply)); } +/*~ This routine checks that a client is allowed to call the handler. */ static bool check_client_capabilities(struct client *client, enum hsm_wire_type t) { + /*~ Here's a useful trick: enums in C are not real types, they're + * semantic sugar sprinkled over an int, basically (in fact, older + * versions of gcc used to convert the values to ints in the parser!). + * + * But GCC will do one thing for us: if we have a switch statement + * with a controlling expression which is an enum, it will warn us + * if a declared enum value is *not* handled in the switch, eg: + * enumeration value ‘FOOBAR’ not handled in switch [-Werror=switch] + * + * This only works if there's no 'default' label, which is sometimes + * hard, as we *can* have non-enum values in our enum. But the tradeoff + * is worth it so the compiler tells us everywhere we have to fix when + * we add a new enum identifier! + */ switch (t) { case WIRE_HSM_ECDH_REQ: return (client->capabilities & HSM_CAP_ECDH) != 0; @@ -1138,7 +1457,9 @@ static bool check_client_capabilities(struct client *client, case WIRE_HSM_GET_CHANNEL_BASEPOINTS: return (client->capabilities & HSM_CAP_MASTER) != 0; - /* These are messages sent by the HSM so we should never receive them */ + /*~ These are messages sent by the HSM so we should never receive them. + * FIXME: Since we autogenerate these, we should really generate separate + * enums for replies to avoid this kind of clutter! 
*/ case WIRE_HSM_ECDH_RESP: case WIRE_HSM_CANNOUNCEMENT_SIG_REPLY: case WIRE_HSM_CUPDATE_SIG_REPLY: @@ -1159,9 +1480,16 @@ static bool check_client_capabilities(struct client *client, return false; } +/*~ This is the core of the HSM daemon: handling requests. */ static struct io_plan *handle_client(struct io_conn *conn, struct daemon_conn *dc) { + /*~ Note the use of container_of here: this is the Linux kernel way of + * doing callbacks. Rather than have struct daemon_conn contain a + * void * pointer to the structure for this use, we simply embed the + * daemon_conn in the structure; container_of is a fancy way of doing + * pointer arithmetic to get the containing structure, saving a + * pointer. */ struct client *c = container_of(dc, struct client, dc); enum hsm_wire_type t = fromwire_peektype(dc->msg_in); @@ -1256,6 +1584,9 @@ static struct io_plan *handle_client(struct io_conn *conn, return bad_req_fmt(conn, c, dc->msg_in, "Unknown request"); } +/*~ This is the destructor on our client: we may call it manually, but + * generally it's called because the io_conn associated with the client is + * closed by the other end. */ static void destroy_client(struct client *c) { if (!uintmap_del(&clients, c->dbid)) @@ -1271,6 +1602,7 @@ static struct client *new_client(struct daemon_conn *master, { struct client *c = tal(master, struct client); + /*~ All-zero pubkey is used for the initial master connection */ if (id) { c->id = *id; } else { @@ -1280,13 +1612,25 @@ static struct client *new_client(struct daemon_conn *master, c->master = master; c->capabilities = capabilities; + /*~ This is our daemon_conn infrastructure, which does the queueing for + * us; we just tell it what our handler function is. */ daemon_conn_init(c, &c->dc, fd, handle_client, NULL); - /* Free the connection if we exit everything. */ + /*~ tal_steal() moves a pointer to a new parent. 
At this point, the + * hierarchy is: + * + * master -> c -> daemon_conn.conn + * + * We want to invert the bottom two, so that if the io_conn closes, + * the client is freed: + * + * master -> c->conn -> c. + */ tal_steal(master, c->dc.conn); - /* Free client when connection freed. */ tal_steal(c->dc.conn, c); + /* We put the special zero-dbid HSM connections into an array, the rest + * go into the map. */ if (dbid == 0) { assert(num_dbid_zero_clients < ARRAY_SIZE(dbid_zero_clients)); dbid_zero_clients[num_dbid_zero_clients++] = c; @@ -1319,6 +1663,7 @@ int main(int argc, char *argv[]) setup_locale(); + /* This sets up tmpctx, various DEVELOPER options, backtraces, etc. */ subdaemon_setup(argc, argv); /* A trivial daemon_conn just for writing. */ @@ -1337,6 +1682,18 @@ int main(int argc, char *argv[]) /* When conn closes, everything is freed. */ io_set_finish(master->dc.conn, master_gone, &master->dc); + /*~ The two NULL args are a list of timers, and the timer which expired: + * we don't have any timers. */ io_loop(NULL, NULL); + + /*~ This should never be reached: io_loop only exits on io_break which + * we don't call, a timer expiry which we don't have, or all connections + * being closed, and closing the master calls master_gone. */ abort(); } + +/*~ Congratulations on making it through the first of the seven dwarves! + * (And Christian wondered why I'm so fond of having separate daemons!). + * + * We continue our story in the next-more-complex daemon: connectd/connectd.c + */