From 573f2f065a473eb93cb51680349d668db1031432 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 20 Sep 2018 14:01:09 +0930
Subject: [PATCH] hsmd: document as part II of our journey.

Thanks greatly to the four people who I *know* have read this:

	@wythe, @ZmnSCPxj, @SimonVrouwe, and @cdecker

Your feedback will help future developers seeking enlightenment!

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 hsmd/hsmd.c | 425 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 391 insertions(+), 34 deletions(-)

diff --git a/hsmd/hsmd.c b/hsmd/hsmd.c
index 2c637176f..21cb1cc97 100644
--- a/hsmd/hsmd.c
+++ b/hsmd/hsmd.c
@@ -1,3 +1,11 @@
+/*~ Welcome to the hsm daemon: keeper of our secrets!
+ *
+ * This is a separate daemon which keeps a root secret from which all others
+ * are generated.  It starts with one client: lightningd, which can ask for
+ * new sockets for other clients.  Each client has a simple capability map
+ * which indicates what it's allowed to ask for.  We're entirely driven
+ * by request, response.
+ */
 #include <bitcoin/address.h>
 #include <bitcoin/privkey.h>
 #include <bitcoin/pubkey.h>
@@ -31,6 +39,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <hsmd/capabilities.h>
+/*~ All gen_ files are autogenerated; in this case by tools/generate-wire.py */
 #include <hsmd/gen_hsm_wire.h>
 #include <inttypes.h>
 #include <secp256k1_ecdh.h>
@@ -43,35 +52,55 @@
 #include <wire/gen_peer_wire.h>
 #include <wire/wire_io.h>
 
+/*~ Each subdaemon is started with stdin connected to lightningd (for status
+ * messages), and stderr untouched (for emergency printing).  File descriptors
+ * 3 and beyond are set up on other sockets: for hsmd, fd 3 is the request
+ * stream from lightningd. */
 #define REQ_FD 3
 
-/* Nobody will ever find it here! */
+/*~ Nobody will ever find it here!  hsm_secret is our root secret, the bip32
+ * tree is derived from that, and cached here. */
 static struct {
 	struct secret hsm_secret;
 	struct ext_key bip32;
 } secretstuff;
 
+/*~ We keep track of clients, but there's not much to keep. */
 struct client {
 	struct daemon_conn dc;
 	struct daemon_conn *master;
 
+	/*~ Useful for logging, but also used to derive the per-channel seed. */
 	struct pubkey id;
+
+	/*~ This is a unique value handed to us from lightningd, used for
+	 * per-channel seed generation (a single id may have multiple channels
+	 * over time).
+	 *
+	 * It's actually zero for the initial lightningd client connection and
+	 * the ones for gossipd and connectd, which don't have channels
+	 * associated. */
 	u64 dbid;
 
 	/* What is this client allowed to ask for? */
 	u64 capabilities;
 };
 
-/* We keep a map of nonzero dbid -> clients */
+/*~ We keep a map of nonzero dbid -> clients, mainly for leak detection.
+ * This is ccan/uintmap, which maps u64 to some (non-NULL) pointer.
+ * I really dislike these kinds of declaration-via-magic macro things, as
+ * tags can't find them without special hacks, but the payoff here is that
+ * the map is typesafe: the compiler won't let you put anything in but a
+ * struct client pointer. */
 static UINTMAP(struct client *) clients;
-/* We get three zero-dbid clients: master, gossipd and connnectd. */
+/*~ Plus the three zero-dbid clients: master, gossipd and connectd. */
 static struct client *dbid_zero_clients[3];
 static size_t num_dbid_zero_clients;
 
-/* For reporting issues. */
+/*~ We need this deep inside bad_req_fmt, so we make it a global. */
 static struct daemon_conn *status_conn;
 
-/* FIXME: This is used by debug.c, but doesn't apply to us. */
+/*~ FIXME: This is used by debug.c.  Doesn't apply to us, but lets us link. */
 extern void dev_disconnect_init(int fd);
 void dev_disconnect_init(int fd UNUSED) { }
 
@@ -82,6 +111,13 @@ static struct client *new_client(struct daemon_conn *master,
 				 const u64 capabilities,
 				 int fd);
 
+/*~ ccan/compiler.h defines PRINTF_FMT as the gcc compiler hint so it will
+ * check that fmt and other trailing arguments really are the correct type.
+ *
+ * This is a convenient helper to tell lightningd we've received a bad request
+ * and close the client connection.  This should never happen, of course, but
+ * we definitely want to log if it does.
+ */
 static PRINTF_FMT(4,5)
 	struct io_plan *bad_req_fmt(struct io_conn *conn,
 				    struct client *c,
@@ -95,20 +131,32 @@ static PRINTF_FMT(4,5)
 	str = tal_fmt(tmpctx, fmt, ap);
 	va_end(ap);
 
-	/* If the client was actually lightningd, it's Game Over. */
+	/*~ If the client was actually lightningd, it's Game Over; we actually
+	 * fail in this case, and it will too. */
 	if (&c->dc == c->master) {
 		status_broken("%s", str);
 		master_badmsg(fromwire_peektype(msg_in), msg_in);
 	}
 
+	/*~ Note the use of NULL as the ctx arg to towire_hsmstatus_: only
+	 * use NULL as the allocation when we're about to immediately free it
+	 * or hand it off with take(), as here.  That makes it clear we don't
+	 * expect it to linger, and in fact our memleak detection will
+	 * complain if it does (unlike using the deliberately-transient
+	 * tmpctx). */
 	daemon_conn_send(status_conn,
 			 take(towire_hsmstatus_client_bad_request(NULL,
 								  &c->id,
 								  str,
 								  msg_in)));
+
+	/*~ The way ccan/io works is that you return the "plan" for what to do
+	 * next (eg. io_read).  io_close() is special: it means to close the
+	 * connection. */
 	return io_close(conn);
 }
 
+/* Convenience wrapper for when we simply can't parse. */
 static struct io_plan *bad_req(struct io_conn *conn,
 			       struct client *c,
 			       const u8 *msg_in)
@@ -116,6 +164,7 @@ static struct io_plan *bad_req(struct io_conn *conn,
 	return bad_req_fmt(conn, c, msg_in, "could not parse request");
 }
 
+/* This is the common pattern for the tail of each handler in this file. */
 static struct io_plan *req_reply(struct io_conn *conn,
 				 struct client *c,
 				 const u8 *msg_out TAKES)
@@ -124,18 +173,25 @@ static struct io_plan *req_reply(struct io_conn *conn,
 	return daemon_conn_read_next(conn, &c->dc);
 }
 
+/*~ This returns the secret and/or public key for this node. */
 static void node_key(struct privkey *node_privkey, struct pubkey *node_id)
 {
 	u32 salt = 0;
 	struct privkey unused_s;
 	struct pubkey unused_k;
 
+	/* If caller specifies NULL, they don't want the results. */
 	if (node_privkey == NULL)
 		node_privkey = &unused_s;
 	else if (node_id == NULL)
 		node_id = &unused_k;
 
+	/*~ So, there is apparently a 1 in 2^127 chance that a random value is
+	 * not a valid private key, so this never actually loops. */
 	do {
+		/*~ ccan/crypto/hkdf_sha256 implements RFC5869 "HMAC-based
+		 * Extract-and-Expand Key Derivation Function (HKDF)".  If a
+		 * derived key leaks, the other keys are not compromised. */
 		hkdf_sha256(node_privkey, sizeof(*node_privkey),
 			    &salt, sizeof(salt),
 			    &secretstuff.hsm_secret,
@@ -146,28 +202,40 @@ static void node_key(struct privkey *node_privkey, struct pubkey *node_id)
 					     node_privkey->secret.data));
 }
 
-/**
- * hsm_channel_secret_base -- Derive the base secret seed for per-channel seeds
- *
- * This secret is the basis for all per-channel secrets: the per-channel seeds
- * will be generated mixing in the channel_id and the peer node_id.
- */
+/*~ This secret is the basis for all per-channel secrets: the per-channel seeds
+ * will be generated by mixing in the dbid and the peer node_id. */
 static void hsm_channel_secret_base(struct secret *channel_seed_base)
 {
 	hkdf_sha256(channel_seed_base, sizeof(struct secret), NULL, 0,
 		    &secretstuff.hsm_secret, sizeof(secretstuff.hsm_secret),
+		    /*~ Initially, we didn't support multiple channels per
+		     * peer at all: a channel had to be completely forgotten
+		     * before another could exist.  That was slightly relaxed,
+		     * but the phrase "peer seed" is wired into the seed
+		     * generation here, so we need to keep it that way for
+		     * existing clients, rather than using "channel seed". */
 		    "peer seed", strlen("peer seed"));
 }
 
+/*~ This gets the seed for this particular channel. */
 static void get_channel_seed(const struct pubkey *peer_id, u64 dbid,
 			     struct secret *channel_seed)
 {
 	struct secret channel_base;
 	u8 input[PUBKEY_DER_LEN + sizeof(dbid)];
+	/*~ Again, "per-peer" should be "per-channel", but Hysterical Raisins */
 	const char *info = "per-peer seed";
 
+	/*~ We use the DER encoding of the pubkey, because it's platform
+	 * independent.  Since the dbid is unique, however, it's completely
+	 * unnecessary, but again, existing users can't be broken. */
+	/* FIXME: lnd has a nicer BIP32 method for deriving secrets which we
+	 * should migrate to. */
 	hsm_channel_secret_base(&channel_base);
 	pubkey_to_der(input, peer_id);
+	/*~ For all that talk about platform-independence, note that this
+	 * field is endian-dependent!  But let's face it, little-endian won.
+	 * In related news, we don't support EBCDIC or middle-endian. */
 	memcpy(input + PUBKEY_DER_LEN, &dbid, sizeof(dbid));
 
 	hkdf_sha256(channel_seed, sizeof(*channel_seed),
@@ -176,6 +244,7 @@ static void get_channel_seed(const struct pubkey *peer_id, u64 dbid,
 		    info, strlen(info));
 }
 
+/*~ Called at startup to derive the bip32 field. */
 static void populate_secretstuff(void)
 {
 	u8 bip32_seed[BIP32_ENTROPY_LEN_256];
@@ -216,6 +285,9 @@ static void populate_secretstuff(void)
 	/* Hence child 0, then child 0 again to get extkey to derive from. */
 	if (bip32_key_from_parent(&master_extkey, 0, BIP32_FLAG_KEY_PRIVATE,
 				  &child_extkey) != WALLY_OK)
+		/*~ status_failed() is a helper which exits and sends lightningd
+		 * a message about what happened.  For hsmd, that's fatal to
+		 * lightningd. */
 		status_failed(STATUS_FAIL_INTERNAL_ERROR,
 			      "Can't derive child bip32 key");
 
@@ -225,7 +297,8 @@ static void populate_secretstuff(void)
 			      "Can't derive private bip32 key");
 }
 
-/* If privkey is NULL, we don't fill it in */
+/*~ Get the keys for this given BIP32 index: if privkey is NULL, we
+ * don't fill it in. */
 static void bitcoin_key(struct privkey *privkey, struct pubkey *pubkey,
 			u32 index)
 {
@@ -239,12 +312,15 @@ static void bitcoin_key(struct privkey *privkey, struct pubkey *pubkey,
 		status_failed(STATUS_FAIL_MASTER_IO,
 			      "Index %u too great", index);
 
+	/*~ This uses libwally, which doesn't dovetail directly with
+	 * libsecp256k1 even though it, too, uses it internally. */
 	if (bip32_key_from_parent(&secretstuff.bip32, index,
 				  BIP32_FLAG_KEY_PRIVATE, &ext) != WALLY_OK)
 		status_failed(STATUS_FAIL_INTERNAL_ERROR,
 			      "BIP32 of %u failed", index);
 
-	/* libwally says: The private key with prefix byte 0 */
+	/* libwally says: The private key with prefix byte 0; remove it
+	 * for libsecp256k1. */
 	memcpy(privkey->secret.data, ext.priv_key+1, 32);
 	if (!secp256k1_ec_pubkey_create(secp256k1_ctx, &pubkey->pubkey,
 					privkey->secret.data))
@@ -252,32 +328,50 @@ static void bitcoin_key(struct privkey *privkey, struct pubkey *pubkey,
 			      "BIP32 pubkey %u create failed", index);
 }
 
+/*~ We store our root secret in a "hsm_secret" file (like all of c-lightning,
+ * we run in the user's .lightningd directory). */
 static void maybe_create_new_hsm(void)
 {
+	/*~ Note that this is opened for write-only, even though the permissions
+	 * are set to read-only.  That's perfectly valid! */
 	int fd = open("hsm_secret", O_CREAT|O_EXCL|O_WRONLY, 0400);
 	if (fd < 0) {
+		/* If this is not the first time we've run, it will exist. */
 		if (errno == EEXIST)
 			return;
 		status_failed(STATUS_FAIL_INTERNAL_ERROR,
 			      "creating: %s", strerror(errno));
 	}
 
+	/*~ This is libsodium's cryptographic randomness routine: we assume
+	 * it's doing a good job. */
 	randombytes_buf(&secretstuff.hsm_secret, sizeof(secretstuff.hsm_secret));
+	/*~ ccan/read_write_all has a more convenient return than write() where
+	 * we'd have to check the return value == the length we gave: write()
+	 * can return short on normal files if we run out of disk space. */
 	if (!write_all(fd, &secretstuff.hsm_secret, sizeof(secretstuff.hsm_secret))) {
+		/* ccan/noerr contains useful routines like this, which don't
+		 * clobber errno, so we can use it in our error report. */
 		unlink_noerr("hsm_secret");
 		status_failed(STATUS_FAIL_INTERNAL_ERROR,
 			      "writing: %s", strerror(errno));
 	}
+	/*~ fsync (mostly!) ensures that the file has reached the disk. */
 	if (fsync(fd) != 0) {
 		unlink_noerr("hsm_secret");
 		status_failed(STATUS_FAIL_INTERNAL_ERROR,
 			      "fsync: %s", strerror(errno));
 	}
+	/*~ This should never fail if fsync succeeded.  But paranoia good, and
+	 * bugs exist. */
 	if (close(fd) != 0) {
 		unlink_noerr("hsm_secret");
 		status_failed(STATUS_FAIL_INTERNAL_ERROR,
 			      "closing: %s", strerror(errno));
 	}
+	/*~ We actually need to sync the *directory itself* to make sure the
+	 * file exists!  You're only allowed to open directories read-only in
+	 * modern Unix though. */
 	fd = open(".", O_RDONLY);
 	if (fd < 0) {
 		status_failed(STATUS_FAIL_INTERNAL_ERROR,
@@ -289,9 +383,16 @@ static void maybe_create_new_hsm(void)
 			      "fsyncdir: %s", strerror(errno));
 	}
 	close(fd);
+	/*~ status_unusual() is good for things which are interesting and
+	 * definitely won't spam the logs.  Only status_broken() is higher;
+	 * status_info() is lower, then status_debug() and finally
+	 * status_io(). */
 	status_unusual("HSM: created new hsm_secret file");
 }
 
+/*~ We always load the HSM file, even if we just created it above.  This
+ * both unifies the code paths, and provides a nice sanity check that the
+ * file contents are as they will be for future invocations. */
 static void load_hsm(void)
 {
 	int fd = open("hsm_secret", O_RDONLY);
@@ -306,6 +407,8 @@ static void load_hsm(void)
 	populate_secretstuff();
 }
 
+/*~ This is the response to lightningd's HSM_INIT request, which is the first
+ * thing it sends. */
 static struct io_plan *init_hsm(struct io_conn *conn,
 				struct client *c,
 				const u8 *msg_in)
@@ -315,18 +418,31 @@ static struct io_plan *init_hsm(struct io_conn *conn,
 	/* This must be the master. */
 	assert(&c->dc == c->master);
 
+	/*~ The fromwire_* routines are autogenerated, based on the message
+	 * definitions in hsm_client_wire.csv.  The format of those files is
+	 * an extension of the simple comma-separated format output by the
+	 * BOLT tools/extract-formats.py tool. */
 	if (!fromwire_hsm_init(msg_in))
 		return bad_req(conn, c, msg_in);
 
 	maybe_create_new_hsm();
 	load_hsm();
 
+	/*~ We tell lightningd our node id and (public) bip32 seed. */
 	node_key(NULL, &node_id);
+
+	/*~ Note: marshalling a bip32 tree only marshals the public side,
+	 * not the secrets!  So we're not actually handing them out here!
+	 */
 	return req_reply(conn, c,
 			 take(towire_hsm_init_reply(NULL, &node_id,
 						    &secretstuff.bip32)));
 }
 
+/*~ The client has asked us to extract the shared secret from an EC Diffie
+ * Hellman token.  This doesn't leak any information, but requires the private
+ * key, so the hsmd performs it.  It's used to set up an encryption key for the
+ * connection handshaking (BOLT #8) and for the onion wrapping (BOLT #4). */
 static struct io_plan *handle_ecdh(struct io_conn *conn,
 				   struct client *c,
 				   const u8 *msg_in)
@@ -338,21 +454,44 @@ static struct io_plan *handle_ecdh(struct io_conn *conn,
 	if (!fromwire_hsm_ecdh_req(msg_in, &point))
 		return bad_req(conn, c, msg_in);
 
+	/*~ We simply use the secp256k1_ecdh function, which really shouldn't
+	 * fail (it does so iff the point is invalid). */
 	node_key(&privkey, NULL);
 	if (secp256k1_ecdh(secp256k1_ctx, ss.data, &point.pubkey,
 			   privkey.secret.data) != 1) {
 		return bad_req_fmt(conn, c, msg_in, "secp256k1_ecdh fail");
 	}
 
+	/*~ In the normal case, we return the shared secret, and then read
+	 * the next msg. */
 	return req_reply(conn, c, take(towire_hsm_ecdh_resp(NULL, &ss)));
 }
 
+/*~ The specific routine to sign the channel_announcement message.  This is
+ * defined in BOLT #7, and requires *two* signatures: one from this node's key
+ * (to prove it's from us), and one from the bitcoin key used to create the
+ * funding transaction (to prove we own the output). */
 static struct io_plan *handle_cannouncement_sig(struct io_conn *conn,
 						struct client *c,
 						const u8 *msg_in)
 {
-	/* First 2 + 256 byte are the signatures and msg type, skip them */
-	size_t offset = 258;
+	/*~ Our autogeneration code doesn't define field offsets, so we just
+	 * copy this from the spec itself.
+	 *
+	 * Note that 'check-source' will actually find and check this quote
+	 * against the spec (if available); whitespace is ignored and
+	 * ... means some content is skipped, but it works remarkably well to
+	 * track spec changes. */
+
+	/* BOLT #7:
+	 *
+	 * - MUST compute the double-SHA256 hash `h` of the message, beginning
+	 *   at offset 256, up to the end of the message.
+	 *     - Note: the hash skips the 4 signatures but hashes the rest of the
+	 *       message, including any future fields appended to the end.
+	 */
+	/* First two bytes are the msg type */
+	size_t offset = 2 + 256;
 	struct privkey node_pkey;
 	secp256k1_ecdsa_signature node_sig, bitcoin_sig;
 	struct sha256_double hash;
@@ -362,10 +501,18 @@ static struct io_plan *handle_cannouncement_sig(struct io_conn *conn,
 	struct privkey funding_privkey;
 	struct secret channel_seed;
 
+	/*~ You'll find FIXMEs like this scattered through the code.
+	 * Sometimes they suggest simple improvements which someone like
+	 * yourself should go ahead and implement.  Sometimes they're deceptive
+	 * quagmires which will cause you nothing but grief.  You decide! */
+
 	/* FIXME: We should cache these. */
 	get_channel_seed(&c->id, c->dbid, &channel_seed);
 	derive_funding_key(&channel_seed, &funding_pubkey, &funding_privkey);
 
+	/*~ fromwire_ routines which need to do allocation take a tal context
+	 * as their first field; tmpctx is good here since we won't need it
+	 * after this function. */
 	if (!fromwire_hsm_cannouncement_sig_req(tmpctx, msg_in, &ca))
 		return bad_req(conn, c, msg_in);
 
@@ -374,6 +521,8 @@ static struct io_plan *handle_cannouncement_sig(struct io_conn *conn,
 				   "bad cannounce length %zu",
 				   tal_count(ca));
 
+	/*~ Christian uses TODO(cdecker), but I'm sure he won't mind if you fix
+	 * this for him! */
 	/* TODO(cdecker) Check that this is actually a valid
 	 * channel_announcement */
 	node_key(&node_pkey, NULL);
@@ -387,10 +536,17 @@ static struct io_plan *handle_cannouncement_sig(struct io_conn *conn,
 	return req_reply(conn, c, take(reply));
 }
 
+/*~ The specific routine to sign the channel_update message. */
 static struct io_plan *handle_channel_update_sig(struct io_conn *conn,
 						 struct client *c,
 						 const u8 *msg_in)
 {
+	/* BOLT #7:
+	 *
+	 * - MUST set `signature` to the signature of the double-SHA256 of the
+	 *   entire remaining packet after `signature`, using its own
+	 *   `node_id`.
+	 */
 	/* 2 bytes msg type + 64 bytes signature */
 	size_t offset = 66;
 	struct privkey node_pkey;
@@ -428,6 +584,12 @@ static struct io_plan *handle_channel_update_sig(struct io_conn *conn,
 	return req_reply(conn, c, take(towire_hsm_cupdate_sig_reply(NULL, cu)));
 }
 
+/*~ This gets the basepoints for a channel; it's not private information really
+ * (we tell the peer this to establish a channel, as it sets up the keys used
+ * for each transaction).
+ *
+ * Note that this is asked by lightningd, so it tells us what channels it wants.
+ */
 static struct io_plan *handle_get_channel_basepoints(struct io_conn *conn,
 						     struct client *c,
 						     const u8 *msg_in)
@@ -450,6 +612,12 @@ static struct io_plan *handle_get_channel_basepoints(struct io_conn *conn,
 							      &funding_pubkey)));
 }
 
+/*~ This is another lightningd-only interface; signing a commit transaction.
+ * This is dangerous, since if we sign a revoked commitment tx we'll lose
+ * funds, thus it's only available to lightningd.
+ *
+ *
+ * Oh look, another FIXME! */
 /* FIXME: Ensure HSM never does this twice for same dbid! */
 static struct io_plan *handle_sign_commitment_tx(struct io_conn *conn,
 						 struct client *c,
@@ -474,10 +642,18 @@ static struct io_plan *handle_sign_commitment_tx(struct io_conn *conn,
 	derive_basepoints(&channel_seed,
 			  &local_funding_pubkey, NULL, &secrets, NULL);
 
+	/*~ Bitcoin signatures cover (part of) the script they're
+	 * executing; the rules are a bit complex in general, but for
+	 * Segregated Witness it's simply the current script. */
 	funding_wscript = bitcoin_redeem_2of2(tmpctx,
 					      &local_funding_pubkey,
 					      &remote_funding_pubkey);
-	/* Need input amount for signing */
+	/*~ Segregated Witness also added the input amount to the signing
+	 * algorithm; it's only part of the input implicitly (it's part of the
+	 * output it's spending), so in our 'bitcoin_tx' structure it's a
+	 * pointer, as we don't always know it (and zero is a valid amount, so
+	 * NULL is better to mean 'unknown' and has the nice property that
+	 * you'll crash if you assume it's there and you're wrong). */
 	tx->input[0].amount = tal_dup(tx->input, u64, &funding_amount);
 	sign_tx_input(tx, 0, NULL, funding_wscript,
 		      &secrets.funding_privkey,
@@ -488,6 +664,13 @@ static struct io_plan *handle_sign_commitment_tx(struct io_conn *conn,
 			 take(towire_hsm_sign_commitment_tx_reply(NULL, &sig)));
 }
 
+/*~ This is used by channeld to create signatures for the remote peer's
+ * commitment transaction.  It's functionally identical to signing our own,
+ * but we expect to do this repeatedly as commitment transactions are
+ * updated.
+ *
+ * The HSM almost certainly *should* do more checks before signing!
+ */
 /* FIXME: make sure it meets some criteria? */
 static struct io_plan *handle_sign_remote_commitment_tx(struct io_conn *conn,
 							struct client *c,
@@ -524,6 +707,8 @@ static struct io_plan *handle_sign_remote_commitment_tx(struct io_conn *conn,
 	return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig)));
 }
 
+/*~ This is used by channeld to create signatures for the remote peer's
+ * HTLC transactions. */
 static struct io_plan *handle_sign_remote_htlc_tx(struct io_conn *conn,
 						  struct client *c,
 						  const u8 *msg_in)
@@ -567,6 +752,8 @@ static struct io_plan *handle_sign_remote_htlc_tx(struct io_conn *conn,
 	return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig)));
 }
 
+/*~ This covers several cases where onchaind is creating a transaction which
+ * sends funds to our internal wallet. */
 /* FIXME: Derive output address for this client, and check it here! */
 static struct io_plan *handle_sign_to_us_tx(struct io_conn *conn,
 					    struct client *c,
@@ -591,6 +778,10 @@ static struct io_plan *handle_sign_to_us_tx(struct io_conn *conn,
 	return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig)));
 }
 
+/*~ When we send a commitment transaction onchain (unilateral close), there's
+ * a delay before we can spend it.  onchaind does an explicit transaction to
+ * transfer it to the wallet so that the wallet doesn't need to remember how
+ * to spend this complex transaction. */
 static struct io_plan *handle_sign_delayed_payment_to_us(struct io_conn *conn,
 							 struct client *c,
 							 const u8 *msg_in)
@@ -604,6 +795,7 @@ static struct io_plan *handle_sign_delayed_payment_to_us(struct io_conn *conn,
 	struct privkey privkey;
 	u8 *wscript;
 
+	/*~ We don't derive the wscript ourselves, but perhaps we should? */
 	if (!fromwire_hsm_sign_delayed_payment_to_us(tmpctx, msg_in,
 						     &commit_num,
 						     &tx, &wscript,
@@ -612,14 +804,22 @@ static struct io_plan *handle_sign_delayed_payment_to_us(struct io_conn *conn,
 
 	get_channel_seed(&c->id, c->dbid, &channel_seed);
 
+	/*~ ccan/crypto/shachain is how we efficiently derive 2^48 ordered
+	 * preimages from a single seed; the twist is that as the preimages
+	 * are revealed, you can generate the previous ones yourself, needing
+	 * to only keep log(N) of them at any time. */
 	if (!derive_shaseed(&channel_seed, &shaseed))
 		return bad_req_fmt(conn, c, msg_in, "bad derive_shaseed");
 
+	/*~ BOLT #3 describes exactly how this is used to generate the Nth
+	 * per-commitment point. */
 	if (!per_commit_point(&shaseed, &per_commitment_point, commit_num))
 		return bad_req_fmt(conn, c, msg_in,
 				   "bad per_commitment_point %"PRIu64,
 				   commit_num);
 
+	/*~ ... which is combined with the basepoint to generate the N'th key.
+	 */
 	if (!derive_delayed_payment_basepoint(&channel_seed,
 					      &basepoint,
 					      &basepoint_secret))
@@ -635,6 +835,9 @@ static struct io_plan *handle_sign_delayed_payment_to_us(struct io_conn *conn,
 				    tx, &privkey, wscript, input_amount);
 }
 
+/*~ This is used when a commitment transaction is onchain, and has an HTLC
+ * output paying to us (because we have the preimage); this signs that
+ * transaction, which lightningd will broadcast to collect the funds. */
 static struct io_plan *handle_sign_remote_htlc_to_us(struct io_conn *conn,
 						     struct client *c,
 						     const u8 *msg_in)
@@ -671,6 +874,9 @@ static struct io_plan *handle_sign_remote_htlc_to_us(struct io_conn *conn,
 				    tx, &privkey, wscript, input_amount);
 }
 
+/*~ This is used when the remote peer's commitment transaction is revoked;
+ * we can use the revocation secret to spend the outputs.  For simplicity,
+ * we do them one at a time, though. */
 static struct io_plan *handle_sign_penalty_to_us(struct io_conn *conn,
 						 struct client *c,
 						 const u8 *msg_in)
@@ -711,6 +917,9 @@ static struct io_plan *handle_sign_penalty_to_us(struct io_conn *conn,
 				    tx, &privkey, wscript, input_amount);
 }
 
+/*~ This is used when a commitment transaction is onchain, and has an HTLC
+ * output paying to them, which has timed out; this signs that transaction,
+ * which lightningd will broadcast to collect the funds. */
 static struct io_plan *handle_sign_local_htlc_tx(struct io_conn *conn,
 						 struct client *c,
 						 const u8 *msg_in)
@@ -766,6 +975,11 @@ static struct io_plan *handle_sign_local_htlc_tx(struct io_conn *conn,
 	return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig)));
 }
 
+/*~ This gets the Nth per-commitment point, and for N > 2, returns the
+ * grandparent per-commitment secret.  This pattern is because after
+ * negotiating commitment N-1, we send them the next per-commitment point,
+ * and reveal the previous per-commitment secret as a promise not to spend
+ * the previous commitment transaction. */
 static struct io_plan *handle_get_per_commitment_point(struct io_conn *conn,
 						       struct client *c,
 						       const u8 *msg_in)
@@ -797,12 +1011,19 @@ static struct io_plan *handle_get_per_commitment_point(struct io_conn *conn,
 	} else
 		old_secret = NULL;
 
+	/*~ hsm_client_wire.csv marks the secret field here optional, so it only
+	 * gets included if the parameter is non-NULL.  We violate 80 columns
+	 * pretty badly here, but it's a recommendation not a religion. */
 	return req_reply(conn, c,
 			 take(towire_hsm_get_per_commitment_point_reply(NULL,
 									&per_commitment_point,
 									old_secret)));
 }
 
+/*~ This is used when the remote peer claims to have knowledge of future
+ * commitment states (option_data_loss_protect in the spec) which means we've
+ * been restored from backup or something, and may have already revealed
+ * secrets.  We carefully check that this is true, here. */
 static struct io_plan *handle_check_future_secret(struct io_conn *conn,
 						  struct client *c,
 						  const u8 *msg_in)
@@ -823,11 +1044,16 @@ static struct io_plan *handle_check_future_secret(struct io_conn *conn,
 		return bad_req_fmt(conn, c, msg_in,
 				   "bad commit secret #%"PRIu64, n);
 
+	/*~ Note the special secret_eq_consttime: we generate foo_eq for many
+	 * types using ccan/structeq, but not 'struct secret' because any
+	 * comparison risks leaking information about the secret if it is
+	 * timing dependent. */
 	return req_reply(conn, c,
 			 take(towire_hsm_check_future_secret_reply(NULL,
 				   secret_eq_consttime(&secret, &suggested))));
 }
 
+/* This is used by closingd to sign off on a mutual close tx. */
 static struct io_plan *handle_sign_mutual_close_tx(struct io_conn *conn,
 						   struct client *c,
 						   const u8 *msg_in)
@@ -866,6 +1092,8 @@ static struct io_plan *handle_sign_mutual_close_tx(struct io_conn *conn,
 	return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig)));
 }
 
+/* This is used by the master to create a new client connection (which
+ * becomes the HSM_FD for the subdaemon after forking). */
 static struct io_plan *pass_client_hsmfd(struct io_conn *conn,
 					 struct client *c,
 					 const u8 *msg_in)
@@ -880,15 +1108,30 @@ static struct io_plan *pass_client_hsmfd(struct io_conn *conn,
 	if (!fromwire_hsm_client_hsmfd(msg_in, &id, &dbid, &capabilities))
 		return bad_req(conn, c, msg_in);
 
+	/* socketpair is a bi-directional pipe, which is what we want. */
 	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0)
-		status_failed(STATUS_FAIL_INTERNAL_ERROR, "creating fds: %s", strerror(errno));
+		status_failed(STATUS_FAIL_INTERNAL_ERROR, "creating fds: %s",
+			      strerror(errno));
 
 	new_client(&c->dc, &id, dbid, capabilities, fds[0]);
 	daemon_conn_send(&c->dc, take(towire_hsm_client_hsmfd_reply(NULL)));
+	/* There's arcane UNIX magic to send an open file descriptor over a
+	 * UNIX domain socket.  There's no great way to autogenerate this
+	 * though; especially for the receive side, so we always pass these
+	 * manually immediately following the message. */
 	daemon_conn_send_fd(&c->dc, fds[1]);
 	return daemon_conn_read_next(conn, &c->dc);
 }
 
+/*~ For almost every wallet tx we use the BIP32 seed, but not for onchain
+ * unilateral closes from a peer: they (may) have an output to us using a
+ * public key based on the channel basepoints.  It's a bit spammy to spend
+ * those immediately just to make the wallet simpler, and we didn't appreciate
+ * the problem when we designed the protocol for commitment transaction keys.
+ *
+ * So we store just enough about the channel it came from (which may be
+ * long-gone) to regenerate the keys here.  That has the added advantage that
+ * the secrets themselves stay within the HSM. */
 static void hsm_unilateral_close_privkey(struct privkey *dst,
 					 struct unilateral_close_info *info)
 {
@@ -907,9 +1150,7 @@ static void hsm_unilateral_close_privkey(struct privkey *dst,
 	}
 }
 
-/**
- * hsm_key_for_utxo - generate the keypair matching the utxo
- */
+/* This gets the bitcoin private key needed to spend from our wallet. */
 static void hsm_key_for_utxo(struct privkey *privkey, struct pubkey *pubkey,
 			     const struct utxo *utxo)
 {
@@ -919,17 +1160,32 @@ static void hsm_key_for_utxo(struct privkey *privkey, struct pubkey *pubkey,
 		status_debug("Unilateral close output, deriving secrets");
 		hsm_unilateral_close_privkey(privkey, utxo->close_info);
 		pubkey_from_privkey(privkey, pubkey);
-		status_debug("Derived public key %s from unilateral close", type_to_string(tmpctx, struct pubkey, pubkey));
+		status_debug("Derived public key %s from unilateral close",
+			     type_to_string(tmpctx, struct pubkey, pubkey));
 	} else {
 		/* Simple case: just get derive via HD-derivation */
 		bitcoin_key(privkey, pubkey, utxo->keyindex);
 	}
 }
 
+/* This completes the tx by filling in the input scripts with signatures. */
 static void sign_all_inputs(struct bitcoin_tx *tx, struct utxo **utxos)
 {
+	/* FIXME: sign_tx_input is dumb and needs all input->script to be
+	 * NULL, so we gather these here and assign them at the end */
 	u8 **scriptSigs = tal_arr(tmpctx, u8 *, tal_count(utxos));
 
+	/*~ Deep in my mind there's a continuous battle: should arrays be
+	 * named as singular or plural?  Is consistency the sign of a weak
+	 * mind?
+	 *
+	 * ZmnSCPxj answers thusly: One must make peace with the fact, that
+	 * the array itself is singular, yet its contents are plural. Do you
+	 * name the array, or do you name its contents? Is the array itself
+	 * the thing and the whole of the thing, or is it its contents that
+	 * define what it is?
+	 *
+	 *... I'm not sure that helps! */
 	assert(tal_count(tx->input) == tal_count(utxos));
 	for (size_t i = 0; i < tal_count(utxos); i++) {
 		struct pubkey inkey;
@@ -938,29 +1194,38 @@ static void sign_all_inputs(struct bitcoin_tx *tx, struct utxo **utxos)
 		u8 *subscript, *wscript;
 		secp256k1_ecdsa_signature sig;
 
+		/* Figure out keys to spend this. */
 		hsm_key_for_utxo(&inprivkey, &inkey, in);
 
+		/* It's either a p2wpkh or p2sh (we support that so people from
+		 * the last bitcoin era can put funds into the wallet) */
 		wscript = p2wpkh_scriptcode(tmpctx, &inkey);
 		if (in->is_p2sh) {
+			/* For P2SH-wrapped Segwit, the (implied) redeemScript
+			 * is defined in BIP141 */
 			subscript = bitcoin_redeem_p2sh_p2wpkh(tmpctx, &inkey);
 			scriptSigs[i] = bitcoin_scriptsig_p2sh_p2wpkh(tx, &inkey);
 		} else {
+			/* Pure segwit uses an empty inputScript; NULL has
+			 * tal_count() == 0, so it works great here. */
 			subscript = NULL;
 			scriptSigs[i] = NULL;
 		}
+		/* This is the core crypto magic. */
 		sign_tx_input(tx, i, subscript, wscript, &inprivkey, &inkey,
 			      &sig);
 
+		/* The witness is [sig] [key] */
 		tx->input[i].witness = bitcoin_witness_p2wpkh(tx, &sig, &inkey);
 	}
 
-	/* Now complete the transaction by attaching the scriptSigs where necessary */
+	/* Now complete the transaction by attaching the scriptSigs */
 	for (size_t i = 0; i < tal_count(utxos); i++)
 		tx->input[i].script = scriptSigs[i];
 }
 
-/* Note that it's the main daemon that asks for the funding signature so it
- * can broadcast it. */
+/*~ lightningd asks us to sign the transaction to fund a channel; it feeds us
+ * the set of inputs and the local and remote pubkeys, and we sign it. */
 static struct io_plan *handle_sign_funding_tx(struct io_conn *conn,
 					      struct client *c,
 					      const u8 *msg_in)
@@ -987,6 +1252,11 @@ static struct io_plan *handle_sign_funding_tx(struct io_conn *conn,
 		changekey = NULL;
 
 	tx = funding_tx(tmpctx, &outnum,
+			/*~ For simplicity, our generated code is not const
+			 * correct.  The C rules around const and
+			 * pointer-to-pointer are a bit weird, so we use
+			 * ccan/cast which ensures the type is correct and
+			 * we're not casting something random */
 			cast_const2(const struct utxo **, utxos),
 			satoshi_out, &local_pubkey, &remote_pubkey,
 			change_out, changekey,
@@ -996,9 +1266,8 @@ static struct io_plan *handle_sign_funding_tx(struct io_conn *conn,
 	return req_reply(conn, c, take(towire_hsm_sign_funding_reply(NULL, tx)));
 }
 
-/**
- * sign_withdrawal_tx - Generate and sign a withdrawal transaction from the master
- */
+/*~ lightningd asks us to sign a withdrawal; same as above but in theory
+ * we can do more to check the previous case is valid. */
 static struct io_plan *handle_sign_withdrawal_tx(struct io_conn *conn,
 						 struct client *c,
 						 const u8 *msg_in)
@@ -1032,13 +1301,18 @@ static struct io_plan *handle_sign_withdrawal_tx(struct io_conn *conn,
 			 take(towire_hsm_sign_withdrawal_reply(NULL, tx)));
 }
 
-/**
- * sign_invoice - Sign an invoice with our key.
- */
+/*~ Lightning invoices, defined by BOLT 11, are signed.  This has been
+ * surprisingly controversial; it means a node needs to be online to create
+ * invoices.  However, it seems clear to me that in a world without
+ * intermediaries you need proof that you have received an offer (the
+ * signature), as well as proof that you've paid it (the preimage). */
 static struct io_plan *handle_sign_invoice(struct io_conn *conn,
 					   struct client *c,
 					   const u8 *msg_in)
 {
+	/*~ We make up a 'u5' type to represent BOLT11's 5-bits-per-byte
+	 * format: it's only for human consumption, as typedefs are almost
+	 * entirely transparent to the C compiler. */
 	u5 *u5bytes;
 	u8 *hrpu8;
 	char *hrp;
@@ -1050,8 +1324,25 @@ static struct io_plan *handle_sign_invoice(struct io_conn *conn,
 	if (!fromwire_hsm_sign_invoice(tmpctx, msg_in, &u5bytes, &hrpu8))
 		return bad_req(conn, c, msg_in);
 
+	/* BOLT #11:
+	 *
+	 * A writer MUST set `signature` to a valid 512-bit secp256k1
+	 * signature of the SHA2 256-bit hash of the human-readable part,
+	 * represented as UTF-8 bytes, concatenated with the data part
+	 * (excluding the signature) with zero bits appended to pad the data
+	 * to the next byte boundary, with a trailing byte containing the
+	 * recovery ID (0, 1, 2 or 3).
+	 */
+
 	/* FIXME: Check invoice! */
 
+	/* tal_dup_arr() does what you'd expect: allocate an array by copying
+	 * another; the cast is needed because the hrp is a 'char' array, not
+	 * a 'u8' (unsigned char) as it's the "human readable" part.
+	 *
+	 * The final arg of tal_dup_arr() is how many extra bytes to allocate:
+	 * it's so often zero that I've thought about dropping the argument, but
+	 * in cases like this (adding a NUL terminator) it's perfect. */
 	hrp = tal_dup_arr(tmpctx, char, (char *)hrpu8, tal_count(hrpu8), 1);
 	hrp[tal_count(hrpu8)] = '\0';
 
@@ -1060,6 +1351,8 @@ static struct io_plan *handle_sign_invoice(struct io_conn *conn,
 	hash_u5_done(&hu5, &sha);
 
 	node_key(&node_pkey, NULL);
+	/*~ By no small coincidence, this libsecp routine uses the exact
+	 * recovery signature format mandated by BOLT 11. */
         if (!secp256k1_ecdsa_sign_recoverable(secp256k1_ctx, &rsig,
                                               (const u8 *)&sha,
                                               node_pkey.secret.data,
@@ -1071,10 +1364,21 @@ static struct io_plan *handle_sign_invoice(struct io_conn *conn,
 			 take(towire_hsm_sign_invoice_reply(NULL, &rsig)));
 }
 
+/*~ It's optional for nodes to send node_announcement, but it lets us set our
+ * favourite color and cool alias!  Plus other minor details like how to
+ * connect to us. */
 static struct io_plan *handle_sign_node_announcement(struct io_conn *conn,
 						     struct client *c,
 						     const u8 *msg_in)
 {
+	/* BOLT #7:
+	 *
+	 * The origin node:
+	 *...
+	 * - MUST set `signature` to the signature of the double-SHA256 of the
+	 *   entire remaining packet after `signature` (using the key given by
+	 *   `node_id`).
+	 */
 	/* 2 bytes msg type + 64 bytes signature */
 	size_t offset = 66;
 	struct sha256_double hash;
@@ -1100,9 +1404,24 @@ static struct io_plan *handle_sign_node_announcement(struct io_conn *conn,
 	return req_reply(conn, c, take(reply));
 }
 
+/*~ This routine checks that a client is allowed to call the handler. */
 static bool check_client_capabilities(struct client *client,
 				      enum hsm_wire_type t)
 {
+	/*~ Here's a useful trick: enums in C are not real types, they're
+	 * semantic sugar sprinkled over an int, basically (in fact, older
+	 * versions of gcc used to convert the values to ints in the parser!).
+	 *
+	 * But GCC will do one thing for us: if we have a switch statement
+	 * with a controlling expression which is an enum, it will warn us
+	 * if a declared enum value is *not* handled in the switch, eg:
+	 *     enumeration value ‘FOOBAR’ not handled in switch [-Werror=switch]
+	 *
+	 * This only works if there's no 'default' label, which is sometimes
+	 * hard, as we *can* have non-enum values in our enum.  But the tradeoff
+	 * is worth it so the compiler tells us everywhere we have to fix when
+	 * we add a new enum identifier!
+	 */
 	switch (t) {
 	case WIRE_HSM_ECDH_REQ:
 		return (client->capabilities & HSM_CAP_ECDH) != 0;
@@ -1138,7 +1457,9 @@ static bool check_client_capabilities(struct client *client,
 	case WIRE_HSM_GET_CHANNEL_BASEPOINTS:
 		return (client->capabilities & HSM_CAP_MASTER) != 0;
 
-	/* These are messages sent by the HSM so we should never receive them */
+	/*~ These are messages sent by the HSM so we should never receive them.
+	 * FIXME: Since we autogenerate these, we should really generate separate
+	 * enums for replies to avoid this kind of clutter! */
 	case WIRE_HSM_ECDH_RESP:
 	case WIRE_HSM_CANNOUNCEMENT_SIG_REPLY:
 	case WIRE_HSM_CUPDATE_SIG_REPLY:
@@ -1159,9 +1480,16 @@ static bool check_client_capabilities(struct client *client,
 	return false;
 }
 
+/*~ This is the core of the HSM daemon: handling requests. */
 static struct io_plan *handle_client(struct io_conn *conn,
 				     struct daemon_conn *dc)
 {
+	/*~ Note the use of container_of here: this is the Linux kernel way of
+	 * doing callbacks.  Rather than have struct daemon_conn contain a
+	 * void * pointer to the structure for this use, we simply embed the
+	 * daemon_conn in the structure; container_of is a fancy way of doing
+	 * pointer arithmetic to get the containing structure, saving a
+	 * pointer. */
 	struct client *c = container_of(dc, struct client, dc);
 	enum hsm_wire_type t = fromwire_peektype(dc->msg_in);
 
@@ -1256,6 +1584,9 @@ static struct io_plan *handle_client(struct io_conn *conn,
 	return bad_req_fmt(conn, c, dc->msg_in, "Unknown request");
 }
 
+/*~ This is the destructor on our client: we may call it manually, but
+ * generally it's called because the io_conn associated with the client is
+ * closed by the other end. */
 static void destroy_client(struct client *c)
 {
 	if (!uintmap_del(&clients, c->dbid))
@@ -1271,6 +1602,7 @@ static struct client *new_client(struct daemon_conn *master,
 {
 	struct client *c = tal(master, struct client);
 
+	/*~ All-zero pubkey is used for the initial master connection */
 	if (id) {
 		c->id = *id;
 	} else {
@@ -1280,13 +1612,25 @@ static struct client *new_client(struct daemon_conn *master,
 
 	c->master = master;
 	c->capabilities = capabilities;
+	/*~ This is our daemon_conn infrastructure, which does the queueing for
+	 * us; we just tell it what our handler function is. */
 	daemon_conn_init(c, &c->dc, fd, handle_client, NULL);
 
-	/* Free the connection if we exit everything. */
+	/*~ tal_steal() moves a pointer to a new parent.  At this point, the
+	 * hierarchy is:
+	 *
+	 *   master -> c -> daemon_conn.conn
+	 *
+	 * We want to invert the bottom two, so that if the io_conn closes,
+	 * the client is freed:
+	 *
+	 *   master -> c->conn -> c.
+	 */
 	tal_steal(master, c->dc.conn);
-	/* Free client when connection freed. */
 	tal_steal(c->dc.conn, c);
 
+	/* We put the special zero-db HSM connections into an array, the rest
+	 * go into the map. */
 	if (dbid == 0) {
 		assert(num_dbid_zero_clients < ARRAY_SIZE(dbid_zero_clients));
 		dbid_zero_clients[num_dbid_zero_clients++] = c;
@@ -1319,6 +1663,7 @@ int main(int argc, char *argv[])
 
 	setup_locale();
 
+	/* This sets up tmpctx, various DEVELOPER options, backtraces, etc. */
 	subdaemon_setup(argc, argv);
 
 	/* A trivial daemon_conn just for writing. */
@@ -1337,6 +1682,18 @@ int main(int argc, char *argv[])
 	/* When conn closes, everything is freed. */
 	io_set_finish(master->dc.conn, master_gone, &master->dc);
 
+	/*~ The two NULL args are a list of timers, and the timer which expired:
+	 * we don't have any timers. */
 	io_loop(NULL, NULL);
+
+	/*~ This should never be reached: io_loop only exits on io_break which
+	 * we don't call, a timer expiry which we don't have, or all connections
+	 * being closed, and closing the master calls master_gone. */
 	abort();
 }
+
+/*~ Congratulations on making it through the first of the seven dwarves!
+ * (And Christian wondered why I'm so fond of having separate daemons!).
+ *
+ * We continue our story in the next-more-complex daemon: connectd/connectd.c
+ */