From b0769d9c0cd77877cf1fa98fc27bc062d620de2d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 20 Sep 2018 14:01:26 +0930 Subject: [PATCH] hsmd: don't use daemon_conn for clients. It offers them a DoS vector, if they don't read the replies. We really want to use raw ccan/io so we can avoid buffering for this. It makes the handing of fds for new clients a bit more complex (callback based), but it's not too bad. Signed-off-by: Rusty Russell --- hsmd/hsmd.c | 182 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 120 insertions(+), 62 deletions(-) diff --git a/hsmd/hsmd.c b/hsmd/hsmd.c index 21cb1cc97..1f227acdb 100644 --- a/hsmd/hsmd.c +++ b/hsmd/hsmd.c @@ -67,8 +67,15 @@ static struct { /*~ We keep track of clients, but there's not much to keep. */ struct client { - struct daemon_conn dc; - struct daemon_conn *master; + /* The connection to lightningd itself */ + struct client *master; + + /* The ccan/io async io connection for this client: it closes, we die. */ + struct io_conn *conn; + + /*~ io_read_wire needs a pointer to store incoming messages until + * it has the complete thing; this is it. */ + u8 *msg_in; /* ~Useful for logging, but also used to derive the per-channel seed. */ struct pubkey id; @@ -105,11 +112,12 @@ extern void dev_disconnect_init(int fd); void dev_disconnect_init(int fd UNUSED) { } /* Pre-declare this, due to mutual recursion */ -static struct client *new_client(struct daemon_conn *master, +static struct client *new_client(struct client *master, const struct pubkey *id, u64 dbid, const u64 capabilities, int fd); +static struct io_plan *handle_client(struct io_conn *conn, struct client *c); /*~ ccan/compiler.h defines PRINTF_FMT as the gcc compiler hint so it will * check that fmt and other trailing arguments really are the correct type. @@ -133,7 +141,7 @@ static PRINTF_FMT(4,5) /*~ If the client was actually lightningd, it's Game Over; we actually * fail in this case, and it will too. */ - if (&c->dc == c->master) { + if (c == c->master) { status_broken("%s", str); master_badmsg(fromwire_peektype(msg_in), msg_in); } @@ -164,13 +172,39 @@ static struct io_plan *bad_req(struct io_conn *conn, return bad_req_fmt(conn, c, msg_in, "could not parse request"); } +/*~ This plan simply says: read the next packet into 'c->msg_in' (parent 'c'), + * and then call handle_client with argument 'c' */ +static struct io_plan *client_read_next(struct io_conn *conn, struct client *c) +{ + return io_read_wire(conn, c, &c->msg_in, handle_client, c); +} + /* This is the common pattern for the tail of each handler in this file. */ static struct io_plan *req_reply(struct io_conn *conn, struct client *c, const u8 *msg_out TAKES) { - daemon_conn_send(&c->dc, msg_out); - return daemon_conn_read_next(conn, &c->dc); + /*~ Write this out, then read the next one. This works perfectly for + * a simple request/response system like this. + * + * Internally, the ccan/io subsystem gathers all the file descriptors, + * figures out which want to write and read, asks the OS which ones + * are available, and for those file descriptors, tries to do the + * reads/writes we've asked it. It handles retry in the case where a + * read or write is done partially. + * + * Since the OS does buffering internally (on my system, over 100k + * worth) writes will normally succeed immediately. However, if the + * client is slow or malicious, and doesn't read from the socket as + * fast as we're writing, eventually the socket buffer will fill up; + * we don't care, because ccan/io will wait until there's room to + * write this reply before it will read again. The client just hurts + * themselves, and there's no Denial of Service on us. + * + * If we were to queue outgoing messages ourselves, we *would* have to + * consider such scenarios; this is why our daemons generally avoid + * buffering from untrusted parties. */ + return io_write_wire(conn, msg_out, client_read_next, c); } /*~ This returns the secret and/or public key for this node. */ @@ -416,7 +450,7 @@ static struct io_plan *init_hsm(struct io_conn *conn, struct pubkey node_id; /* This must be the master. */ - assert(&c->dc == c->master); + assert(c == c->master); /*~ The fromwire_* routines are autogenerated, based on the message * definitions in hsm_client_wire.csv. The format of those files is @@ -1092,7 +1126,37 @@ static struct io_plan *handle_sign_mutual_close_tx(struct io_conn *conn, return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig))); } -/* This is used by by the master to create a new client connection (which +/*~ Since we process requests then service them in strict order, and because + * only lightningd can request a new client fd, we can get away with a global + * here! But because we are being tricky, I set it to an invalid value when + * not in use, and sprinkle assertions around. */ +static int pending_client_fd = -1; + +/*~ This is the callback from below: having sent the reply, we now send the + * fd for the client end of the new socketpair. */ +static struct io_plan *send_pending_client_fd(struct io_conn *conn, + struct client *master) +{ + int fd = pending_client_fd; + /* This must be the master. */ + assert(master == master->master); + assert(fd != -1); + + /* This sanity check shouldn't be necessary, but it's cheap. */ + pending_client_fd = -1; + + /*~There's arcane UNIX magic to send an open file descriptor over a + * UNIX domain socket. There's no great way to autogenerate this + * though; especially for the receive side, so we always pass these + * manually immediately following the message. + * + * io_send_fd()'s third parameter is whether to close the local one + * after sending; that saves us YA callback. + */ + return io_send_fd(conn, fd, true, client_read_next, master); +} + +/*~ This is used by by the master to create a new client connection (which * becomes the HSM_FD for the subdaemon after forking). */ static struct io_plan *pass_client_hsmfd(struct io_conn *conn, struct client *c, @@ -1102,8 +1166,8 @@ static struct io_plan *pass_client_hsmfd(struct io_conn *conn, u64 dbid, capabilities; struct pubkey id; - /* This must be the master. */ - assert(&c->dc == c->master); + /* This must be lightningd itself. */ + assert(c == c->master); if (!fromwire_hsm_client_hsmfd(msg_in, &id, &dbid, &capabilities)) return bad_req(conn, c, msg_in); @@ -1113,14 +1177,15 @@ static struct io_plan *pass_client_hsmfd(struct io_conn *conn, status_failed(STATUS_FAIL_INTERNAL_ERROR, "creating fds: %s", strerror(errno)); - new_client(&c->dc, &id, dbid, capabilities, fds[0]); - daemon_conn_send(&c->dc, take(towire_hsm_client_hsmfd_reply(NULL))); - /* There's arcane UNIX magic to send an open file descriptor over a - * UNIX domain socket. There's no great way to autogenerate this - * though; especially for the receive side, so we always pass these - * manually immediately following the message. */ - daemon_conn_send_fd(&c->dc, fds[1]); - return daemon_conn_read_next(conn, &c->dc); + status_trace("new_client: %"PRIu64, dbid); + new_client(c, &id, dbid, capabilities, fds[0]); + + /*~ We stash this in a global, because we need to get both the fd and + * the client pointer to the callback. The other way would be to + * create a boutique structure and hand that, but we don't need to. */ + pending_client_fd = fds[1]; + return io_write_wire(conn, take(towire_hsm_client_hsmfd_reply(NULL)), + send_pending_client_fd, c); } /*~ For almost every wallet tx we use the BIP32 seed, but not for onchain @@ -1481,87 +1546,79 @@ static bool check_client_capabilities(struct client *client, } /*~ This is the core of the HSM daemon: handling requests. */ -static struct io_plan *handle_client(struct io_conn *conn, - struct daemon_conn *dc) +static struct io_plan *handle_client(struct io_conn *conn, struct client *c) { - /*~ Note the use of container_of here: this is the Linux kernel way of - * doing callbacks. Rather than have struct daemon_conn contain a - * void * pointer to the structure for this use, we simply embed the - * daemon_conn in the structure; container_of is a fancy way of doing - * pointer arithmetic to get the containing structure, saving a - * pointer. */ - struct client *c = container_of(dc, struct client, dc); - enum hsm_wire_type t = fromwire_peektype(dc->msg_in); + enum hsm_wire_type t = fromwire_peektype(c->msg_in); status_debug("Client: Received message %d from client", t); /* Before we do anything else, is this client allowed to do * what he asks for? */ if (!check_client_capabilities(c, t)) - return bad_req_fmt(conn, c, dc->msg_in, + return bad_req_fmt(conn, c, c->msg_in, "does not have capability to run %d", t); /* Now actually go and do what the client asked for */ switch (t) { case WIRE_HSM_INIT: - return init_hsm(conn, c, dc->msg_in); + return init_hsm(conn, c, c->msg_in); case WIRE_HSM_CLIENT_HSMFD: - return pass_client_hsmfd(conn, c, dc->msg_in); + return pass_client_hsmfd(conn, c, c->msg_in); case WIRE_HSM_GET_CHANNEL_BASEPOINTS: - return handle_get_channel_basepoints(conn, c, dc->msg_in); + return handle_get_channel_basepoints(conn, c, c->msg_in); case WIRE_HSM_ECDH_REQ: - return handle_ecdh(conn, c, dc->msg_in); + return handle_ecdh(conn, c, c->msg_in); case WIRE_HSM_CANNOUNCEMENT_SIG_REQ: - return handle_cannouncement_sig(conn, c, dc->msg_in); + return handle_cannouncement_sig(conn, c, c->msg_in); case WIRE_HSM_CUPDATE_SIG_REQ: - return handle_channel_update_sig(conn, c, dc->msg_in); + return handle_channel_update_sig(conn, c, c->msg_in); case WIRE_HSM_SIGN_FUNDING: - return handle_sign_funding_tx(conn, c, dc->msg_in); + return handle_sign_funding_tx(conn, c, c->msg_in); case WIRE_HSM_NODE_ANNOUNCEMENT_SIG_REQ: - return handle_sign_node_announcement(conn, c, dc->msg_in); + return handle_sign_node_announcement(conn, c, c->msg_in); case WIRE_HSM_SIGN_INVOICE: - return handle_sign_invoice(conn, c, dc->msg_in); + return handle_sign_invoice(conn, c, c->msg_in); case WIRE_HSM_SIGN_WITHDRAWAL: - return handle_sign_withdrawal_tx(conn, c, dc->msg_in); + return handle_sign_withdrawal_tx(conn, c, c->msg_in); case WIRE_HSM_SIGN_COMMITMENT_TX: - return handle_sign_commitment_tx(conn, c, dc->msg_in); + return handle_sign_commitment_tx(conn, c, c->msg_in); case WIRE_HSM_SIGN_DELAYED_PAYMENT_TO_US: - return handle_sign_delayed_payment_to_us(conn, c, dc->msg_in); + return handle_sign_delayed_payment_to_us(conn, c, c->msg_in); case WIRE_HSM_SIGN_REMOTE_HTLC_TO_US: - return handle_sign_remote_htlc_to_us(conn, c, dc->msg_in); + return handle_sign_remote_htlc_to_us(conn, c, c->msg_in); case WIRE_HSM_SIGN_PENALTY_TO_US: - return handle_sign_penalty_to_us(conn, c, dc->msg_in); + return handle_sign_penalty_to_us(conn, c, c->msg_in); case WIRE_HSM_SIGN_LOCAL_HTLC_TX: - return handle_sign_local_htlc_tx(conn, c, dc->msg_in); + return handle_sign_local_htlc_tx(conn, c, c->msg_in); case WIRE_HSM_GET_PER_COMMITMENT_POINT: - return handle_get_per_commitment_point(conn, c, dc->msg_in); + return handle_get_per_commitment_point(conn, c, c->msg_in); case WIRE_HSM_CHECK_FUTURE_SECRET: - return handle_check_future_secret(conn, c, dc->msg_in); + return handle_check_future_secret(conn, c, c->msg_in); case WIRE_HSM_SIGN_REMOTE_COMMITMENT_TX: - return handle_sign_remote_commitment_tx(conn, c, dc->msg_in); + return handle_sign_remote_commitment_tx(conn, c, c->msg_in); case WIRE_HSM_SIGN_REMOTE_HTLC_TX: - return handle_sign_remote_htlc_tx(conn, c, dc->msg_in); + return handle_sign_remote_htlc_tx(conn, c, c->msg_in); case WIRE_HSM_SIGN_MUTUAL_CLOSE_TX: - return handle_sign_mutual_close_tx(conn, c, dc->msg_in); + return handle_sign_mutual_close_tx(conn, c, c->msg_in); case WIRE_HSM_ECDH_RESP: case WIRE_HSM_CANNOUNCEMENT_SIG_REPLY: @@ -1581,7 +1638,7 @@ static struct io_plan *handle_client(struct io_conn *conn, break; } - return bad_req_fmt(conn, c, dc->msg_in, "Unknown request"); + return bad_req_fmt(conn, c, c->msg_in, "Unknown request"); } /*~ This is the destructor on our client: we may call it manually, but @@ -1594,7 +1651,7 @@ static void destroy_client(struct client *c) "Failed to remove client dbid %"PRIu64, c->dbid); } -static struct client *new_client(struct daemon_conn *master, +static struct client *new_client(struct client *master, const struct pubkey *id, u64 dbid, const u64 capabilities, @@ -1612,22 +1669,23 @@ static struct client *new_client(struct daemon_conn *master, c->master = master; c->capabilities = capabilities; - /*~ This is our daemon_conn infrastructure, which does the queueing for - * us; we just tell it what our handler function is. */ - daemon_conn_init(c, &c->dc, fd, handle_client, NULL); + /*~ This is the core of ccan/io: the connection creation calls a + * callback which returns the initial plan to execute: in our case, + * read a message.*/ + c->conn = io_new_conn(master, fd, client_read_next, c); /*~ tal_steal() moves a pointer to a new parent. At this point, the * hierarchy is: * - * master -> c -> daemon_conn.conn + * master -> c + * master -> c->conn * - * We want to invert the bottom two, so that if the io_conn closes, + * We want to the c->conn to own 'c', so that if the io_conn closes, * the client is freed: * * master -> c->conn -> c. */ - tal_steal(master, c->dc.conn); - tal_steal(c->dc.conn, c); + tal_steal(c->conn, c); /* We put the special zero-db HSM connections into an array, the rest * go into the map. */ @@ -1639,7 +1697,7 @@ static struct client *new_client(struct daemon_conn *master, /* Close conn and free any old client of this dbid. */ if (old_client) - io_close(old_client->dc.conn); + io_close(old_client->conn); if (!uintmap_add(&clients, dbid, c)) status_failed(STATUS_FAIL_INTERNAL_ERROR, @@ -1650,7 +1708,7 @@ static struct client *new_client(struct daemon_conn *master, return c; } -static void master_gone(struct io_conn *unused UNUSED, struct daemon_conn *dc UNUSED) +static void master_gone(struct io_conn *unused UNUSED, struct client *c UNUSED) { daemon_shutdown(); /* Can't tell master, it's gone. */ @@ -1677,10 +1735,10 @@ int main(int argc, char *argv[]) REQ_FD); /* We're our own master! */ - master->master = &master->dc; + master->master = master; /* When conn closes, everything is freed. */ - io_set_finish(master->dc.conn, master_gone, &master->dc); + io_set_finish(master->conn, master_gone, master); /*~ The two NULL args a list of timers, and the timer which expired: * we don't have any timers. */