From cf80f0520adacf1cf108b8e077ec7bb435478577 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Sun, 9 Apr 2023 13:53:39 +0930
Subject: [PATCH] connectd: dev-report-fds to do file descriptor audit.

Signed-off-by: Rusty Russell
---
 connectd/connectd.c                        | 153 +++++++++++++++++++++
 connectd/connectd_wire.csv                 |   3 +
 contrib/pyln-testing/pyln/testing/utils.py |   4 +-
 lightningd/connect_control.c               |  24 ++++
 4 files changed, 183 insertions(+), 1 deletion(-)

diff --git a/connectd/connectd.c b/connectd/connectd.c
index 30c88440e..7ceb488cc 100644
--- a/connectd/connectd.c
+++ b/connectd/connectd.c
@@ -8,11 +8,13 @@
  * it. */
 #include "config.h"
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
@@ -1902,6 +1904,152 @@ static void dev_suppress_gossip(struct daemon *daemon, const u8 *msg)
 {
 	daemon->dev_suppress_gossip = true;
 }
+
+/* Formats the socket address in @sa (of length @addrlen) as a human-readable
+ * string allocated off @ctx. */
+static const char *addr2name(const tal_t *ctx,
+			     const struct sockaddr_storage *sa,
+			     socklen_t addrlen)
+{
+	const struct sockaddr_in *in = (struct sockaddr_in *)sa;
+	const struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)sa;
+	const struct sockaddr_un *un = (struct sockaddr_un *)sa;
+	char addr[1000];
+
+	switch (sa->ss_family) {
+	case AF_UNIX:
+		if (addrlen == sizeof(un->sun_family))
+			return tal_fmt(ctx, "unix socket ");
+		else
+			return tal_fmt(ctx, "unix socket %s", un->sun_path);
+	case AF_INET:
+		if (!inet_ntop(sa->ss_family, &in->sin_addr, addr, sizeof(addr)))
+			return tal_fmt(ctx, "IPv4 socket ");
+		else
+			return tal_fmt(ctx, "IPv4 socket %s:%u",
+				       addr, ntohs(in->sin_port));
+	case AF_INET6:
+		if (!inet_ntop(sa->ss_family, &in6->sin6_addr, addr, sizeof(addr)))
+			return tal_fmt(ctx, "IPv6 socket ");
+		else
+			return tal_fmt(ctx, "IPv6 socket %s:%u",
+				       addr, ntohs(in6->sin6_port));
+	default:
+		return tal_fmt(ctx, "unknown family %u (**BROKEN**)",
+			       (unsigned)sa->ss_family);
+	}
+}
+
+/* Logs the local name and, if it has one, the peer name of socket @fd. */
+static void describe_fd(int fd)
+{
+	struct sockaddr_storage sa;
+	socklen_t addrlen = sizeof(sa);
+
+	if (getsockname(fd, (void *)&sa, &addrlen) != 0) {
+		status_broken("dev_report_fds: %i cannot get sockname (%s)",
+			      fd, strerror(errno));
+		return;
+	}
+	status_info("dev_report_fds: %i name %s", fd, addr2name(tmpctx, &sa, addrlen));
+
+	/* getsockname() overwrote addrlen with the name's length: reset it
+	 * to the full buffer size so the peer address is not truncated. */
+	addrlen = sizeof(sa);
+	if (getpeername(fd, (void *)&sa, &addrlen) != 0)
+		return;
+	status_info("dev_report_fds: %i peer %s", fd, addr2name(tmpctx, &sa, addrlen));
+}
+
+/* Returns the name of an io_plan_status enum value, for logging. */
+static const char *io_plan_status_str(enum io_plan_status status)
+{
+	switch (status) {
+	case IO_UNSET: return "IO_UNSET";
+	case IO_POLLING_NOTSTARTED: return "IO_POLLING_NOTSTARTED";
+	case IO_POLLING_STARTED: return "IO_POLLING_STARTED";
+	case IO_WAITING: return "IO_WAITING";
+	case IO_ALWAYS: return "IO_ALWAYS";
+	}
+	return "INVALID-STATUS";
+}
+
+/* Stupid and slow, but machines are fast! */
+static const tal_t *find_tal_ptr(const tal_t *root, const tal_t *p)
+{
+	if (root == p)
+		return root;
+
+	for (tal_t *t = tal_first(root); t; t = tal_next(t)) {
+		const tal_t *ret = find_tal_ptr(t, p);
+		if (ret)
+			return ret;
+	}
+	return NULL;
+}
+
+/* Looks up ptr in hash tree, to try to find name */
+static const char *try_tal_name(const tal_t *ctx, const void *p)
+{
+	const tal_t *t = find_tal_ptr(NULL, p);
+	if (t)
+		return tal_name(t);
+	return tal_fmt(ctx, "%p", p);
+}
+
+/* Logs what every open file descriptor from 3 up to 4095 is used for, and
+ * logs any that are open without a known owner as broken. */
+static void dev_report_fds(struct daemon *daemon, const u8 *msg)
+{
+	for (int fd = 3; fd < 4096; fd++) {
+		bool listener;
+		const struct io_conn *c;
+		const struct io_listener *l;
+		/* isatty() may leave errno untouched: clear any stale EBADF. */
+		errno = 0;
+		if (!isatty(fd) && errno == EBADF)
+			continue;
+		if (fd == HSM_FD) {
+			status_info("dev_report_fds: %i -> hsm fd", fd);
+			continue;
+		}
+		if (fd == GOSSIPCTL_FD) {
+			status_info("dev_report_fds: %i -> gossipd fd", fd);
+			continue;
+		}
+#if DEVELOPER
+		if (fd == daemon->dev_disconnect_fd) {
+			status_info("dev_report_fds: %i -> dev_disconnect_fd", fd);
+			continue;
+		}
+#endif
+		if (fd == daemon->gossip_store_fd) {
+			status_info("dev_report_fds: %i -> gossip_store", fd);
+			continue;
+		}
+		c = io_have_fd(fd, &listener);
+		if (!c) {
+			status_broken("dev_report_fds: %i open but unowned?", fd);
+			continue;
+		} else if (listener) {
+			l = (void *)c;
+			status_info("dev_report_fds: %i -> listener (%s)", fd,
+				    backtrace_symname(tmpctx, l->init));
+		} else {
+			status_info("dev_report_fds: %i -> IN=%s:%s+%s(%s), OUT=%s:%s+%s(%s)",
+				    fd,
+				    io_plan_status_str(c->plan[IO_IN].status),
+				    backtrace_symname(tmpctx, c->plan[IO_IN].io),
+				    backtrace_symname(tmpctx, c->plan[IO_IN].next),
+				    try_tal_name(tmpctx, c->plan[IO_IN].next_arg),
+				    io_plan_status_str(c->plan[IO_OUT].status),
+				    backtrace_symname(tmpctx, c->plan[IO_OUT].io),
+				    backtrace_symname(tmpctx, c->plan[IO_OUT].next),
+				    try_tal_name(tmpctx, c->plan[IO_OUT].next_arg));
+		}
+		describe_fd(fd);
+	}
+}
 #endif /* DEVELOPER */
 
 static struct io_plan *recv_peer_connect_subd(struct io_conn *conn,
@@ -1972,6 +2120,11 @@ static struct io_plan *recv_req(struct io_conn *conn,
 #if DEVELOPER
 		dev_suppress_gossip(daemon, msg);
 		goto out;
+#endif
+	case WIRE_CONNECTD_DEV_REPORT_FDS:
+#if DEVELOPER
+		dev_report_fds(daemon, msg);
+		goto out;
 #endif
 	/* We send these, we don't receive them */
 	case WIRE_CONNECTD_INIT_REPLY:
diff --git a/connectd/connectd_wire.csv b/connectd/connectd_wire.csv
index e376eda09..d42d5f506 100644
--- a/connectd/connectd_wire.csv
+++ b/connectd/connectd_wire.csv
@@ -105,6 +105,9 @@ msgtype,connectd_dev_memleak,2033
 msgtype,connectd_dev_memleak_reply,2133
 msgdata,connectd_dev_memleak_reply,leak,bool,
 
+# master -> connectd: dump status of your fds.
+msgtype,connectd_dev_report_fds,2034
+
 # Ping/pong test. Waits for a reply if it expects one.
 msgtype,connectd_ping,2030
 msgdata,connectd_ping,id,node_id,
diff --git a/contrib/pyln-testing/pyln/testing/utils.py b/contrib/pyln-testing/pyln/testing/utils.py
index 1ed1b35bc..0c1d27a25 100644
--- a/contrib/pyln-testing/pyln/testing/utils.py
+++ b/contrib/pyln-testing/pyln/testing/utils.py
@@ -1578,12 +1578,14 @@ class NodeFactory(object):
         err_msgs = []
         for i in range(len(self.nodes)):
             leaks = None
-            # leak detection upsets VALGRIND by reading uninitialized mem.
+            # leak detection upsets VALGRIND by reading uninitialized mem,
+            # and valgrind adds extra fds.
             # If it's dead, we'll catch it below.
             if not self.valgrind and DEVELOPER:
                 try:
                     # This also puts leaks in log.
                     leaks = self.nodes[i].rpc.dev_memleak()['leaks']
+                    self.nodes[i].rpc.dev_report_fds()
                 except Exception:
                     pass
 
diff --git a/lightningd/connect_control.c b/lightningd/connect_control.c
index 93c617a0c..3b16c7548 100644
--- a/lightningd/connect_control.c
+++ b/lightningd/connect_control.c
@@ -560,6 +560,7 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd
 	case WIRE_CONNECTD_DISCARD_PEER:
 	case WIRE_CONNECTD_DEV_MEMLEAK:
 	case WIRE_CONNECTD_DEV_SUPPRESS_GOSSIP:
+	case WIRE_CONNECTD_DEV_REPORT_FDS:
 	case WIRE_CONNECTD_PEER_FINAL_MSG:
 	case WIRE_CONNECTD_PEER_CONNECT_SUBD:
 	case WIRE_CONNECTD_PING:
@@ -843,4 +844,27 @@ static const struct json_command dev_suppress_gossip = {
 	"Stop this node from sending any more gossip."
 };
 AUTODATA(json_command, &dev_suppress_gossip);
+
+/* dev-report-fds: asks connectd to log its fds; we don't wait for a reply. */
+static struct command_result *json_dev_report_fds(struct command *cmd,
+						  const char *buffer,
+						  const jsmntok_t *obj UNNEEDED,
+						  const jsmntok_t *params)
+{
+	if (!param(cmd, buffer, params, NULL))
+		return command_param_failed();
+
+	subd_send_msg(cmd->ld->connectd,
+		      take(towire_connectd_dev_report_fds(NULL)));
+
+	return command_success(cmd, json_stream_success(cmd));
+}
+
+static const struct json_command dev_report_fds = {
+	"dev-report-fds",
+	"developer",
+	json_dev_report_fds,
+	"Ask connectd to report status of all its open files."
+};
+AUTODATA(json_command, &dev_report_fds);
 #endif /* DEVELOPER */