diff --git a/lightningd/lightningd.c b/lightningd/lightningd.c index 4727e679a..a73017835 100644 --- a/lightningd/lightningd.c +++ b/lightningd/lightningd.c @@ -151,6 +151,7 @@ static struct lightningd *new_lightningd(const tal_t *ctx) * This method of manually declaring the list hooks avoids dynamic * allocations to put things into a list. */ list_head_init(&ld->peers); + list_head_init(&ld->subds); /*~ These are hash tables of incoming and outgoing HTLCs (contracts), * defined as `struct htlc_in` and `struct htlc_out` in htlc_end.h. @@ -409,10 +410,8 @@ void test_subdaemons(const struct lightningd *ld) || verstring[strlen(version())] != '\n') errx(1, "%s: bad version '%s'", subdaemons[i], verstring); - - /*~ finally reap the child process, freeing all OS - * resources that go with it */ - waitpid(pid, NULL, 0); + /*~ The child will be reaped by sigchld_rfd_in, so we don't + * need to waitpid() here. */ } } @@ -784,9 +783,13 @@ static struct io_plan *sigchld_rfd_in(struct io_conn *conn, /* We don't actually care what we read, so we stuff things here. */ static u8 ignorebuf; static size_t len; + pid_t childpid; + int wstatus; /* Reap the plugins, since we otherwise ignore them. */ - while (waitpid(-1, NULL, WNOHANG) != 0); + while ((childpid = waitpid(-1, &wstatus, WNOHANG)) != 0) { + maybe_subd_child(ld, childpid, wstatus); + } return io_read_partial(conn, &ignorebuf, 1, &len, sigchld_rfd_in, ld); } diff --git a/lightningd/lightningd.h b/lightningd/lightningd.h index 8f870dbf7..25a5db475 100644 --- a/lightningd/lightningd.h +++ b/lightningd/lightningd.h @@ -202,6 +202,9 @@ struct lightningd { /* RPC response to send once we've shut down. */ const char *stop_response; + /* All the subdaemons. */ + struct list_head subds; + /* Used these feerates instead of whatever bcli returns (up to * FEERATE_PENALTY). */ u32 *force_feerates; diff --git a/lightningd/subd.c b/lightningd/subd.c index 36c836308..81614f8d1 100644 --- a/lightningd/subd.c +++ b/lightningd/subd.c @@ -21,6 +21,16 @@ #include #include +void maybe_subd_child(struct lightningd *ld, int childpid, int wstatus) +{ + struct subd *sd; + + list_for_each(&ld->subds, sd, list) { + if (sd->pid == childpid) + sd->wstatus = tal_dup(sd, int, &wstatus); + } +} + /* Carefully move fd *@from to @to: on success *from set to to */ static bool move_fd(int *from, int to) { @@ -580,24 +590,30 @@ static void destroy_subd(struct subd *sd) bool fail_if_subd_fails; fail_if_subd_fails = IFDEV(sd->ld->dev_subdaemon_fail, false); + list_del_from(&sd->ld->subds, &sd->list); - switch (waitpid(sd->pid, &status, WNOHANG)) { - case 0: - /* If it's an essential daemon, don't kill: we want the - * exit status */ - if (!sd->must_not_exit) { - log_debug(sd->log, - "Status closed, but not exited. Killing"); - kill(sd->pid, SIGKILL); + /* lightningd may have already done waitpid() */ + if (sd->wstatus != NULL) { + status = *sd->wstatus; + } else { + switch (waitpid(sd->pid, &status, WNOHANG)) { + case 0: + /* If it's an essential daemon, don't kill: we want the + * exit status */ + if (!sd->must_not_exit) { + log_debug(sd->log, + "Status closed, but not exited. Killing"); + kill(sd->pid, SIGKILL); + } + waitpid(sd->pid, &status, 0); + fail_if_subd_fails = false; + break; + case -1: + log_broken(sd->log, "Status closed, but waitpid %i says %s", + sd->pid, strerror(errno)); + status = -1; + break; } - waitpid(sd->pid, &status, 0); - fail_if_subd_fails = false; - break; - case -1: - log_unusual(sd->log, "Status closed, but waitpid %i says %s", - sd->pid, strerror(errno)); - status = -1; - break; } if (fail_if_subd_fails && WIFSIGNALED(status)) { @@ -742,6 +758,8 @@ static struct subd *new_subd(struct lightningd *ld, sd->billboardcb = billboardcb; sd->fds_in = NULL; sd->outq = msg_queue_new(sd, true); + sd->wstatus = NULL; + list_add(&ld->subds, &sd->list); tal_add_destructor(sd, destroy_subd); list_head_init(&sd->reqs); sd->channel = channel; @@ -868,6 +886,7 @@ struct subd *subd_shutdown(struct subd *sd, unsigned int seconds) if (waitpid(sd->pid, NULL, 0) > 0) { alarm(0); sigaction(SIGALRM, &old, NULL); + list_del_from(&sd->ld->subds, &sd->list); return tal_free(sd); } diff --git a/lightningd/subd.h b/lightningd/subd.h index 4be12e248..564b2f35f 100644 --- a/lightningd/subd.h +++ b/lightningd/subd.h @@ -20,6 +20,9 @@ struct peer_fd; /* One of our subds. */ struct subd { + /* Inside ld->subds */ + struct list_node list; + /* Name, like John, or "lightning_hsmd" */ const char *name; /* The Big Cheese. */ @@ -74,6 +77,9 @@ struct subd { /* Callbacks for replies. */ struct list_head reqs; + + /* Did lightningd already wait for this pid? */ + int *wstatus; }; /** @@ -219,6 +225,9 @@ struct subd *subd_shutdown(struct subd *subd, unsigned int seconds); /* Ugly helper to get full pathname of the current binary. */ const char *find_my_abspath(const tal_t *ctx, const char *argv0); +/* lightningd captures SIGCHLD and waits, but so does subd. */ +void maybe_subd_child(struct lightningd *ld, int childpid, int wstatus); + #if DEVELOPER char *opt_subd_dev_disconnect(const char *optarg, struct lightningd *ld);