From ec63c0d10b8d33642c3741a1bcfa889ccc3a5f9e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 12 Sep 2017 14:25:54 +0930 Subject: [PATCH] lightningd: give option to crash if a subdaemon fails. Either when it exits with a signal, or sends an error status message. Then we make test_lightningd.py use it. Signed-off-by: Rusty Russell --- lightningd/lightningd.h | 3 +++ lightningd/options.c | 2 ++ lightningd/subd.c | 13 +++++++++++++ tests/test_lightningd.py | 1 + 4 files changed, 19 insertions(+) diff --git a/lightningd/lightningd.h b/lightningd/lightningd.h index e2aaa3d99..93464d953 100644 --- a/lightningd/lightningd.h +++ b/lightningd/lightningd.h @@ -112,6 +112,9 @@ struct lightningd { /* If we have a --dev-disconnect file */ int dev_disconnect_fd; + /* If we have --dev-fail-on-subdaemon-fail */ + bool dev_subdaemon_fail; + /* HTLCs in flight. */ struct htlc_in_map htlcs_in; struct htlc_out_map htlcs_out; diff --git a/lightningd/options.c b/lightningd/options.c index 45f7422e1..fb171108c 100644 --- a/lightningd/options.c +++ b/lightningd/options.c @@ -239,6 +239,8 @@ static void dev_register_opts(struct lightningd *ld) { opt_register_noarg("--dev-no-broadcast", opt_set_bool, &ld->topology->dev_no_broadcast, opt_hidden); + opt_register_noarg("--dev-fail-on-subdaemon-fail", opt_set_bool, + &ld->dev_subdaemon_fail, opt_hidden); } static const struct config testnet_config = { diff --git a/lightningd/subd.c b/lightningd/subd.c index 695938658..f902e0a0d 100644 --- a/lightningd/subd.c +++ b/lightningd/subd.c @@ -318,6 +318,9 @@ static void subdaemon_malformed_msg(struct subd *sd, const u8 *msg) tal_hexstr(msg, msg + sizeof(be16), tal_count(msg) - sizeof(be16))); + + if (sd->ld->dev_subdaemon_fail) + fatal("Subdaemon %s sent malformed message", sd->name); } /* Returns true if logged, false if malformed. */ @@ -361,6 +364,10 @@ log_str_peer: /* Shouldn't happen. */ log_str_broken: log_broken(sd->log, "%s: %.*s", name, str_len, str); + + if (sd->ld->dev_subdaemon_fail) + fatal("Subdaemon %s hit error", sd->name); + return true; } @@ -444,12 +451,14 @@ next: static void destroy_subd(struct subd *sd) { int status; + bool fail_if_subd_fails = sd->ld->dev_subdaemon_fail; switch (waitpid(sd->pid, &status, WNOHANG)) { case 0: log_debug(sd->log, "Status closed, but not exited. Killing"); kill(sd->pid, SIGKILL); waitpid(sd->pid, &status, 0); + fail_if_subd_fails = false; break; case -1: log_unusual(sd->log, "Status closed, but waitpid %i says %s", @@ -458,6 +467,10 @@ static void destroy_subd(struct subd *sd) break; } + if (fail_if_subd_fails && WIFSIGNALED(status)) + fatal("Subdaemon %s killed with signal %i", + sd->name, WTERMSIG(status)); + /* In case we're freed manually, such as peer_fail_permanent */ if (sd->conn) sd->conn = tal_free(sd->conn); diff --git a/tests/test_lightningd.py b/tests/test_lightningd.py index 0cac522eb..f23cbbca2 100644 --- a/tests/test_lightningd.py +++ b/tests/test_lightningd.py @@ -101,6 +101,7 @@ class NodeFactory(object): with open(os.path.join(lightning_dir, "dev_disconnect"), "w") as f: f.write("\n".join(disconnect)) daemon.cmd_line.append("--dev-disconnect=dev_disconnect") + daemon.cmd_line.append("--dev-fail-on-subdaemon-fail") rpc = LightningRpc(socket_path, self.executor) node = utils.LightningNode(daemon, rpc, bitcoind, self.executor)