lightningd: track weird CI crash in test_important_plugin

Looks like we woke one of the startup io_loops early, and thus
we thought we'd finished connectd_activate and we hadn't.  This
caused us to use an uninitialized ld->announceable array, and
finally caused an assert fail in the main loop.

Make *every* loop assert that it was exited for the correct reason,
so if it happens again, we can maybe figure out what part of
the code to look at.

```
lightningd: lightningd/lightningd.c:1186: main: Assertion `io_loop_ret == ld' failed.
lightningd: FATAL SIGNAL 6 (version 4df66fa)
...
------------------------------- Valgrind errors --------------------------------
Valgrind error file: valgrind-errors.895509
==895509== Conditional jump or move depends on uninitialised value(s)
==895509==    at 0x22C58E: to_tal_hdr_or_null (tal.c:184)
==895509==    by 0x22D531: tal_bytelen (tal.c:637)
==895509==    by 0x1F10B6: towire_gossipd_init (gossipd_wiregen.c:100)
==895509==    by 0x13AC6E: gossip_init (gossip_control.c:254)
==895509==    by 0x1497EC: main (lightningd.c:1090)
==895509== 
```

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell
2022-06-26 14:21:01 +09:30
parent 9137ea262b
commit 3f98cf3fce
9 changed files with 45 additions and 10 deletions

View File

@@ -40,12 +40,14 @@ static void plugin_config_cb(const char *buffer,
struct plugin *plugin)
{
plugin->plugin_state = INIT_COMPLETE;
log_debug(plugin->plugins->ld->log, "io_break: %s", __func__);
io_break(plugin);
}
static void config_plugin(struct plugin *plugin)
{
struct jsonrpc_request *req;
void *ret;
req = jsonrpc_request_start(plugin, "init", plugin->log,
NULL, plugin_config_cb, plugin);
@@ -55,7 +57,9 @@ static void config_plugin(struct plugin *plugin)
tal_add_destructor(plugin, bitcoin_destructor);
io_loop_with_timers(plugin->plugins->ld);
ret = io_loop_with_timers(plugin->plugins->ld);
log_debug(plugin->plugins->ld->log, "io_loop_with_timers: %s", __func__);
assert(ret == plugin);
}
static void wait_plugin(struct bitcoind *bitcoind, const char *method,

View File

@@ -38,6 +38,7 @@ static void maybe_completed_init(struct chain_topology *topo)
return;
if (!topo->root)
return;
log_debug(topo->ld->log, "io_break: %s", __func__);
io_break(topo);
}
@@ -1087,6 +1088,7 @@ static void retry_check_chain(struct chain_topology *topo)
void setup_topology(struct chain_topology *topo,
u32 min_blockheight, u32 max_blockheight)
{
void *ret;
memset(&topo->feerate, 0, sizeof(topo->feerate));
topo->min_blockheight = min_blockheight;
@@ -1107,7 +1109,9 @@ void setup_topology(struct chain_topology *topo,
start_fee_estimate(topo);
/* Once it gets initial block, it calls io_break() and we return. */
io_loop_with_timers(topo->ld);
ret = io_loop_with_timers(topo->ld);
assert(ret == topo);
log_debug(topo->ld->log, "io_loop_with_timers: %s", __func__);
}
void begin_topology(struct chain_topology *topo)

View File

@@ -504,6 +504,7 @@ static void connect_init_done(struct subd *connectd,
}
/* Break out of loop, so we can begin */
log_debug(connectd->ld->log, "io_break: %s", __func__);
io_break(connectd);
}
@@ -515,6 +516,7 @@ int connectd_init(struct lightningd *ld)
struct wireaddr_internal *wireaddrs = ld->proposed_wireaddr;
enum addr_listen_announce *listen_announce = ld->proposed_listen_announce;
const char *websocket_helper_path;
void *ret;
websocket_helper_path = subdaemon_path(tmpctx, ld,
"lightning_websocketd");
@@ -566,7 +568,9 @@ int connectd_init(struct lightningd *ld)
connect_init_done, NULL);
/* Wait for init_reply */
io_loop(NULL, NULL);
ret = io_loop(NULL, NULL);
log_debug(ld->log, "io_loop: %s", __func__);
assert(ret == ld->connectd);
return fds[0];
}
@@ -589,18 +593,22 @@ static void connect_activate_done(struct subd *connectd,
}
/* Break out of loop, so we can begin */
log_debug(connectd->ld->log, "io_break: %s", __func__);
io_break(connectd);
}
void connectd_activate(struct lightningd *ld)
{
void *ret;
const u8 *msg = towire_connectd_activate(NULL, ld->listen);
subd_req(ld->connectd, ld->connectd, take(msg), -1, 0,
connect_activate_done, NULL);
/* Wait for activate_reply */
io_loop(NULL, NULL);
ret = io_loop(NULL, NULL);
log_debug(ld->log, "io_loop: %s", __func__);
assert(ret == ld->connectd);
}
void maybe_disconnect_peer(struct lightningd *ld, struct peer *peer)

View File

@@ -229,6 +229,7 @@ static void gossipd_init_done(struct subd *gossipd,
void *unused)
{
/* Break out of loop, so we can begin */
log_debug(gossipd->ld->log, "io_break: %s", __func__);
io_break(gossipd);
}
@@ -238,6 +239,7 @@ void gossip_init(struct lightningd *ld, int connectd_fd)
{
u8 *msg;
int hsmfd;
void *ret;
hsmfd = hsm_get_global_fd(ld, HSM_CAP_ECDH|HSM_CAP_SIGN_GOSSIP);
@@ -267,7 +269,9 @@ void gossip_init(struct lightningd *ld, int connectd_fd)
gossipd_init_done, NULL);
/* Wait for gossipd_init_reply */
io_loop(NULL, NULL);
ret = io_loop(NULL, NULL);
log_debug(ld->log, "io_loop: %s", __func__);
assert(ret == ld->gossip);
}
void gossipd_notify_spend(struct lightningd *ld,

View File

@@ -183,6 +183,7 @@ static struct command_result *json_stop(struct command *cmd,
/* With rpc_command_hook, jcon might have closed in the meantime! */
if (!cmd->jcon) {
/* Return us to toplevel lightningd.c */
log_debug(cmd->ld->log, "io_break: %s", __func__);
io_break(cmd->ld);
return command_still_pending(cmd);
}
@@ -986,6 +987,7 @@ static struct io_plan *start_json_stream(struct io_conn *conn,
/* Once the stop_conn conn is drained, we can shut down. */
if (jcon->ld->stop_conn == conn && jcon->ld->state == LD_STATE_RUNNING) {
/* Return us to toplevel lightningd.c */
log_debug(jcon->ld->log, "io_break: %s", __func__);
io_break(jcon->ld);
/* We never come back. */
return io_out_wait(conn, conn, io_never, conn);

View File

@@ -856,6 +856,7 @@ void lightningd_exit(struct lightningd *ld, int exit_code)
{
ld->exit_code = tal(ld, int);
*ld->exit_code = exit_code;
log_debug(ld->log, "io_break: %s", __func__);
io_break(ld);
}
@@ -1184,6 +1185,7 @@ int main(int argc, char *argv[])
* shut down.
*/
assert(io_loop_ret == ld);
log_debug(ld->log, "io_loop_with_timers: %s", __func__);
/* Fail JSON RPC requests and ignore plugin's responses */
ld->state = LD_STATE_SHUTDOWN;

View File

@@ -159,9 +159,10 @@ static void check_plugins_manifests(struct plugins *plugins)
}
/* As startup, we break out once all getmanifest are returned */
if (plugins->startup)
if (plugins->startup) {
log_debug(plugins->ld->log, "io_break: %s", __func__);
io_break(plugins);
else
} else
/* Otherwise we go straight into configuring them */
plugins_config(plugins);
}
@@ -214,8 +215,10 @@ static void destroy_plugin(struct plugin *p)
/* Daemon shutdown overrules plugin's importance; aborts init checks */
if (p->plugins->ld->state == LD_STATE_SHUTDOWN) {
/* But return if this was the last plugin! */
if (list_empty(&p->plugins->plugins))
if (list_empty(&p->plugins->plugins)) {
log_debug(p->plugins->ld->log, "io_break: %s", __func__);
io_break(destroy_plugin);
}
return;
}
@@ -1761,8 +1764,12 @@ void plugins_init(struct plugins *plugins)
setenv("LIGHTNINGD_PLUGIN", "1", 1);
setenv("LIGHTNINGD_VERSION", version(), 1);
if (plugins_send_getmanifest(plugins))
io_loop_with_timers(plugins->ld);
if (plugins_send_getmanifest(plugins)) {
void *ret;
ret = io_loop_with_timers(plugins->ld);
log_debug(plugins->ld->log, "io_loop_with_timers: %s", __func__);
assert(ret == plugins);
}
}
static void plugin_config_cb(const char *buffer,
@@ -2058,6 +2065,7 @@ void *plugins_exclusive_loop(struct plugin **plugins)
/* We don't service timers here, either! */
ret = io_loop(NULL, NULL);
log_debug(plugins[0]->plugins->ld->log, "io_loop: %s", __func__);
for (i = 0; i < tal_count(plugins); ++i) {
io_conn_out_exclusive(plugins[i]->stdin_conn, false);

View File

@@ -332,6 +332,7 @@ static void db_hook_response(const char *buffer, const jsmntok_t *toks,
return;
/* We're done, exit exclusive loop. */
log_debug(dwh_req->plugin->plugins->ld->log, "io_break: %s", __func__);
io_break(dwh_req->ph_req);
}
@@ -392,6 +393,7 @@ void plugin_hook_db_sync(struct db *db)
if (ret != ph_req) {
void *ret2 = plugins_exclusive_loop(plugins);
assert(ret2 == ph_req);
log_debug(plugins[0]->plugins->ld->log, "io_break: %s", __func__);
io_break(ret);
}
assert(num_hooks == 0);

View File

@@ -463,6 +463,7 @@ static bool handle_version(struct subd *sd, const u8 *msg)
ver, version());
sd->ld->try_reexec = true;
/* Return us to toplevel lightningd.c */
log_debug(sd->ld->log, "io_break: %s", __func__);
io_break(sd->ld);
return false;
}