lightningd: shutdown plugins after subdaemons and assert no write access to db

because:
    - shutdown_subdaemons can trigger db write, comments in that function say so at least
    - resurrecting the main event loop with subdaemons still running is counter productive
      in shutting down activity (such as htlc's, hook_calls etc.)
    - custom behavior injected by plugins via hooks should be consistent, see test
      in previous commmit

    IDEA:

    in shutdown_plugins, when starting new io_loop:

    - A plugin that is still running can return a jsonrpc_request response, this triggers
      response_cb, which cannot be handled because subdaemons are gone -> so any response_cb should be blocked/aborted

    - jsonrpc is still there, so users (such as plugins) can make new jsonrpc_request's which
      cannot be handled because subdaemons are gone -> so new rpc_request should also be blocked

    - But we do want to send/receive notifications and log messages (handled in jsonrpc as jsonrpc_notification)
      as these do not trigger subdaemon calls or db_write's
      Log messages and notifications do not have "id" field, where jsonrpc_request *do* have an "id" field

    PLAN (hypothesis):
    - hack into plugin_read_json_one OR plugin_response_handle to filter-out json with
      an "id" field, this should
      block/abandon any jsonrpc_request responses (and new jsonrpc_requests for plugins?)

  Q. Can internal (so not via plugin) jsonrpc_requests called in the main io_loop return/revive in
     the shutdown io_loop?
  A. No. All code under lightningd/ returning command_still_pending depends on either a subdaemon, timer or
     plugin. In shutdown loop the subdaemons are dead, timer struct cleared and plugins will be taken
     care of (in next commits).

 fixup: we can only io_break the main io_loop once
This commit is contained in:
Simon Vrouwe
2021-11-10 11:39:02 +02:00
committed by Rusty Russell
parent 5fb3674233
commit 5f69674faa
8 changed files with 42 additions and 27 deletions

View File

@@ -972,7 +972,7 @@ static struct io_plan *start_json_stream(struct io_conn *conn,
io_wake(conn);
/* Once the stop_conn conn is drained, we can shut down. */
if (jcon->ld->stop_conn == conn) {
if (jcon->ld->stop_conn == conn && jcon->ld->state == LD_STATE_RUNNING) {
/* Return us to toplevel lightningd.c */
io_break(jcon->ld);
/* We never come back. */

View File

@@ -242,6 +242,7 @@ static struct lightningd *new_lightningd(const tal_t *ctx)
*/
ld->plugins = plugins_new(ld, ld->log_book, ld);
ld->plugins->startup = true;
ld->plugins->shutdown = false;
/*~ This is set when a JSON RPC command comes in to shut us down. */
ld->stop_conn = NULL;
@@ -1187,11 +1188,11 @@ int main(int argc, char *argv[])
/* We're not going to collect our children. */
remove_sigchild_handler();
/* Tell plugins we're shutting down. */
shutdown_plugins(ld);
shutdown_subdaemons(ld);
/* Tell plugins we're shutting down, closes the db for write access. */
shutdown_plugins(ld);
/* Clean up the JSON-RPC. This needs to happen in a DB transaction since
* it might actually be touching the DB in some destructors, e.g.,
* unreserving UTXOs (see #1737) */

View File

@@ -187,19 +187,18 @@ static void destroy_plugin(struct plugin *p)
if (p->plugin_state == AWAITING_GETMANIFEST_RESPONSE)
check_plugins_manifests(p->plugins);
/* If this was the last one init was waiting for, handle cmd replies */
if (p->plugin_state == AWAITING_INIT_RESPONSE)
check_plugins_initted(p->plugins);
/* If we are shutting down, do not continue to checking if
* the dying plugin is important. */
/* Daemon shutdown overrules plugin's importance; aborts init checks */
if (p->plugins->shutdown) {
/* But return if this was the last plugin! */
if (list_empty(&p->plugins->plugins))
io_break(p->plugins);
io_break(destroy_plugin);
return;
}
/* If this was the last one init was waiting for, handle cmd replies */
if (p->plugin_state == AWAITING_INIT_RESPONSE)
check_plugins_initted(p->plugins);
/* Now check if the dying plugin is important. */
if (p->important) {
log_broken(p->log,
@@ -2082,46 +2081,48 @@ bool was_plugin_destroyed(struct plugin_destroyed *pd)
static void plugin_shutdown_timeout(struct lightningd *ld)
{
io_break(ld->plugins);
io_break(plugin_shutdown_timeout);
}
void shutdown_plugins(struct lightningd *ld)
{
struct plugin *p, *next;
/* This makes sure we don't complain about important plugins
* vanishing! */
/* Don't complain about important plugins vanishing and
* crash any attempt to write to db. */
ld->plugins->shutdown = true;
/* Tell them all to shutdown; if they care. */
list_for_each_safe(&ld->plugins->plugins, p, next, list) {
/* Kill immediately, deletes self from list. */
if (!notify_plugin_shutdown(ld, p))
if (p->plugin_state != INIT_COMPLETE || !notify_plugin_shutdown(ld, p))
tal_free(p);
}
/* If anyone was interested in shutdown, give them time. */
if (!list_empty(&ld->plugins->plugins)) {
struct oneshot *t;
struct timers *orig_timers, *timer;
/* 30 seconds should do it. */
t = new_reltimer(ld->timers, ld,
time_from_sec(30),
plugin_shutdown_timeout, ld);
/* 30 seconds should do it, use a clean timers struct */
orig_timers = ld->timers;
timer = tal(NULL, struct timers);
timers_init(timer, time_mono());
new_reltimer(timer, timer, time_from_sec(30),
plugin_shutdown_timeout, ld);
io_loop_with_timers(ld);
tal_free(t);
ld->timers = timer;
void *ret = io_loop_with_timers(ld);
assert(ret == plugin_shutdown_timeout || ret == destroy_plugin);
ld->timers = orig_timers;
tal_free(timer);
/* Report and free remaining plugins. */
while (!list_empty(&ld->plugins->plugins)) {
p = list_pop(&ld->plugins->plugins, struct plugin, list);
log_debug(ld->log,
"%s: failed to shutdown, killing.",
"%s: failed to self-terminate in time, killing.",
p->shortname);
tal_free(p);
}
}
/* NULL stops notifications trying to access plugins. */
ld->plugins = tal_free(ld->plugins);
}

View File

@@ -113,7 +113,7 @@ struct plugins {
/* Blacklist of plugins from --disable-plugin */
const char **blacklist;
/* Whether we are shutting down (`plugins_free` is called) */
/* Whether we are shutting down, blocks db write's */
bool shutdown;
/* Index to show what order they were added in */

View File

@@ -337,6 +337,7 @@ void plugin_hook_db_sync(struct db *db)
size_t i;
size_t num_hooks;
db_check_plugins_not_shutdown(db);
const char **changes = db_changes(db);
num_hooks = tal_count(hook->hooks);
if (num_hooks == 0)

View File

@@ -1266,6 +1266,7 @@ struct db *db_setup(const tal_t *ctx, struct lightningd *ld,
struct db *db = db_open(ctx, ld->wallet_dsn);
bool migrated;
db->log = new_log(db, ld->log_book, NULL, "database");
db->plugins_shutdown = &ld->plugins->shutdown;
db_begin_transaction(db);
@@ -2340,6 +2341,11 @@ void db_changes_add(struct db_stmt *stmt, const char * expanded)
tal_arr_expand(&db->changes, tal_strdup(db->changes, expanded));
}
void db_check_plugins_not_shutdown(struct db *db)
{
assert(!*db->plugins_shutdown);
}
const char **db_changes(struct db *db)
{
return db->changes;

View File

@@ -249,6 +249,9 @@ struct db_stmt *db_prepare_v2_(const char *location, struct db *db,
#define db_prepare_v2(db,query) \
db_prepare_v2_(__FILE__ ":" stringify(__LINE__), db, query)
/* Check that plugins are not shutting down when calling db_write hook */
void db_check_plugins_not_shutdown(struct db *db);
/**
* Access pending changes that have been added to the current transaction.
*/

View File

@@ -34,6 +34,9 @@ struct db {
* Used to bump the data_version in the DB.*/
bool dirty;
/* Only needed to check shutdown state of plugins */
const bool *plugins_shutdown;
/* The current DB version we expect to update if changes are
* committed. */
u32 data_version;