lightningd: simplify --daemon.

Dumb programs which have a --daemon option call fork() early.  This is
terrible UX since startup errors get lost: the program exits with
"success" immediately then you discover via the logs that it didn't
start at all.

However, forking late introduced a heap of problems with changing
pids.  Instead, fork early but keep stderr and the parent around: if
we fail early on, the parent fails with us.  We release our parent
with an explicit action just before the main loop.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell
2019-08-01 15:50:43 +09:30
committed by Christian Decker
parent fc024f81d6
commit 979fbeb3b0
13 changed files with 84 additions and 266 deletions

View File

@@ -42,7 +42,6 @@
#include <ccan/array_size/array_size.h>
#include <ccan/cast/cast.h>
#include <ccan/crypto/hkdf_sha256/hkdf_sha256.h>
#include <ccan/daemonize/daemonize.h>
#include <ccan/err/err.h>
#include <ccan/io/fdpass/fdpass.h>
#include <ccan/io/io.h>
@@ -77,6 +76,7 @@
#include <lightningd/options.h>
#include <onchaind/onchain_wire.h>
#include <signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
@@ -198,7 +198,7 @@ static struct lightningd *new_lightningd(const tal_t *ctx)
/*~ This is detailed in chaintopology.c */
ld->topology = new_topology(ld, ld->log);
ld->daemon = false;
ld->daemon_parent_fd = -1;
ld->config_filename = NULL;
ld->pidfile = NULL;
ld->proxyaddr = NULL;
@@ -501,30 +501,37 @@ static void init_txfilter(struct wallet *w, struct txfilter *filter)
* don't prevent unmounting whatever filesystem you happen to start in.
*
* But we define every path relative to our (~/.lightning) data dir, so we
* make sure we stay there.
* make sure we stay there. The rest of this is taken from ccan/daemonize,
* which was based on W. Richard Steven's advice in Programming in The Unix
* Environment.
*/
static void daemonize_but_keep_dir(struct lightningd *ld)
static void complete_daemonize(struct lightningd *ld)
{
/* daemonize moves us into /, but we want to be here */
const char *cwd = path_cwd(NULL);
int ok_status = 0;
/*~ SQLite3 does NOT like being open across fork(), a.k.a. daemonize() */
db_close_for_fork(ld->wallet->db);
if (!cwd)
fatal("Could not get current directory: %s", strerror(errno));
if (!daemonize())
fatal("Could not become a daemon: %s", strerror(errno));
/* Don't hold files open. */
close(STDIN_FILENO);
close(STDOUT_FILENO);
close(STDERR_FILENO);
/*~ Move back: important, since lightning dir may be relative! */
if (chdir(cwd) != 0)
fatal("Could not return to directory %s: %s",
cwd, strerror(errno));
/* Many routines write to stderr; that can cause chaos if used
* for something else, so set it here. */
if (open("/dev/null", O_WRONLY) != 0)
fatal("Could not open /dev/null: %s", strerror(errno));
if (dup2(0, STDERR_FILENO) != STDERR_FILENO)
fatal("Could not dup /dev/null for stderr: %s", strerror(errno));
close(0);
db_reopen_after_fork(ld->wallet->db);
/* Session leader so ^C doesn't whack us. */
if (setsid() == (pid_t)-1)
fatal("Could not setsid: %s", strerror(errno));
/*~ Why not allocate cwd off tmpctx? Probably because this code predates
* tmpctx. So we free manually here. */
tal_free(cwd);
/* Discard our parent's old-fashioned umask prejudices. */
umask(0);
/* OK, parent, you can exit(0) now. */
write_all(ld->daemon_parent_fd, &ok_status, sizeof(ok_status));
close(ld->daemon_parent_fd);
}
/*~ It's pretty standard behaviour (especially for daemons) to create and
@@ -785,13 +792,6 @@ int main(int argc, char *argv[])
* in case that runs into trouble. */
crashlog = ld->log;
/*~ We defer --daemon until we've completed most initialization: that
* way we'll exit with an error rather than silently exiting 0, then
* realizing we can't start and forcing the confused user to read the
* logs. */
if (ld->daemon)
daemonize_but_keep_dir(ld);
/*~ We have to do this after daemonize, since that changes our pid! */
pidfile_write(ld, pid_fd);
@@ -830,6 +830,16 @@ int main(int argc, char *argv[])
* can start the poll loop which queries bitcoind for new blocks. */
begin_topology(ld->topology);
/*~ To handle --daemon, we fork the daemon early (otherwise we hit
* issues with our pid changing), but keep the parent around until
* we've completed most initialization: that way we'll exit with an
* error rather than silently exiting 0, then realizing we can't start
* and forcing the confused user to read the logs.
*
* But we're all initialized, so detach and have parent exit now. */
if (ld->daemon_parent_fd != -1)
complete_daemonize(ld);
/*~ The root of every backtrace (almost). This is our main event
* loop. */
void *io_loop_ret = io_loop_with_timers(ld);