/*~ Welcome, wonderful reader! * * This is the core of c-lightning: the main file of the master daemon * `lightningd`. It's mainly cluttered with the miscellany of setup, * and a few startup sanity checks. * * The role of this daemon is to start the subdaemons, shuffle peers * between them, handle the JSON RPC requests, bitcoind, the database * and centralize logging. In theory, it doesn't trust the other * daemons, though we expect hsmd to be responsive. * * Comments beginning with a ~ (like this one!) are part of our shared * adventure through the source, so they're more meta than normal code * comments, and mean to be read in a certain order. */ /*~ Notice how includes are in ASCII order: this is actually enforced by * the build system under 'make check-source'. It avoids merge conflicts * and keeps things consistent. */ #include "gossip_control.h" #include "hsm_control.h" #include "lightningd.h" #include "peer_control.h" #include "subd.h" /*~ This is Ian Lance Taylor's libbacktrace. It turns out that it's * horrifically difficult to obtain a decent backtrace in C; the standard * backtrace function is useless in most programs. */ #include /*~ These headers are from CCAN: http://ccodearchive.net. * * It's another one of Rusty's projects, and we copy and paste it * automatically into the source tree here, so you should never edit * it. There's a Makefile target update-ccan to update it (and add modules * if CCAN_NEW is specified). */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*~ This is common code: routines shared by one or more programs. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*~ The core lightning object: it's passed everywhere, and is basically a * global variable. This new_xxx pattern is something we'll see often: * it allocates and initializes a new structure, using *tal*, the heirarchitcal * allocator. */ static struct lightningd *new_lightningd(const tal_t *ctx) { /*~ tal: each allocation is a child of an existing object (or NULL, * the top-level object). When an object is freed, all the objects * 'tallocated' off it are also freed. In this case, freeing 'ctx' * will free 'ld'. * * It's incredibly useful for grouping object lifetimes, as we'll see. */ struct lightningd *ld = tal(ctx, struct lightningd); /*~ Note that we generally EXPLICITLY #if-wrap DEVELOPER code. This * is a nod to keeping it minimal and explicit: we need this code for * testing, but its existence means we're not actually testing the * same exact code users will be running. */ #if DEVELOPER ld->dev_debug_subdaemon = NULL; ld->dev_disconnect_fd = -1; ld->dev_subdaemon_fail = false; ld->dev_allow_localhost = false; /*~ Behaving differently depending on environment variables is a hack, * *but* hacks are allowed for dev-mode stuff. In this case, there's * a significant overhead to the memory leak detection stuff, and * we can't use it under valgrind, so the test harness uses this var * to disable it in that case. */ if (getenv("LIGHTNINGD_DEV_MEMLEAK")) memleak_init(); #endif /*~ These are CCAN lists: an embedded double-linked list. It's not * really typesafe, but relies on convention to access the contents. * It's inspired by the closely-related Linux kernel list.h. */ list_head_init(&ld->peers); /*~ These are hash tables of incoming and outgoing HTLCs (contracts) */ htlc_in_map_init(&ld->htlcs_in); htlc_out_map_init(&ld->htlcs_out); /*~ We have a log-book infrastructure: we define a 20MB log book and * point our log objects into it. */ ld->log_book = new_log_book(20*1024*1024, LOG_INFORM); /*~ Note the tal context arg (by convention, the first argument to any * allocation function): ld->log will be implicitly freed when ld * is. */ ld->log = new_log(ld, ld->log_book, "lightningd(%u):", (int)getpid()); ld->logfile = NULL; /*~ We explicitly set these to NULL: if they're still NULL after option * parsing, we know they're to be set to the defaults. */ ld->alias = NULL; ld->rgb = NULL; list_head_init(&ld->connects); list_head_init(&ld->waitsendpay_commands); list_head_init(&ld->sendpay_commands); list_head_init(&ld->close_commands); /*~ Tal also explicitly supports arrays: it stores the number of * elements, which can be accessed with tal_count() (or tal_bytelen() * for raw bytecount). It's common for simple arrays to use * tal_resize(), which is a typesafe realloc function, but as all * talocations need a parent, we start with an empty array rather than * NULL. */ ld->proposed_wireaddr = tal_arr(ld, struct wireaddr_internal, 0); ld->proposed_listen_announce = tal_arr(ld, enum addr_listen_announce, 0); ld->portnum = DEFAULT_PORT; ld->listen = true; ld->autolisten = true; ld->reconnect = true; /*~ This is from ccan/timer: a scalable timer system which has a * fascinating implementation you should read if you have a spare * few hours */ timers_init(&ld->timers, time_mono()); /*~ This is detailed in chaintopology.c */ ld->topology = new_topology(ld, ld->log); ld->daemon = false; ld->config_filename = NULL; ld->pidfile = NULL; ld->ini_autocleaninvoice_cycle = 0; ld->ini_autocleaninvoice_expiredby = 86400; ld->proxyaddr = NULL; ld->use_proxy_always = false; ld->pure_tor_setup = false; ld->tor_service_password = NULL; ld->max_funding_unconfirmed = 2016; return ld; } /*~ We list our daemons here so on startup we can test they're the * correct versions and that they exist. */ static const char *subdaemons[] = { "lightning_channeld", "lightning_closingd", "lightning_connectd", "lightning_gossipd", "lightning_hsmd", "lightning_onchaind", "lightning_openingd" }; /*~ Check we can run them, and check their versions */ void test_subdaemons(const struct lightningd *ld) { size_t i; /*~ CCAN's ARRAY_SIZE() should always be used on defined arrays: it will * fail to build if the argument is actually a pointer, not an array! */ for (i = 0; i < ARRAY_SIZE(subdaemons); i++) { int outfd; /*~ CCAN's path module uses tal, so wants a context to allocate * from. We have a magic context 'tmpctx' which is freed in * the event loop for transient allocations like this. */ const char *dpath = path_join(tmpctx, ld->daemon_dir, subdaemons[i]); const char *verstring; /*~ CCAN's pipecmd module is like popen for grownups. */ pid_t pid = pipecmd(&outfd, NULL, &outfd, dpath, "--version", NULL); /*~ Our logging system: spam goes in at log_debug level */ log_debug(ld->log, "testing %s", dpath); if (pid == -1) err(1, "Could not run %s", dpath); /*~ CCAN's grab_file module contains a routine to read into a * tallocated buffer until EOF */ verstring = grab_fd(tmpctx, outfd); /*~ Like many CCAN modules, it set errno on failure, which * err (ccan/err, but usually just the BSD ) prints */ if (!verstring) err(1, "Could not get output from %s", dpath); /*~ strstarts is from CCAN/str. */ if (!strstarts(verstring, version()) || verstring[strlen(version())] != '\n') errx(1, "%s: bad version '%s'", subdaemons[i], verstring); } } /* Check if all subdaemons exist in specified directory. */ static bool has_all_subdaemons(const char* daemon_dir) { size_t i; bool missing_daemon = false; for (i = 0; i < ARRAY_SIZE(subdaemons); ++i) { if (!path_is_file(path_join(tmpctx, daemon_dir, subdaemons[i]))) { missing_daemon = true; break; } } return !missing_daemon; } /* This routine tries to determine what path the lightningd binary is in. * It's not actually that simple! */ static const char *find_my_path(const tal_t *ctx, const char *argv0) { char *me; /* A command containing / is run relative to the current directory, * not searched through the path. The shell sets argv0 to the command * run, though something else could set it to a arbitrary value and * this logic would be wrong. */ if (strchr(argv0, PATH_SEP)) { const char *path; /* Absolute paths are easy. */ if (strstarts(argv0, PATH_SEP_STR)) path = argv0; /* It contains a '/', it's relative to current dir. */ else path = path_join(tmpctx, path_cwd(tmpctx), argv0); me = path_canon(ctx, path); if (!me || access(me, X_OK) != 0) errx(1, "I cannot find myself at %s based on my name %s", path, argv0); } else { /* No /, search path */ char **pathdirs; const char *pathenv = getenv("PATH"); size_t i; /* This replicates the standard shell path search algorithm */ if (!pathenv) errx(1, "Cannot find myself: no $PATH set"); pathdirs = tal_strsplit(tmpctx, pathenv, ":", STR_NO_EMPTY); me = NULL; for (i = 0; pathdirs[i]; i++) { /* This returns NULL if it doesn't exist. */ me = path_canon(ctx, path_join(tmpctx, pathdirs[i], argv0)); if (me && access(me, X_OK) == 0) break; /* Nope, try again. */ me = tal_free(me); } if (!me) errx(1, "Cannot find %s in $PATH", argv0); } /*~ The caller just wants the directory we're in. * * Note the magic "take()" macro here: it annotates a pointer as "to * be taken", and the recipient is expected to take ownership of the * pointer. * * Many CCAN and our own routines support this, but if you hand a take() * to a non-take routine unfortunately you don't get a compile error. */ return path_dirname(ctx, take(me)); } /*~ This returns the PKGLIBEXEC path which is where binaries get installed. * Note the 'TAKES' annotation which is merely documentation that it will * take ownership of 'my_path' if the caller hands take() there. */ static const char *find_my_pkglibexec_path(const tal_t *ctx, const char *my_path TAKES) { const char *pkglibexecdir; pkglibexecdir = path_join(ctx, my_path, BINTOPKGLIBEXECDIR); /*~ Sometimes take() can be more efficient, since the routine can * manipulate the string in place. This is the case here. */ return path_simplify(ctx, take(pkglibexecdir)); } /* Determine the correct daemon dir. */ static const char *find_daemon_dir(const tal_t *ctx, const char *argv0) { const char *my_path = find_my_path(ctx, argv0); /* If we're running in-tree, all the subdaemons are with lightningd. */ if (has_all_subdaemons(my_path)) return my_path; /* Otherwise we assume they're in the installed dir. */ return find_my_pkglibexec_path(ctx, take(my_path)); } /*~ We like to free everything on exit, so valgrind doesn't complain. In some * ways it would be neater not to do this, but it turns out some transient * objects still need cleaning. */ static void shutdown_subdaemons(struct lightningd *ld) { struct peer *p; /*~ Because tal objects can be free indirectly, by freeing their parents * it turns out to be vital to be able to add *destructors* to objects. * As a result, freeing them may cause callbacks; in this case, some * objects freed here can cause database writes, which must be inside * a transaction */ db_begin_transaction(ld->wallet->db); /* Let everyone shutdown cleanly. */ close(ld->hsm_fd); /*~ The three "global" daemons, which we shutdown explicitly. */ subd_shutdown(ld->connectd, 10); subd_shutdown(ld->gossip, 10); subd_shutdown(ld->hsm, 10); /* Now we free all the HTLCs */ free_htlcs(ld, NULL); /*~ For every peer, we free every channel. Note that the peer has a * destructor (by convention, called destroy_peer) which removes it * from the list. Thus we use list_top() not list_pop() here. */ while ((p = list_top(&ld->peers, struct peer, list)) != NULL) { struct channel *c; /*~ A peer can have multiple channels; we only allow one to be * open at any time, but we remember old ones for 100 blocks, * after all the outputs we care about are spent. */ while ((c = list_top(&p->channels, struct channel, list)) != NULL) { /* Removes itself from list as we free it */ tal_free(c); } /* A peer may have a channel in the process of opening. */ if (p->uncommitted_channel) { struct uncommitted_channel *uc = p->uncommitted_channel; /* Setting to NULL stops destroy_uncommitted_channel * from trying to remove peer from db! */ p->uncommitted_channel = NULL; tal_free(uc); } /* Removes itself from list as we free it */ tal_free(p); } /*~ Commit the transaction. Note that the db is actually * single-threaded, so commits never fail and we don't need * spin-and-retry logic everywhere. */ db_commit_transaction(ld->wallet->db); } /*~ Chainparams are the parameters for eg. testnet vs mainnet. This wrapper * saves lots of struggles with our 80-column guideline! */ const struct chainparams *get_chainparams(const struct lightningd *ld) { /* "The lightningd is connected to the chain topology." * "The chain topology is connected to the bitcoind API." * "The bitcoind API is connected chain parameters." * -- Worst childhood song ever. */ return ld->topology->bitcoind->chainparams; } /*~ Our wallet logic needs to know what outputs we might be interested in: we * keep the maximum-ever-used key index in the db, and add them all to the * filter here. */ static void init_txfilter(struct wallet *w, struct txfilter *filter) { /*~ This is defined in libwally, so we didn't have to reimplement */ struct ext_key ext; /*~ Note the use of ccan/short_types u64 rather than uint64_t. * Thank me later. */ u64 bip32_max_index; bip32_max_index = db_get_intvar(w->db, "bip32_max_index", 0); /*~ One of the C99 things I unequivocally approve: for-loop scope. */ for (u64 i = 0; i <= bip32_max_index; i++) { if (bip32_key_from_parent(w->bip32_base, i, BIP32_FLAG_KEY_PUBLIC, &ext) != WALLY_OK) { abort(); } txfilter_add_derkey(filter, ext.pub_key); } } /*~ The normal advice for daemons is to move into the root directory, so you * don't prevent unmounting whatever filesystem you happen to start in. * * But we define every path relative to our (~/.lightning) data dir, so we * make sure we stay there. */ static void daemonize_but_keep_dir(struct lightningd *ld) { /* daemonize moves us into /, but we want to be here */ const char *cwd = path_cwd(NULL); /*~ SQLite3 does NOT like being open across fork(), a.k.a. daemonize() */ db_close_for_fork(ld->wallet->db); if (!cwd) fatal("Could not get current directory: %s", strerror(errno)); if (!daemonize()) fatal("Could not become a daemon: %s", strerror(errno)); /*~ Move back: important, since lightning dir may be relative! */ if (chdir(cwd) != 0) fatal("Could not return to directory %s: %s", cwd, strerror(errno)); db_reopen_after_fork(ld->wallet->db); /*~ Why not allocate cwd off tmpctx? Probably because this code predates * tmpctx. So we free manually here. */ tal_free(cwd); } /*~ It's pretty standard behaviour (especially for daemons) to create and * file-lock a pidfile. This not only prevents accidentally running multiple * daemons on the same database at once, but lets nosy sysadmins see what pid * the currently-running daemon is supposed to be. */ static void pidfile_create(const struct lightningd *ld) { char *pid; int pid_fd; /* Create PID file */ pid_fd = open(ld->pidfile, O_WRONLY|O_CREAT, 0640); if (pid_fd < 0) err(1, "Failed to open PID file"); /* Lock PID file: this will stay locked until we exit. */ if (lockf(pid_fd, F_TLOCK, 0) < 0) /* Problem locking file */ err(1, "lightningd already running? Error locking PID file"); /*~ Note that tal_fmt() is what asprintf() dreams of being. */ pid = tal_fmt(tmpctx, "%d\n", getpid()); /*~ CCAN's write_all writes to a file descriptor, looping if necessary * (which, on a file unlike a socket, is never, for historical UNIX * reasons). It also isn't declared with GCC's warn_unused_result * which write() is when FORTIFY_SOURCE is defined, so we're allowed * to ignore the result without jumping through hoops. */ write_all(pid_fd, pid, strlen(pid)); /* Leave file open: we close it implicitly when we exit */ } /*~ Yuck, we need a global here. * * ccan/io allows overriding the poll() function for special effects: for * lightningd, we make sure we haven't left a db transaction open. All * daemons which use ccan/io add sanity checks in this loop, so we chain * that after our own override. */ static int (*io_poll_debug)(struct pollfd *, nfds_t, int); static int io_poll_lightningd(struct pollfd *fds, nfds_t nfds, int timeout) { db_assert_no_outstanding_statements(); return io_poll_debug(fds, nfds, timeout); } /*~ Ever had one of those functions which doesn't quite fit anywhere? Me too. * Implementing a generic notifier framework is overkill in a static codebase * like this, and it's always better to have compile-time calls than runtime, * as it makes the code more explicit. But pasting in direct calls is also an * abstraction violation, so we use this middleman function. */ void notify_new_block(struct lightningd *ld, u32 block_height) { /* Inform our subcomponents individually. */ htlcs_notify_new_block(ld, block_height); channel_notify_new_block(ld, block_height); } int main(int argc, char *argv[]) { struct lightningd *ld; u32 min_blockheight, max_blockheight; int connectd_gossipd_fd; /*~ What happens in strange locales should stay there. */ setup_locale(); /*~ Every daemon calls this in some form: the hooks are for dumping * backtraces when we crash (if supported on this platform). */ daemon_setup(argv[0], log_backtrace_print, log_backtrace_exit); /*~ There's always a battle between what a constructor like this * should do, and what should be added later by the caller. In * general, because we use valgrind heavily for testing, we prefer not * to intialize unused fields which we expect the caller to set: * valgrind will warn us if we make decisions based on uninitialized * variables. */ ld = new_lightningd(NULL); /* Figure out where our daemons are first. */ ld->daemon_dir = find_daemon_dir(ld, argv[0]); if (!ld->daemon_dir) errx(1, "Could not find daemons"); /*~ The ccan/opt code requires registration then parsing; we * mimic this API here, even though they're on separate lines.*/ register_opts(ld); /*~ Handle options and config; move to .lightningd (--lightning-dir) */ handle_opts(ld, argc, argv); /*~ Make sure we can reach the subdaemons, and versions match. */ test_subdaemons(ld); /*~ Our "wallet" code really wraps the db, which is more than a simple * bitcoin wallet (though it's that too). */ ld->wallet = wallet_new(ld, ld->log, &ld->timers); /*~ We keep a filter of scriptpubkeys we're interested in. */ ld->owned_txfilter = txfilter_new(ld); /*~ This is the ccan/io central poll override from above. */ io_poll_debug = io_poll_override(io_poll_lightningd); /*~ Set up HSM: it knows our node secret key, so tells us who we are. */ hsm_init(ld); /*~ Our default color and alias are derived from our node id, so we * can only set those now (if not set by config options). */ setup_color_and_alias(ld); /*~ Set up connect daemon: this manages receiving and making * TCP connections. It needs to talk to the gossip daemon * which knows (via node_announcement messages) the public * addresses of nodes, so connectd_init hands it one end of a * socket pair, and gives us the other */ connectd_gossipd_fd = connectd_init(ld); /*~ The gossip daemon looks after the routing gossip; * channel_announcement, channel_update, node_announcement and gossip * queries. */ gossip_init(ld, connectd_gossipd_fd); /*~ We do every database operation within a transaction; usually this * is covered by the infrastructure (eg. opening a transaction before * handling a message or expiring a timer), but for startup we do this * explicitly. */ db_begin_transaction(ld->wallet->db); /*~ Our default names, eg. for the database file, are not dependent on * the network. Instead, the db knows what chain it belongs to, and we * simple barf here if it's wrong. */ if (!wallet_network_check(ld->wallet, get_chainparams(ld))) errx(1, "Wallet network check failed."); /*~ Initialize the transaction filter with our pubkeys. */ init_txfilter(ld->wallet, ld->owned_txfilter); /*~ Set up invoice autoclean. */ wallet_invoice_autoclean(ld->wallet, ld->ini_autocleaninvoice_cycle, ld->ini_autocleaninvoice_expiredby); /*~ Pull peers, channels and HTLCs from db. */ load_channels_from_wallet(ld); /*~ Get the blockheight we are currently at, UINT32_MAX is used to signal * an unitialized wallet and that we should start off of bitcoind's * current height */ wallet_blocks_heights(ld->wallet, UINT32_MAX, &min_blockheight, &max_blockheight); /*~ If we were asked to rescan from an absolute height (--rescan < 0) * then just go there. Otherwise compute the diff to our current height, * lowerbounded by 0. */ if (ld->config.rescan < 0) max_blockheight = -ld->config.rescan; else if (max_blockheight < (u32)ld->config.rescan) max_blockheight = 0; else if (max_blockheight != UINT32_MAX) max_blockheight -= ld->config.rescan; /*~ That's all of the wallet db operations for now. */ db_commit_transaction(ld->wallet->db); /*~ Initialize block topology. This does its own io_loop to * talk to bitcoind, so does its own db transactions. */ setup_topology(ld->topology, &ld->timers, min_blockheight, max_blockheight); /*~ Create RPC socket (if any): now we can talk to clients. */ setup_jsonrpc(ld, ld->rpc_filename); /*~ We defer --daemon until we've completed most initialization: that * way we'll exit with an error rather than silently exiting 0, then * realizing we can't start and forcing the confused user to read the * logs. */ if (ld->daemon) daemonize_but_keep_dir(ld); /*~ Now create the PID file: this has to be after daemonize, since that * changes our pid! */ pidfile_create(ld); /*~ Activate connect daemon. Needs to be after the initialization of * chaintopology, otherwise peers may connect and ask for * uninitialized data. */ connectd_activate(ld); /*~ "onchaind" is a dumb daemon which tries to get our funds back: it * doesn't handle reorganizations, but it's idempotent, so we can * simply just restart it if the chain moves. Similarly, we replay it * chain events from the database on restart, beginning with the * "funding transaction spent" event which creates it. */ onchaind_replay_channels(ld); /*~ Mark ourselves live. * * Note the use of type_to_string() here: it's a typesafe formatter, * often handed 'tmpctx' like here to allocate a throwaway string for * formatting. json_escape() avoids printing weird characters in our * log. And tal_hex() is a helper from utils which returns a hex string; * it's assumed that the argument was allocated with tal or tal_arr * so it can use tal_bytelen() to get the length. */ log_info(ld->log, "Server started with public key %s, alias %s (color #%s) and lightningd %s", type_to_string(tmpctx, struct pubkey, &ld->id), json_escape(tmpctx, (const char *)ld->alias)->s, tal_hex(tmpctx, ld->rgb), version()); /*~ This is where we ask connectd to reconnect to any peers who have * live channels with us, and makes sure we're watching the funding * tx. */ activate_peers(ld); /*~ Now that all the notifications for transactions are in place, we * can start the poll loop which queries bitcoind for new blocks. */ begin_topology(ld->topology); /*~ Setting this (global) activates the crash log: we don't usually need * a backtrace if we fail during startup. */ crashlog = ld->log; /*~ The root of every backtrace (almost). */ for (;;) { /* ~io_loop returns if there's an expired timer, *or* someone * calls io_break, or if there are no more IO connections * (which never happens in our code). */ struct timer *expired; void *v = io_loop(&ld->timers, &expired); /*~ We use io_break(ld) to shut down. */ if (v == ld) break; /*~ Notice that timers are called here in the event loop like * anything else, so there are no weird concurrency issues. */ if (expired) { db_begin_transaction(ld->wallet->db); timer_expired(ld, expired); db_commit_transaction(ld->wallet->db); } } shutdown_subdaemons(ld); /* Clean up the JSON-RPC. This needs to happen in a DB transaction since * it might actually be touching the DB in some destructors, e.g., * unreserving UTXOs (see #1737) */ db_begin_transaction(ld->wallet->db); tal_free(ld->rpc_listener); db_commit_transaction(ld->wallet->db); remove(ld->pidfile); /* FIXME: pay can have children off tmpctx which unlink from * ld->payments, so clean that up. */ clean_tmpctx(); tal_free(ld); opt_free_table(); #if DEVELOPER memleak_cleanup(); #endif daemon_shutdown(); /*~ Farewell. Next stop: hsmd/hsm.c. */ return 0; }