From f41f5c7fa60de56a14befe8b001d08a4875de747 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Mon, 27 Sep 2021 20:40:41 +0800 Subject: [PATCH] multi: add tor connection healthcheck This commit adds a new health check, tor connection, to our liveness monitor. A monitor refactor is applied to the server creation such that the scope of health check creation is managed within one function. --- config.go | 15 ++++++++ lncfg/healthcheck.go | 6 +++ sample-lnd.conf | 16 ++++++++ server.go | 90 ++++++++++++++++++++++++++++++-------------- 4 files changed, 98 insertions(+), 29 deletions(-) diff --git a/config.go b/config.go index 6044ae0c..35fc24df 100644 --- a/config.go +++ b/config.go @@ -139,6 +139,15 @@ const ( defaultTLSBackoff = time.Minute defaultTLSAttempts = 0 + // Set defaults for a health check which ensures that the tor + // connection is alive. Although this check is off by default (not all + // setups require it), we still set the other default values so that + // the health check can be easily enabled with sane defaults. + defaultTCInterval = time.Minute + defaultTCTimeout = time.Second * 5 + defaultTCBackoff = time.Minute + defaultTCAttempts = 0 + // defaultRemoteMaxHtlcs specifies the default limit for maximum // concurrent HTLCs the remote party may add to commitment transactions. // This value can be overridden with --default-remote-max-htlcs. 
@@ -541,6 +550,12 @@ func DefaultConfig() Config { Attempts: defaultTLSAttempts, Backoff: defaultTLSBackoff, }, + TorConnection: &lncfg.CheckConfig{ + Interval: defaultTCInterval, + Timeout: defaultTCTimeout, + Attempts: defaultTCAttempts, + Backoff: defaultTCBackoff, + }, }, Gossip: &lncfg.Gossip{ MaxChannelUpdateBurst: discovery.DefaultMaxChannelUpdateBurst, diff --git a/lncfg/healthcheck.go b/lncfg/healthcheck.go index bee569b3..2c2a77c9 100644 --- a/lncfg/healthcheck.go +++ b/lncfg/healthcheck.go @@ -28,6 +28,8 @@ type HealthCheckConfig struct { DiskCheck *DiskCheckConfig `group:"diskspace" namespace:"diskspace"` TLSCheck *CheckConfig `group:"tls" namespace:"tls"` + + TorConnection *CheckConfig `group:"torconnection" namespace:"torconnection"` } // Validate checks the values configured for our health checks. @@ -50,6 +52,10 @@ func (h *HealthCheckConfig) Validate() error { return errors.New("disk required ratio must be in [0:1)") } + if err := h.TorConnection.validate("tor connection"); err != nil { + return err + } + return nil } diff --git a/sample-lnd.conf b/sample-lnd.conf index ad0ea1dc..cef33bab 100644 --- a/sample-lnd.conf +++ b/sample-lnd.conf @@ -990,6 +990,22 @@ litecoin.node=ltcd ; This value must be >= 1m. ; healthcheck.tls.interval=1m +; The number of times we should attempt to check our tor connection before +; gracefully shutting down. Set this value to 0 to disable this health check. +; healthcheck.torconnection.attempts=3 + +; The amount of time we allow a call to our tor connection to take before we +; fail the attempt. This value must be >= 1s. +; healthcheck.torconnection.timeout=10s + +; The amount of time we should backoff between failed attempts to check tor +; connection. This value must be >= 1s. +; healthcheck.torconnection.backoff=30s + +; The amount of time we should wait between tor connection health checks. This +; value must be >= 1m. 
+; healthcheck.torconnection.interval=1m + [signrpc] diff --git a/server.go b/server.go index f8fddfd3..30dadc3d 100644 --- a/server.go +++ b/server.go @@ -1470,9 +1470,40 @@ func newServer(cfg *Config, listenAddrs []net.Addr, }) } - // Create a set of health checks using our configured values. If a - // health check has been disabled by setting attempts to 0, our monitor - // will not run it. + // Create liveliness monitor. + s.createLivenessMonitor(cfg, cc) + + // Create the connection manager which will be responsible for + // maintaining persistent outbound connections and also accepting new + // incoming connections + cmgr, err := connmgr.New(&connmgr.Config{ + Listeners: listeners, + OnAccept: s.InboundPeerConnected, + RetryDuration: time.Second * 5, + TargetOutbound: 100, + Dial: noiseDial( + nodeKeyECDH, s.cfg.net, s.cfg.ConnectionTimeout, + ), + OnConnection: s.OutboundPeerConnected, + }) + if err != nil { + return nil, err + } + s.connMgr = cmgr + + return s, nil +} + +// createLivenessMonitor creates a set of health checks using our configured +// values and uses these checks to create a liveliness monitor. Available +// health checks, +// - chainHealthCheck +// - diskCheck +// - tlsHealthCheck +// - torConnectionCheck, only created when tor is enabled. +// If a health check has been disabled by setting attempts to 0, our monitor +// will not run it.
+func (s *server) createLivenessMonitor(cfg *Config, cc *chainreg.ChainControl) { chainHealthCheck := healthcheck.NewObservation( "chain backend", cc.HealthCheck, @@ -1521,11 +1552,12 @@ func newServer(cfg *Config, listenAddrs []net.Addr, // If the current time is passed the certificate's // expiry time, then it is considered expired if time.Now().After(parsedCert.NotAfter) { - return fmt.Errorf("TLS certificate is expired as of %v", parsedCert.NotAfter) + return fmt.Errorf("TLS certificate is "+ + "expired as of %v", parsedCert.NotAfter) } - // If the certificate is not outdated, no error needs to - // be returned + // If the certificate is not outdated, no error needs + // to be returned return nil }, cfg.HealthChecks.TLSCheck.Interval, @@ -1534,36 +1566,36 @@ func newServer(cfg *Config, listenAddrs []net.Addr, cfg.HealthChecks.TLSCheck.Attempts, ) + checks := []*healthcheck.Observation{ + chainHealthCheck, diskCheck, tlsHealthCheck, + } + + // If Tor is enabled, add the healthcheck for tor connection. + if s.torController != nil { + torConnectionCheck := healthcheck.NewObservation( + "tor connection", + func() error { + return healthcheck.CheckTorServiceStatus( + s.torController, + s.createNewHiddenService, + ) + }, + cfg.HealthChecks.TorConnection.Interval, + cfg.HealthChecks.TorConnection.Timeout, + cfg.HealthChecks.TorConnection.Backoff, + cfg.HealthChecks.TorConnection.Attempts, + ) + checks = append(checks, torConnectionCheck) + } + // If we have not disabled all of our health checks, we create a // liveliness monitor with our configured checks. 
s.livelinessMonitor = healthcheck.NewMonitor( &healthcheck.Config{ - Checks: []*healthcheck.Observation{ - chainHealthCheck, diskCheck, tlsHealthCheck, - }, + Checks: checks, Shutdown: srvrLog.Criticalf, }, ) - - // Create the connection manager which will be responsible for - // maintaining persistent outbound connections and also accepting new - // incoming connections - cmgr, err := connmgr.New(&connmgr.Config{ - Listeners: listeners, - OnAccept: s.InboundPeerConnected, - RetryDuration: time.Second * 5, - TargetOutbound: 100, - Dial: noiseDial( - nodeKeyECDH, s.cfg.net, s.cfg.ConnectionTimeout, - ), - OnConnection: s.OutboundPeerConnected, - }) - if err != nil { - return nil, err - } - s.connMgr = cmgr - - return s, nil } // Started returns true if the server has been started, and false otherwise.