Skip to content

Commit cb7f9ab

Browse files
committed
multi: track LiT and LND status
With this commit, we now also track the start-up state of LiT and of LiT's connection to LND. These differ from the other sub-servers (loop, pool & faraday) because a failure to start LiT or LND is fatal and so should stop the rest of the start-up process. However, we still want the webserver to continue serving the new Status server so that the UI can query the start-up status of LND and LiT. So: if any error occurs while starting or connecting to LND, or while starting any of LiT's other processes, then we report the error but we don't kill the main LiT process. The main process is only killed upon receiving a shutdown signal.
1 parent 2e4fa89 commit cb7f9ab

File tree

2 files changed

+93
-41
lines changed

2 files changed

+93
-41
lines changed

rpc_proxy.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,10 +424,16 @@ func (p *rpcProxy) checkSubSystemStarted(requestURI string) error {
424424
system = subServerName
425425

426426
case p.permsMgr.IsLndURI(requestURI):
427-
return nil
427+
system = LNDSubServer
428428

429429
case p.permsMgr.IsLitURI(requestURI):
430-
return nil
430+
system = LitSubServer
431+
432+
// If the request is for the status server, then we allow the
433+
// request even if Lit has not properly started.
434+
if isStatusReq(requestURI) {
435+
return nil
436+
}
431437

432438
default:
433439
return fmt.Errorf("unknown gRPC web request: %v", requestURI)

terminal.go

Lines changed: 85 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,49 @@ func (g *LightningTerminal) Run() error {
237237
return fmt.Errorf("error starting UI HTTP server: %v", err)
238238
}
239239

240+
// Attempt to start Lit and all of its sub-servers. If an error is
241+
// returned, it means that either one of Lit's internal sub-servers
242+
// could not start or LND could not start or be connected to.
243+
startErr := g.start()
244+
if startErr != nil {
245+
g.statusServer.setServerErrored(
246+
LitSubServer, "could not start Lit: %v", startErr,
247+
)
248+
}
249+
250+
// Now block until we receive an error or the main shutdown
251+
// signal.
252+
<-shutdownInterceptor.ShutdownChannel()
253+
log.Infof("Shutdown signal received")
254+
255+
if g.rpcProxy != nil {
256+
if err := g.rpcProxy.Stop(); err != nil {
257+
log.Errorf("Error stopping rpc proxy: %v", err)
258+
}
259+
}
260+
261+
if g.httpServer != nil {
262+
if err := g.httpServer.Close(); err != nil {
263+
log.Errorf("Error stopping UI server: %v", err)
264+
}
265+
}
266+
267+
g.wg.Wait()
268+
269+
return startErr
270+
}
271+
272+
// start attempts to start all the various components of Litd. Only Litd and
273+
// LND errors are considered fatal and will result in an error being returned.
274+
// If any of the sub-servers managed by the subServerMgr error while starting
275+
// up, these are considered non-fatal and will not result in an error being
276+
// returned.
277+
func (g *LightningTerminal) start() error {
240278
// Create the instances of our subservers now so we can hook them up to
241279
// lnd once it's fully running.
242280
g.initSubServers()
243281

282+
var err error
244283
g.accountService, err = accounts.NewService(
245284
filepath.Dir(g.cfg.MacaroonPath), g.errQueue.ChanIn(),
246285
)
@@ -328,9 +367,7 @@ func (g *LightningTerminal) Run() error {
328367
go func() {
329368
defer g.wg.Done()
330369

331-
err := lnd.Main(
332-
g.cfg.Lnd, lisCfg, implCfg, shutdownInterceptor,
333-
)
370+
err := lnd.Main(g.cfg.Lnd, lisCfg, implCfg, interceptor)
334371
if e, ok := err.(*flags.Error); err != nil &&
335372
(!ok || e.Type != flags.ErrHelp) {
336373

@@ -361,19 +398,25 @@ func (g *LightningTerminal) Run() error {
361398
case <-readyChan:
362399

363400
case err := <-g.errQueue.ChanOut():
364-
return err
401+
g.statusServer.setServerErrored(
402+
LNDSubServer, "error from errQueue channel",
403+
)
404+
return fmt.Errorf("could not start LND: %v", err)
365405

366406
case <-lndQuit:
367-
return nil
407+
g.statusServer.setServerErrored(
408+
LNDSubServer, "lndQuit channel closed",
409+
)
410+
return fmt.Errorf("LND has stopped")
368411

369-
case <-shutdownInterceptor.ShutdownChannel():
370-
return errors.New("shutting down")
412+
case <-interceptor.ShutdownChannel():
413+
return fmt.Errorf("received the shutdown signal")
371414
}
372415

373416
// We now know that starting lnd was successful. If we now run into an
374417
// error, we must shut down lnd correctly.
375418
defer func() {
376-
err := g.shutdown()
419+
err := g.shutdownSubServers()
377420
if err != nil {
378421
log.Errorf("Error shutting down: %v", err)
379422
}
@@ -384,6 +427,12 @@ func (g *LightningTerminal) Run() error {
384427
if err != nil {
385428
return fmt.Errorf("could not connect to LND: %v", err)
386429
}
430+
if err != nil {
431+
g.statusServer.setServerErrored(
432+
LNDSubServer, "could not connect to LND: %v", err,
433+
)
434+
return fmt.Errorf("could not connect to LND")
435+
}
387436

388437
// Initialise any connections to sub-servers that we are running in
389438
// remote mode.
@@ -410,12 +459,18 @@ func (g *LightningTerminal) Run() error {
410459
return err
411460

412461
case <-lndQuit:
413-
return nil
462+
g.statusServer.setServerErrored(
463+
LNDSubServer, "lndQuit channel closed",
464+
)
465+
return fmt.Errorf("LND is not running")
414466

415-
case <-shutdownInterceptor.ShutdownChannel():
467+
case <-interceptor.ShutdownChannel():
416468
return errors.New("shutting down")
417469
}
418470

471+
// We can now set the status of LND as running.
472+
g.statusServer.setServerRunning(LNDSubServer)
473+
419474
// If we're in integrated mode, we'll need to wait for lnd to send the
420475
// macaroon after unlock before going any further.
421476
if g.cfg.LndMode == ModeIntegrated {
@@ -426,8 +481,11 @@ func (g *LightningTerminal) Run() error {
426481
// Set up all the LND clients required by LiT.
427482
err = g.setUpLNDClients()
428483
if err != nil {
429-
log.Errorf("Could not set up LND clients: %w", err)
430-
return err
484+
g.statusServer.setServerErrored(
485+
LNDSubServer, "could not set up LND clients: %v", err,
486+
)
487+
488+
return fmt.Errorf("could not start LND")
431489
}
432490

433491
// If we're in integrated and stateless init mode, we won't create
@@ -454,22 +512,28 @@ func (g *LightningTerminal) Run() error {
454512
return fmt.Errorf("could not start litd sub-servers: %v", err)
455513
}
456514

515+
// We can now set the status of LiT as running.
516+
g.statusServer.setServerRunning(LitSubServer)
517+
457518
// Now block until we receive an error or the main shutdown signal.
458519
select {
459520
case err := <-g.errQueue.ChanOut():
460521
if err != nil {
461-
log.Errorf("Received critical error from subsystem, "+
462-
"shutting down: %v", err)
522+
return fmt.Errorf("received critical error from "+
523+
"subsystem, shutting down: %v", err,
524+
)
463525
}
464526

465527
case <-lndQuit:
466-
return nil
528+
g.statusServer.setServerErrored(
529+
LNDSubServer, "lndQuit channel closed",
530+
)
531+
return fmt.Errorf("LND is not running")
467532

468-
case <-shutdownInterceptor.ShutdownChannel():
469-
log.Infof("Shutdown signal received")
533+
case <-interceptor.ShutdownChannel():
470534
}
471535

472-
return nil
536+
return fmt.Errorf("received the shutdown signal")
473537
}
474538

475539
// initSubServers registers the faraday, loop and pool sub-servers with the
@@ -947,8 +1011,9 @@ func (g *LightningTerminal) BuildWalletConfig(ctx context.Context,
9471011
)
9481012
}
9491013

950-
// shutdown stops all subservers that were started and attached to lnd.
951-
func (g *LightningTerminal) shutdown() error {
1014+
// shutdownSubServers stops all subservers that were started and attached to
1015+
// lnd.
1016+
func (g *LightningTerminal) shutdownSubServers() error {
9521017
var returnErr error
9531018

9541019
err := g.subServerMgr.Stop()
@@ -989,32 +1054,13 @@ func (g *LightningTerminal) shutdown() error {
9891054
g.restCancel()
9901055
}
9911056

992-
if g.rpcProxy != nil {
993-
if err := g.rpcProxy.Stop(); err != nil {
994-
log.Errorf("Error stopping lnd proxy: %v", err)
995-
returnErr = err
996-
}
997-
}
998-
9991057
if g.lndConn != nil {
10001058
if err := g.lndConn.Close(); err != nil {
10011059
log.Errorf("Error closing lnd connection: %v", err)
10021060
returnErr = err
10031061
}
10041062
}
10051063

1006-
if g.httpServer != nil {
1007-
if err := g.httpServer.Close(); err != nil {
1008-
log.Errorf("Error stopping UI server: %v", err)
1009-
returnErr = err
1010-
}
1011-
}
1012-
1013-
// In case the error wasn't thrown by lnd, make sure we stop it too.
1014-
interceptor.RequestShutdown()
1015-
1016-
g.wg.Wait()
1017-
10181064
// Do we have any last errors to display? We use an anonymous function,
10191065
// so we can use return instead of breaking to a label in the default
10201066
// case.

0 commit comments

Comments
 (0)