feat: implement exponential backoff for paid_invoices_stream (#778)

* feat: implement exponential backoff for paid_invoices_stream across all Lightning backends

- Add exponential backoff retry logic to CLN REST, LND REST, and LND gRPC backends
- Start with 1 second delay, exponentially increase up to 5 minutes maximum
- Reset delay to 1 second on successful reconnection
- Improve error logging to include retry delay information
- Replace fixed delays with adaptive backoff to handle network issues gracefully
- Prevent system overload during persistent connection problems

Resolves issues with rapid reconnection attempts that could overwhelm Lightning nodes during network instability.

* remove unused import

* feat: extend exponential backoff to all remaining backends and invoice listener

- Implement exponential backoff in LNbits paid_invoices_stream for both SSE and WebSocket modes
- Add exponential backoff guidance comments to Blink and Strike backends (backoff itself not implemented there)
- Apply exponential backoff to invoice_listener in tasks.py that calls paid_invoices_stream
- Ensure consistent retry behavior across all Lightning backend integrations
- Improve system resilience during network interruptions and backend failures

All backends and the invoice listener now use the same exponential backoff strategy:
- Start with 1 second delay, exponentially increase up to 5 minutes maximum
- Reset delay to 1 second on successful reconnection
- Enhanced error logging with retry delay information

* remove comments from blink + strike backends

* remove hardcoded values in favor of settings

* immediate first retry
This commit is contained in:
lollerfirst
2025-09-08 16:14:19 +02:00
committed by GitHub
parent ffaac6a5ea
commit ff3fdd5aef
6 changed files with 125 additions and 75 deletions

View File

@@ -71,6 +71,9 @@ class MintSettings(CashuSettings):
description="Interval (in seconds) for running regular tasks like the invoice checker.", description="Interval (in seconds) for running regular tasks like the invoice checker.",
) )
mint_retry_exponential_backoff_base_delay: int = Field(default=1)
mint_retry_exponential_backoff_max_delay: int = Field(default=10)
class MintWatchdogSettings(MintSettings): class MintWatchdogSettings(MintSettings):
mint_watchdog_enabled: bool = Field( mint_watchdog_enabled: bool = Field(

View File

@@ -298,6 +298,10 @@ class CLNRestWallet(LightningBackend):
else 0 else 0
) )
self.last_pay_index = last_pay_index self.last_pay_index = last_pay_index
retry_delay = 0
max_retry_delay = settings.mint_retry_exponential_backoff_max_delay
while True: while True:
try: try:
url = "/v1/waitanyinvoice" url = "/v1/waitanyinvoice"
@@ -309,6 +313,8 @@ class CLNRestWallet(LightningBackend):
}, },
timeout=None, timeout=None,
) as r: ) as r:
# Reset retry delay on successful connection
retry_delay = 0
async for line in r.aiter_lines(): async for line in r.aiter_lines():
inv = json.loads(line) inv = json.loads(line)
if "code" in inv and "message" in inv: if "code" in inv and "message" in inv:
@@ -332,11 +338,14 @@ class CLNRestWallet(LightningBackend):
yield payment_hash yield payment_hash
except Exception as exc: except Exception as exc:
logger.debug( logger.error(
f"lost connection to clnrest invoices stream: '{exc}', " f"lost connection to clnrest invoices stream: '{exc}', retrying in {retry_delay}"
"reconnecting..." " seconds"
) )
await asyncio.sleep(0.02) await asyncio.sleep(retry_delay)
# Exponential backoff
retry_delay = max(settings.mint_retry_exponential_backoff_base_delay, min(retry_delay * 2, max_retry_delay))
async def get_payment_quote( async def get_payment_quote(
self, melt_quote: PostMeltQuoteRequest self, melt_quote: PostMeltQuoteRequest

View File

@@ -229,70 +229,83 @@ class LNbitsWallet(LightningBackend):
) )
async def paid_invoices_stream(self) -> AsyncGenerator[str, None]: async def paid_invoices_stream(self) -> AsyncGenerator[str, None]:
# --- LNBITS RETRO-COMPATIBILITY --- retry_delay = 0
if self.old_api: max_retry_delay = settings.mint_retry_exponential_backoff_max_delay
url = f"{self.endpoint}/api/v1/payments/sse"
while True:
try: try:
sse_headers = self.client.headers.copy() # --- LNBITS RETRO-COMPATIBILITY ---
sse_headers.update( if self.old_api:
{ url = f"{self.endpoint}/api/v1/payments/sse"
"accept": "text/event-stream",
"cache-control": "no-cache", try:
"connection": "keep-alive", sse_headers = self.client.headers.copy()
} sse_headers.update(
{
"accept": "text/event-stream",
"cache-control": "no-cache",
"connection": "keep-alive",
}
)
async with self.client.stream(
"GET",
url,
content="text/event-stream",
timeout=None,
headers=sse_headers,
) as r:
# Reset retry delay on successful connection
retry_delay = 0
sse_trigger = False
async for line in r.aiter_lines():
if "Payment does not exist." in line:
logger.debug("New API detected. Setting old_api = False")
self.old_api = False
# The data we want to listen to is of this shape:
# event: payment-received
# data: {.., "payment_hash" : "asd"}
if line.startswith("event: payment-received"):
sse_trigger = True
continue
elif sse_trigger and line.startswith("data:"):
data = json.loads(line[len("data:") :])
sse_trigger = False
yield data["payment_hash"]
else:
sse_trigger = False
except (OSError, httpx.ReadError, httpx.ConnectError, httpx.ReadTimeout):
pass
if self.old_api:
await asyncio.sleep(retry_delay)
# Exponential backoff
retry_delay = max(settings.mint_retry_exponential_backoff_base_delay, min(retry_delay * 2, max_retry_delay))
continue
# --- END LNBITS RETRO-COMPATIBILITY ---
async with connect(self.ws_url) as ws:
logger.info("connected to LNbits fundingsource websocket.")
# Reset retry delay on successful connection
retry_delay = 0
while True:
message = await ws.recv()
message_dict = json.loads(message)
if (
message_dict
and message_dict.get("payment")
and message_dict["payment"].get("payment_hash")
and message_dict["payment"].get("amount") > 0
):
payment_hash = message_dict["payment"]["payment_hash"]
logger.info(f"payment-received: {payment_hash}")
yield payment_hash
except Exception as exc:
logger.error(
f"lost connection to LNbits fundingsource websocket: '{exc}', retrying in {retry_delay}"
" seconds"
) )
async with self.client.stream( await asyncio.sleep(retry_delay)
"GET",
url,
content="text/event-stream",
timeout=None,
headers=sse_headers,
) as r:
sse_trigger = False
async for line in r.aiter_lines():
if "Payment does not exist." in line:
logger.debug("New API detected. Setting old_api = False")
self.old_api = False
# The data we want to listen to is of this shape:
# event: payment-received
# data: {.., "payment_hash" : "asd"}
if line.startswith("event: payment-received"):
sse_trigger = True
continue
elif sse_trigger and line.startswith("data:"):
data = json.loads(line[len("data:") :])
sse_trigger = False
yield data["payment_hash"]
else:
sse_trigger = False
except (OSError, httpx.ReadError, httpx.ConnectError, httpx.ReadTimeout): # Exponential backoff
pass retry_delay = max(settings.mint_retry_exponential_backoff_base_delay, min(retry_delay * 2, max_retry_delay))
if self.old_api:
await asyncio.sleep(1)
return
# --- END LNBITS RETRO-COMPATIBILITY ---
try:
async with connect(self.ws_url) as ws:
logger.info("connected to LNbits fundingsource websocket.")
while True:
message = await ws.recv()
message_dict = json.loads(message)
if (
message_dict
and message_dict.get("payment")
and message_dict["payment"].get("payment_hash")
and message_dict["payment"].get("amount") > 0
):
payment_hash = message_dict["payment"]["payment_hash"]
logger.info(f"payment-received: {payment_hash}")
yield payment_hash
except Exception as exc:
logger.error(
f"lost connection to LNbits fundingsource websocket: '{exc}'"
"retrying in 5 seconds"
)
await asyncio.sleep(5)

View File

@@ -375,12 +375,17 @@ class LndRPCWallet(LightningBackend):
return PaymentStatus(result=PaymentResult.UNKNOWN) return PaymentStatus(result=PaymentResult.UNKNOWN)
async def paid_invoices_stream(self) -> AsyncGenerator[str, None]: async def paid_invoices_stream(self) -> AsyncGenerator[str, None]:
retry_delay = 0
max_retry_delay = settings.mint_retry_exponential_backoff_max_delay
while True: while True:
try: try:
async with grpc.aio.secure_channel( async with grpc.aio.secure_channel(
self.endpoint, self.combined_creds self.endpoint, self.combined_creds
) as channel: ) as channel:
lnstub = lightningstub.LightningStub(channel) lnstub = lightningstub.LightningStub(channel)
# Reset retry delay on successful connection
retry_delay = 0
async for invoice in lnstub.SubscribeInvoices( async for invoice in lnstub.SubscribeInvoices(
lnrpc.InvoiceSubscription() lnrpc.InvoiceSubscription()
): ):
@@ -389,8 +394,11 @@ class LndRPCWallet(LightningBackend):
payment_hash = invoice.r_hash.hex() payment_hash = invoice.r_hash.hex()
yield payment_hash yield payment_hash
except AioRpcError as exc: except AioRpcError as exc:
logger.error(f"SubscribeInvoices failed: {exc}. Retrying in 1 sec...") logger.error(f"SubscribeInvoices failed: {exc}. Retrying in {retry_delay} sec...")
await asyncio.sleep(1) await asyncio.sleep(retry_delay)
# Exponential backoff
retry_delay = max(settings.mint_retry_exponential_backoff_base_delay, min(retry_delay * 2, max_retry_delay))
async def get_payment_quote( async def get_payment_quote(
self, melt_quote: PostMeltQuoteRequest self, melt_quote: PostMeltQuoteRequest

View File

@@ -415,10 +415,15 @@ class LndRestWallet(LightningBackend):
return PaymentStatus(result=PaymentResult.UNKNOWN, error_message="timeout") return PaymentStatus(result=PaymentResult.UNKNOWN, error_message="timeout")
async def paid_invoices_stream(self) -> AsyncGenerator[str, None]: async def paid_invoices_stream(self) -> AsyncGenerator[str, None]:
retry_delay = 0
max_retry_delay = settings.mint_retry_exponential_backoff_max_delay
while True: while True:
try: try:
url = "/v1/invoices/subscribe" url = "/v1/invoices/subscribe"
async with self.client.stream("GET", url, timeout=None) as r: async with self.client.stream("GET", url, timeout=None) as r:
# Reset retry delay on successful connection
retry_delay = 0
async for line in r.aiter_lines(): async for line in r.aiter_lines():
try: try:
inv = json.loads(line)["result"] inv = json.loads(line)["result"]
@@ -431,10 +436,13 @@ class LndRestWallet(LightningBackend):
yield payment_hash yield payment_hash
except Exception as exc: except Exception as exc:
logger.error( logger.error(
f"lost connection to lnd invoices stream: '{exc}', retrying in 5" f"lost connection to lnd invoices stream: '{exc}', retrying in {retry_delay}"
" seconds" " seconds"
) )
await asyncio.sleep(5) await asyncio.sleep(retry_delay)
# Exponential backoff
retry_delay = max(settings.mint_retry_exponential_backoff_base_delay, min(retry_delay * 2, max_retry_delay))
async def get_payment_quote( async def get_payment_quote(
self, melt_quote: PostMeltQuoteRequest self, melt_quote: PostMeltQuoteRequest

View File

@@ -4,6 +4,7 @@ from typing import List
from loguru import logger from loguru import logger
from ..core.base import MintQuoteState from ..core.base import MintQuoteState
from ..core.settings import settings
from ..lightning.base import LightningBackend from ..lightning.base import LightningBackend
from .protocols import SupportsBackends, SupportsDb, SupportsEvents from .protocols import SupportsBackends, SupportsDb, SupportsEvents
@@ -21,14 +22,22 @@ class LedgerTasks(SupportsDb, SupportsBackends, SupportsEvents):
async def invoice_listener(self, backend: LightningBackend) -> None: async def invoice_listener(self, backend: LightningBackend) -> None:
if backend.supports_incoming_payment_stream: if backend.supports_incoming_payment_stream:
retry_delay = settings.mint_retry_exponential_backoff_base_delay
max_retry_delay = settings.mint_retry_exponential_backoff_max_delay
while True: while True:
try: try:
# Reset retry delay on successful connection to backend stream
retry_delay = settings.mint_retry_exponential_backoff_base_delay
async for checking_id in backend.paid_invoices_stream(): async for checking_id in backend.paid_invoices_stream():
await self.invoice_callback_dispatcher(checking_id) await self.invoice_callback_dispatcher(checking_id)
except Exception as e: except Exception as e:
logger.error(f"Error in invoice listener: {e}") logger.error(f"Error in invoice listener: {e}")
logger.info("Restarting invoice listener...") logger.info(f"Restarting invoice listener in {retry_delay} seconds...")
await asyncio.sleep(1) await asyncio.sleep(retry_delay)
# Exponential backoff
retry_delay = min(retry_delay * 2, max_retry_delay)
async def invoice_callback_dispatcher(self, checking_id: str) -> None: async def invoice_callback_dispatcher(self, checking_id: str) -> None:
logger.debug(f"Invoice callback dispatcher: {checking_id}") logger.debug(f"Invoice callback dispatcher: {checking_id}")