From 8e1d5c19d681a6de4ba94a4da61585a0096aa94d Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Sat, 16 Jul 2022 14:19:30 +0930
Subject: [PATCH] pytest: test to reproduce "channeld: sent ERROR bad
 reestablish revocation_number: 0 vs 3"

It's caused by a reconnection race: we hold the new incoming connection
while we ask lightningd to kill the old connection.  But under some
circumstances we leave the new incoming connection hanging (with, in
this case, old reestablish messages unread!) and another connection
comes in.  Then, when we later service the long-gone "incoming"
connection, channeld reads the ancient reestablish message and gets
upset.

This test used to hang, but now that we've fixed the reconnection races
it is fine.

Signed-off-by: Rusty Russell
---
 tests/test_connection.py | 45 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tests/test_connection.py b/tests/test_connection.py
index 198556242..47239ce05 100644
--- a/tests/test_connection.py
+++ b/tests/test_connection.py
@@ -4031,3 +4031,48 @@ def test_multichan(node_factory, executor, bitcoind):
 
     inv = l3.rpc.invoice(100000000, "invoice4", "invoice4")
     l1.rpc.pay(inv['bolt11'])
+
+
+@pytest.mark.xfail(reason="race in reconnect logic")
+@pytest.mark.developer("dev-no-reconnect required")
+def test_mutual_reconnect_race(node_factory, executor, bitcoind):
+    """Test simultaneous reconnect between nodes"""
+    l1, l2 = node_factory.line_graph(2, opts={'may_reconnect': True,
+                                              'dev-no-reconnect': None})
+
+    def send_many_payments():
+        for i in range(20):
+            time.sleep(0.5)
+            inv = l2.rpc.invoice(100, "label-" + str(i), "desc")['bolt11']
+            try:
+                l1.rpc.pay(inv)
+            except RpcError:
+                pass
+
+    # Send a heap of payments, while reconnecting...
+    fut = executor.submit(send_many_payments)
+
+    for i in range(10):
+        try:
+            l1.rpc.disconnect(l2.info['id'], force=True)
+        except RpcError:
+            pass
+        time.sleep(1)
+        # Aim for both at once!
+        executor.submit(l1.rpc.connect, l2.info['id'], 'localhost', l2.port)
+        executor.submit(l2.rpc.connect, l1.info['id'], 'localhost', l1.port)
+
+    # Wait for things to settle down, then make sure we're actually connected.
+    # Naively, you'd think we should be, but in fact, two connects which race
+    # can (do!) result in both disconnecting, thinking the other side is more
+    # recent.
+    time.sleep(1)
+    if not only_one(l1.rpc.listpeers(l2.info['id'])['peers'])['connected']:
+        l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
+
+    # Now payments should finish!
+    fut.result(TIMEOUT)
+
+    wait_for(lambda: only_one(l1.rpc.listpeers(l2.info['id'])['peers'])['connected'])
+    inv = l2.rpc.invoice(100000000, "invoice4", "invoice4")
+    l1.rpc.pay(inv['bolt11'])
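
As an aside, the "two connects which race can result in both disconnecting"
behaviour the test comment warns about is easy to model outside lightningd.
Below is a minimal, self-contained Python sketch (the Node/Conn classes and
the adopt() method are invented for illustration; this is not connectd's
actual code) of a peer that always prefers its newest connection and kills
the one it currently holds.  Interleaving two simultaneous cross-connects
leaves each node holding a link the other side has already killed:

#!/usr/bin/env python3
# Toy model: each node keeps one connection slot per peer and, on seeing
# a newer connection, kills the one it already holds.

class Conn:
    def __init__(self, name):
        self.name = name
        self.alive = True   # set False by whichever side kills it

class Node:
    def __init__(self, name):
        self.name = name
        self.current = None

    def adopt(self, conn):
        # Prefer the newest connection: kill whatever we held before.
        if self.current is not None:
            print(f"{self.name}: killing {self.current.name} for {conn.name}")
            self.current.alive = False
        self.current = conn

a, b = Node("A"), Node("B")
c1 = Conn("A->B")   # A's outbound connect; arrives at B as an inbound
c2 = Conn("B->A")   # B's outbound connect; arrives at A as an inbound

# Both outbound connects complete first...
a.adopt(c1)
b.adopt(c2)
# ...then each node services the peer's connect as a fresh inbound, and
# kills the very connection the peer just adopted.
b.adopt(c1)
a.adopt(c2)

for n in (a, b):
    print(f"{n.name} holds {n.current.name}: alive={n.current.alive}")

Both final lines print alive=False: each node believes it reconnected, yet
neither link survives.  That is why the test above explicitly re-connects
if the peers end up disconnected after the racing connects settle.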