From 6a2817101d07fdc497cc57fb22592a45cea056e5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 18 Jul 2022 21:42:27 +0930 Subject: [PATCH] connectd: don't move parent while we're being freed. A subtle case I hadn't come across before: if a child tal_resizes() its parent while the parent is being deleted, tal gets confused. The subd destructor does this using tal_arr_remove() on peer->subds, which is currently being freed: ``` ==61056== Invalid read of size 8 ==61056== at 0x185632: del_tree (tal.c:417) ==61056== by 0x18560D: del_tree (tal.c:412) ==61056== by 0x185957: tal_free (tal.c:486) ==61056== by 0x1183BC: peer_discard (connectd.c:1861) ==61056== by 0x11869E: recv_req (connectd.c:1942) ==61056== by 0x12774B: handle_read (daemon_conn.c:35) ==61056== by 0x173453: next_plan (io.c:59) ==61056== by 0x17405B: do_plan (io.c:407) ==61056== by 0x17409D: io_ready (io.c:417) ==61056== by 0x176390: io_loop (poll.c:453) ==61056== by 0x118A68: main (connectd.c:2082) ==61056== Address 0x4bd8850 is 16 bytes inside a block of size 48 free'd ==61056== at 0x483DFAF: realloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so) ==61056== by 0x1860E6: tal_resize_ (tal.c:699) ==61056== by 0x1373DD: tal_arr_remove_ (utils.c:184) ==61056== by 0x11D508: destroy_subd (multiplex.c:930) ==61056== by 0x1850A4: notify (tal.c:240) ==61056== by 0x1855BB: del_tree (tal.c:402) ==61056== by 0x18560D: del_tree (tal.c:412) ==61056== by 0x18560D: del_tree (tal.c:412) ==61056== by 0x185957: tal_free (tal.c:486) ==61056== by 0x1183BC: peer_discard (connectd.c:1861) ==61056== by 0x11869E: recv_req (connectd.c:1942) ==61056== by 0x12774B: handle_read (daemon_conn.c:35) ``` So simply make the subds children of `peer` not the `peer->subds` array. The only effect is that drain_peer() can't simply free the subds array but must free the subds one at a time. Signed-off-by: Rusty Russell --- connectd/multiplex.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/connectd/multiplex.c b/connectd/multiplex.c index 5b721fab9..2ae1b7514 100644 --- a/connectd/multiplex.c +++ b/connectd/multiplex.c @@ -99,8 +99,10 @@ static void drain_peer(struct peer *peer) /* This is a 5-second leak, worst case! */ notleak(peer); - /* We no longer want subds feeding us more messages! */ - peer->subds = tal_free(peer->subds); + /* We no longer want subds feeding us more messages (they + * remove themselves from array when freed) */ + while (tal_count(peer->subds)) + tal_free(peer->subds[0]); peer->draining = true; /* You have 5 seconds to drain... */ @@ -997,7 +999,7 @@ static struct subd *new_subd(struct peer *peer, { struct subd *subd; - subd = tal(peer->subds, struct subd); + subd = tal(peer, struct subd); subd->peer = peer; subd->outq = msg_queue_new(subd, false); subd->channel_id = *channel_id;