Merge 'core/btree: improve documentation' from Jussi Saurio
This PR should have no functional changes, just variable renaming and comments.
Using `///` comment format for better IDE support.

Reviewed-by: Pere Diaz Bou <penberg@iki.fi>
Closes #539
@@ -20,22 +20,37 @@ use super::sqlite3_ondisk::{
/*
These are offsets of fields in the header of a b-tree page.
*/
const BTREE_HEADER_OFFSET_TYPE: usize = 0; /* type of btree page -> u8 */
const BTREE_HEADER_OFFSET_FREEBLOCK: usize = 1; /* pointer to first freeblock -> u16 */
const BTREE_HEADER_OFFSET_CELL_COUNT: usize = 3; /* number of cells in the page -> u16 */
const BTREE_HEADER_OFFSET_CELL_CONTENT: usize = 5; /* pointer to first byte of cell allocated content from top -> u16 */
const BTREE_HEADER_OFFSET_FRAGMENTED: usize = 7; /* number of fragmented bytes -> u8 */
const BTREE_HEADER_OFFSET_RIGHTMOST: usize = 8; /* if internalnode, pointer right most pointer (saved separately from cells) -> u32 */

/*
** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than
** this will be declared corrupt. This value is calculated based on a
** maximum database size of 2^31 pages a minimum fanout of 2 for a
** root-node and 3 for all other internal nodes.
**
** If a tree that appears to be taller than this is encountered, it is
** assumed that the database is corrupt.
*/
/// type of btree page -> u8
const PAGE_HEADER_OFFSET_PAGE_TYPE: usize = 0;
/// pointer to first freeblock -> u16
/// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page.
/// A freeblock is a structure used to identify unallocated space within a b-tree page.
/// Freeblocks are organized as a chain.
///
/// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead
/// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions.
const PAGE_HEADER_OFFSET_FIRST_FREEBLOCK: usize = 1;
/// number of cells in the page -> u16
const PAGE_HEADER_OFFSET_CELL_COUNT: usize = 3;
/// pointer to first byte of cell allocated content from top -> u16
/// SQLite strives to place cells as far toward the end of the b-tree page as it can,
/// in order to leave space for future growth of the cell pointer array.
/// = the cell content area pointer moves leftward as cells are added to the page
const PAGE_HEADER_OFFSET_CELL_CONTENT_AREA: usize = 5;
/// number of fragmented bytes -> u8
/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area.
const PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT: usize = 7;
/// if internalnode, pointer right most pointer (saved separately from cells) -> u32
const PAGE_HEADER_OFFSET_RIGHTMOST_PTR: usize = 8;

/// Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than
/// this will be declared corrupt. This value is calculated based on a
/// maximum database size of 2^31 pages a minimum fanout of 2 for a
/// root-node and 3 for all other internal nodes.
///
/// If a tree that appears to be taller than this is encountered, it is
/// assumed that the database is corrupt.
pub const BTCURSOR_MAX_DEPTH: usize = 20;

/// Evaluate a Result<CursorResult<T>>, if IO return IO.
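To make the offsets above concrete, here is a minimal standalone sketch (not the crate's actual API) of decoding a b-tree page header from a raw buffer; the helper name and the tuple return type are illustrative only:

```rust
/// Hypothetical helper: decode the b-tree page header fields at the offsets
/// documented above. `hdr` must point at the start of the page header
/// (offset 0 on most pages, offset 100 on page 1, which follows the 100-byte
/// database header).
fn read_btree_page_header(hdr: &[u8]) -> (u8, u16, u16, u16, u8, Option<u32>) {
    let u16_at = |o: usize| u16::from_be_bytes([hdr[o], hdr[o + 1]]);
    let page_type = hdr[0];            // PAGE_HEADER_OFFSET_PAGE_TYPE
    let first_freeblock = u16_at(1);   // PAGE_HEADER_OFFSET_FIRST_FREEBLOCK
    let cell_count = u16_at(3);        // PAGE_HEADER_OFFSET_CELL_COUNT
    let cell_content_area = u16_at(5); // PAGE_HEADER_OFFSET_CELL_CONTENT_AREA
    let fragmented_bytes = hdr[7];     // PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT
    // Only interior pages (types 0x02 and 0x05) carry the 4-byte rightmost
    // pointer at offset 8; leaf pages have an 8-byte header.
    let rightmost = if page_type == 0x02 || page_type == 0x05 {
        Some(u32::from_be_bytes([hdr[8], hdr[9], hdr[10], hdr[11]]))
    } else {
        None
    };
    (page_type, first_freeblock, cell_count, cell_content_area, fragmented_bytes, rightmost)
}
```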
@@ -57,6 +72,8 @@ macro_rules! return_if_locked {
}};
}

/// State machine of a write operation.
/// May involve balancing due to overflow.
#[derive(Debug)]
enum WriteState {
Start,
@@ -67,11 +84,16 @@ enum WriteState {
}

struct WriteInfo {
/// State of the write operation state machine.
state: WriteState,
/// Pages allocated during the write operation due to balancing.
new_pages: RefCell<Vec<PageRef>>,
/// Scratch space used during balancing.
scratch_cells: RefCell<Vec<&'static [u8]>>,
/// Bookkeeping of the rightmost pointer so the PAGE_HEADER_OFFSET_RIGHTMOST_PTR can be updated.
rightmost_pointer: RefCell<Option<u32>>,
page_copy: RefCell<Option<PageContent>>, // this holds the copy a of a page needed for buffer references
/// Copy of the current page needed for buffer references.
page_copy: RefCell<Option<PageContent>>,
}

pub struct BTreeCursor {
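The `WriteState` enum drives a resumable state machine: work is re-entered after pending IO completes rather than blocking. The sketch below only illustrates that pattern; every variant name other than `Start`, and the shape of `CursorResult`, are hypothetical placeholders, not the crate's real definitions.

```rust
// Illustrative only: variant names beyond `Start` are hypothetical.
enum WriteState {
    Start,
    BalanceStart,
    Finish,
}

enum CursorResult<T> {
    Ok(T),
    IO, // caller retries this step once the pending IO completes
}

fn insert_step(state: &mut WriteState) -> CursorResult<()> {
    loop {
        match state {
            WriteState::Start => {
                // ... try to insert the cell; if the target page is still
                // locked for IO, return CursorResult::IO and resume here ...
                *state = WriteState::BalanceStart;
            }
            WriteState::BalanceStart => {
                // ... split/balance overflowing pages ...
                *state = WriteState::Finish;
            }
            WriteState::Finish => return CursorResult::Ok(()),
        }
    }
}
```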
@@ -142,6 +164,8 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the table is empty.
|
||||
/// This is done by checking if the root page has no cells.
|
||||
fn is_empty_table(&mut self) -> Result<CursorResult<bool>> {
|
||||
let page = self.pager.read_page(self.root_page)?;
|
||||
return_if_locked!(page);
|
||||
@@ -150,16 +174,18 @@ impl BTreeCursor {
|
||||
Ok(CursorResult::Ok(cell_count == 0))
|
||||
}
|
||||
|
||||
/// Move the cursor to the previous record and return it.
|
||||
/// Used in backwards iteration.
|
||||
fn get_prev_record(&mut self) -> Result<CursorResult<(Option<u64>, Option<OwnedRecord>)>> {
|
||||
loop {
|
||||
let page = self.stack.top();
|
||||
let cell_idx = self.stack.current_index();
|
||||
let cell_idx = self.stack.current_cell_index();
|
||||
|
||||
// moved to current page begin
|
||||
// moved to beginning of current page
|
||||
// todo: find a better way to flag moved to end or begin of page
|
||||
if self.stack.curr_idx_out_of_begin() {
|
||||
if self.stack.current_cell_index_less_than_min() {
|
||||
loop {
|
||||
if self.stack.current_index() > 0 {
|
||||
if self.stack.current_cell_index() > 0 {
|
||||
self.stack.retreat();
|
||||
break;
|
||||
}
|
||||
@@ -198,8 +224,8 @@ impl BTreeCursor {
|
||||
let cell = contents.cell_get(
|
||||
cell_idx,
|
||||
self.pager.clone(),
|
||||
self.max_local(contents.page_type()),
|
||||
self.min_local(contents.page_type()),
|
||||
self.payload_overflow_threshold_max(contents.page_type()),
|
||||
self.payload_overflow_threshold_min(contents.page_type()),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
|
||||
@@ -228,13 +254,15 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Move the cursor to the next record and return it.
|
||||
/// Used in forwards iteration, which is the default.
|
||||
fn get_next_record(
|
||||
&mut self,
|
||||
predicate: Option<(SeekKey<'_>, SeekOp)>,
|
||||
) -> Result<CursorResult<(Option<u64>, Option<OwnedRecord>)>> {
|
||||
loop {
|
||||
let mem_page_rc = self.stack.top();
|
||||
let cell_idx = self.stack.current_index() as usize;
|
||||
let cell_idx = self.stack.current_cell_index() as usize;
|
||||
|
||||
debug!("current id={} cell={}", mem_page_rc.get().id, cell_idx);
|
||||
return_if_locked!(mem_page_rc);
|
||||
@@ -286,8 +314,8 @@ impl BTreeCursor {
|
||||
let cell = contents.cell_get(
|
||||
cell_idx,
|
||||
self.pager.clone(),
|
||||
self.max_local(contents.page_type()),
|
||||
self.min_local(contents.page_type()),
|
||||
self.payload_overflow_threshold_max(contents.page_type()),
|
||||
self.payload_overflow_threshold_min(contents.page_type()),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
match &cell {
|
||||
@@ -386,6 +414,9 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Move the cursor to the record that matches the seek key and seek operation.
|
||||
/// This may be used to seek to a specific record in a point query (e.g. SELECT * FROM table WHERE col = 10)
|
||||
/// or e.g. find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10).
|
||||
fn seek(
|
||||
&mut self,
|
||||
key: SeekKey<'_>,
|
||||
@@ -403,8 +434,8 @@ impl BTreeCursor {
|
||||
let cell = contents.cell_get(
|
||||
cell_idx,
|
||||
self.pager.clone(),
|
||||
self.max_local(contents.page_type()),
|
||||
self.min_local(contents.page_type()),
|
||||
self.payload_overflow_threshold_max(contents.page_type()),
|
||||
self.payload_overflow_threshold_min(contents.page_type()),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
match &cell {
|
||||
@@ -476,12 +507,14 @@ impl BTreeCursor {
|
||||
Ok(CursorResult::Ok((None, None)))
|
||||
}
|
||||
|
||||
/// Move the cursor to the root page of the btree.
|
||||
fn move_to_root(&mut self) {
|
||||
let mem_page = self.pager.read_page(self.root_page).unwrap();
|
||||
self.stack.clear();
|
||||
self.stack.push(mem_page);
|
||||
}
|
||||
|
||||
/// Move the cursor to the rightmost record in the btree.
|
||||
fn move_to_rightmost(&mut self) -> Result<CursorResult<()>> {
|
||||
self.move_to_root();
|
||||
|
||||
@@ -553,8 +586,8 @@ impl BTreeCursor {
|
||||
match &contents.cell_get(
|
||||
cell_idx,
|
||||
self.pager.clone(),
|
||||
self.max_local(contents.page_type()),
|
||||
self.min_local(contents.page_type()),
|
||||
self.payload_overflow_threshold_max(contents.page_type()),
|
||||
self.payload_overflow_threshold_min(contents.page_type()),
|
||||
self.usable_space(),
|
||||
)? {
|
||||
BTreeCell::TableInteriorCell(TableInteriorCell {
|
||||
@@ -634,6 +667,8 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a record into the btree.
|
||||
/// If the insert operation overflows the page, it will be split and the btree will be balanced.
|
||||
fn insert_into_page(
|
||||
&mut self,
|
||||
key: &OwnedValue,
|
||||
@@ -700,7 +735,11 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
|
||||
/* insert to position and shift other pointers */
/// Insert a record into a cell.
/// If the cell overflows, an overflow cell is created.
/// insert_into_cell() is called from insert_into_page(),
/// and the overflow cell count is used to determine if the page overflows,
/// i.e. whether we need to balance the btree after the insert.
fn insert_into_cell(&self, page: &mut PageContent, payload: &[u8], cell_idx: usize) {
let free = self.compute_free_space(page, RefCell::borrow(&self.database_header));
let enough_space = payload.len() + 2 <= free as usize;
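"Shift other pointers" here means opening a 2-byte gap in the cell pointer array at `cell_idx` before writing the new cell's offset into it. A standalone sketch over a plain byte buffer (not the `PageContent` API) could look like this:

```rust
/// Illustrative sketch: insert `cell_offset` into the cell pointer array at
/// position `cell_idx`, shifting later pointers to the right.
/// `ptr_array_start` is the byte offset where the pointer array begins
/// (8 or 12 bytes after the page header start) and `cell_count` is the number
/// of pointers currently in the array.
fn insert_cell_pointer(
    buf: &mut [u8],
    ptr_array_start: usize,
    cell_count: usize,
    cell_idx: usize,
    cell_offset: u16,
) {
    let insert_at = ptr_array_start + cell_idx * 2;
    let array_end = ptr_array_start + cell_count * 2;
    // Open a 2-byte gap: move everything from `insert_at` up by two bytes.
    buf.copy_within(insert_at..array_end, insert_at + 2);
    // Write the new pointer (big-endian, like all multibyte page fields).
    buf[insert_at..insert_at + 2].copy_from_slice(&cell_offset.to_be_bytes());
}
```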
@@ -734,41 +773,54 @@ impl BTreeCursor {
|
||||
page.write_u16(pointer_area_pc_by_idx - page.offset, pc);
|
||||
|
||||
// update first byte of content area
|
||||
page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, pc);
|
||||
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, pc);
|
||||
|
||||
// update cell count
|
||||
let new_n_cells = (page.cell_count() + 1) as u16;
|
||||
page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, new_n_cells);
|
||||
page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, new_n_cells);
|
||||
}
|
||||
|
||||
/// Free the range of bytes that a cell occupies.
/// This function also updates the freeblock list in the page.
/// Freeblocks are used to keep track of free space in the page,
/// and are organized as a linked list.
fn free_cell_range(&self, page: &mut PageContent, offset: u16, len: u16) {
// if the freeblock list is empty, we set this block as the first freeblock in the page header.
if page.first_freeblock() == 0 {
// insert into empty list
page.write_u16(offset as usize, 0);
page.write_u16(offset as usize + 2, len);
page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, offset);
page.write_u16(offset as usize, 0); // next freeblock = null
page.write_u16(offset as usize + 2, len); // size of this freeblock
page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, offset); // first freeblock in page = this block
return;
}
let first_block = page.first_freeblock();

// if the freeblock list is not empty, and the offset is less than the first freeblock,
// we insert this block at the head of the list
if offset < first_block {
// insert into head of list
page.write_u16(offset as usize, first_block);
page.write_u16(offset as usize + 2, len);
page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, offset);
page.write_u16(offset as usize, first_block); // next freeblock = previous first freeblock
page.write_u16(offset as usize + 2, len); // size of this freeblock
page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, offset); // first freeblock in page = this block
return;
}

// if we clear space that is at the start of the cell content area,
// we need to update the cell content area pointer forward to account for the removed space
// FIXME: is offset ever < cell_content_area? cell content area grows leftwards and the pointer
// is to the start of the last allocated cell. should we assert!(offset >= page.cell_content_area())
// and change this to if offset == page.cell_content_area()?
if offset <= page.cell_content_area() {
// extend boundary of content area
page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, page.first_freeblock());
page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, offset + len);
// FIXME: remove the line directly below this, it does not change anything.
page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, page.first_freeblock());
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, offset + len);
return;
}

// if the freeblock list is not empty, and the offset is greater than the first freeblock,
// then we need to do some more calculation to figure out where to insert the freeblock
// in the freeblock linked list.
let maxpc = {
let db_header = self.database_header.borrow();
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
usable_space as u16
};

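Because each freeblock begins with a 2-byte pointer to the next freeblock followed by a 2-byte size, the whole chain can be walked to total the reusable bytes inside the cell content area. A minimal sketch over a raw page buffer, assuming big-endian reads (the function name is illustrative, not part of the crate):

```rust
/// Illustrative sketch: sum the sizes of all freeblocks on a page by following
/// the chain that starts at the first-freeblock field of the page header.
fn total_freeblock_bytes(buf: &[u8], page_header_offset: usize) -> usize {
    let u16_at = |o: usize| u16::from_be_bytes([buf[o], buf[o + 1]]) as usize;
    // Offset 1 in the page header: first freeblock, or 0 if there are none.
    let mut pc = u16_at(page_header_offset + 1);
    let mut total = 0;
    while pc != 0 {
        let next = u16_at(pc);     // bytes 0..2 of a freeblock: next freeblock
        let size = u16_at(pc + 2); // bytes 2..4: size of this freeblock (incl. 4-byte header)
        total += size;
        pc = next;
        // a real implementation would also guard against cycles on corrupt pages
    }
    total
}
```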
@@ -799,17 +851,23 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Drop a cell from a page.
|
||||
/// This is done by freeing the range of bytes that the cell occupies.
|
||||
fn drop_cell(&self, page: &mut PageContent, cell_idx: usize) {
|
||||
let (cell_start, cell_len) = page.cell_get_raw_region(
|
||||
cell_idx,
|
||||
self.max_local(page.page_type()),
|
||||
self.min_local(page.page_type()),
|
||||
self.payload_overflow_threshold_max(page.page_type()),
|
||||
self.payload_overflow_threshold_min(page.page_type()),
|
||||
self.usable_space(),
|
||||
);
|
||||
self.free_cell_range(page, cell_start as u16, cell_len as u16);
|
||||
page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1);
|
||||
page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1);
|
||||
}
|
||||
|
||||
/// Balance a leaf page.
|
||||
/// Balancing is done when a page overflows.
|
||||
/// see e.g. https://en.wikipedia.org/wiki/B-tree
|
||||
///
|
||||
/// This is a naive algorithm that doesn't try to distribute cells evenly by content.
|
||||
/// It will try to split the page in half by keys not by content.
|
||||
/// Sqlite tries to have a page at least 40% full.
|
||||
@@ -852,8 +910,8 @@ impl BTreeCursor {
|
||||
for cell_idx in 0..page_copy.cell_count() {
|
||||
let (start, len) = page_copy.cell_get_raw_region(
|
||||
cell_idx,
|
||||
self.max_local(page_copy.page_type()),
|
||||
self.min_local(page_copy.page_type()),
|
||||
self.payload_overflow_threshold_max(page_copy.page_type()),
|
||||
self.payload_overflow_threshold_min(page_copy.page_type()),
|
||||
self.usable_space(),
|
||||
);
|
||||
let buf = page_copy.as_ptr();
|
||||
@@ -930,14 +988,14 @@ impl BTreeCursor {
|
||||
assert_eq!(parent_contents.overflow_cells.len(), 0);
|
||||
|
||||
// Right page pointer is u32 in right most pointer, and in cell is u32 too, so we can use a *u32 to hold where we want to change this value
|
||||
let mut right_pointer = BTREE_HEADER_OFFSET_RIGHTMOST;
|
||||
let mut right_pointer = PAGE_HEADER_OFFSET_RIGHTMOST_PTR;
|
||||
for cell_idx in 0..parent_contents.cell_count() {
|
||||
let cell = parent_contents
|
||||
.cell_get(
|
||||
cell_idx,
|
||||
self.pager.clone(),
|
||||
self.max_local(page_type.clone()),
|
||||
self.min_local(page_type.clone()),
|
||||
self.payload_overflow_threshold_max(page_type.clone()),
|
||||
self.payload_overflow_threshold_min(page_type.clone()),
|
||||
self.usable_space(),
|
||||
)
|
||||
.unwrap();
|
||||
@@ -950,8 +1008,8 @@ impl BTreeCursor {
|
||||
if found {
|
||||
let (start, _len) = parent_contents.cell_get_raw_region(
|
||||
cell_idx,
|
||||
self.max_local(page_type.clone()),
|
||||
self.min_local(page_type.clone()),
|
||||
self.payload_overflow_threshold_max(page_type.clone()),
|
||||
self.payload_overflow_threshold_min(page_type.clone()),
|
||||
self.usable_space(),
|
||||
);
|
||||
right_pointer = start;
|
||||
@@ -967,17 +1025,20 @@ impl BTreeCursor {
|
||||
assert!(page.is_dirty());
|
||||
let contents = page.get().contents.as_mut().unwrap();
|
||||
|
||||
contents.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0);
|
||||
contents.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0);
|
||||
contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0);
|
||||
contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0);
|
||||
|
||||
let db_header = RefCell::borrow(&self.database_header);
|
||||
let cell_content_area_start =
|
||||
db_header.page_size - db_header.unused_space as u16;
|
||||
contents.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cell_content_area_start);
|
||||
db_header.page_size - db_header.reserved_space as u16;
|
||||
contents.write_u16(
|
||||
PAGE_HEADER_OFFSET_CELL_CONTENT_AREA,
|
||||
cell_content_area_start,
|
||||
);
|
||||
|
||||
contents.write_u8(BTREE_HEADER_OFFSET_FRAGMENTED, 0);
|
||||
contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0);
|
||||
if !contents.is_leaf() {
|
||||
contents.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, 0);
|
||||
contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1035,8 +1096,8 @@ impl BTreeCursor {
|
||||
.cell_get(
|
||||
contents.cell_count() - 1,
|
||||
self.pager.clone(),
|
||||
self.max_local(contents.page_type()),
|
||||
self.min_local(contents.page_type()),
|
||||
self.payload_overflow_threshold_max(contents.page_type()),
|
||||
self.payload_overflow_threshold_min(contents.page_type()),
|
||||
self.usable_space(),
|
||||
)
|
||||
.unwrap();
|
||||
@@ -1045,13 +1106,13 @@ impl BTreeCursor {
|
||||
_ => unreachable!(),
|
||||
};
|
||||
self.drop_cell(contents, contents.cell_count() - 1);
|
||||
contents.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, last_cell_pointer);
|
||||
contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, last_cell_pointer);
|
||||
}
|
||||
// last page right most pointer points to previous right most pointer before splitting
|
||||
let last_page = new_pages.last().unwrap();
|
||||
let last_page_contents = last_page.get().contents.as_mut().unwrap();
|
||||
last_page_contents.write_u32(
|
||||
BTREE_HEADER_OFFSET_RIGHTMOST,
|
||||
PAGE_HEADER_OFFSET_RIGHTMOST_PTR,
|
||||
self.write_info.rightmost_pointer.borrow().unwrap(),
|
||||
);
|
||||
}
|
||||
@@ -1069,8 +1130,8 @@ impl BTreeCursor {
|
||||
&contents.page_type(),
|
||||
0,
|
||||
self.pager.clone(),
|
||||
self.max_local(contents.page_type()),
|
||||
self.min_local(contents.page_type()),
|
||||
self.payload_overflow_threshold_max(contents.page_type()),
|
||||
self.payload_overflow_threshold_min(contents.page_type()),
|
||||
self.usable_space(),
|
||||
)
|
||||
.unwrap();
|
||||
@@ -1119,6 +1180,9 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Balance the root page.
|
||||
/// This is done when the root page overflows, and we need to create a new root page.
|
||||
/// See e.g. https://en.wikipedia.org/wiki/B-tree
|
||||
fn balance_root(&mut self) {
|
||||
/* todo: balance deeper, create child and copy contents of root there. Then split root */
|
||||
/* if we are in root page then we just need to create a new root and push key there */
|
||||
@@ -1145,8 +1209,8 @@ impl BTreeCursor {
|
||||
}
|
||||
// point new root right child to previous root
|
||||
new_root_page_contents
|
||||
.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, new_root_page_id as u32);
|
||||
new_root_page_contents.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0);
|
||||
.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, new_root_page_id as u32);
|
||||
new_root_page_contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0);
|
||||
}
|
||||
|
||||
/* swap splitted page buffer with new root buffer so we don't have to update page idx */
|
||||
@@ -1195,12 +1259,16 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocate a new page to the btree via the pager.
|
||||
/// This marks the page as dirty and writes the page header.
|
||||
fn allocate_page(&self, page_type: PageType, offset: usize) -> PageRef {
|
||||
let page = self.pager.allocate_page().unwrap();
|
||||
btree_init_page(&page, page_type, &self.database_header.borrow(), offset);
|
||||
page
|
||||
}
|
||||
|
||||
/// Allocate a new overflow page.
|
||||
/// This is done when a cell overflows and new space is needed.
|
||||
fn allocate_overflow_page(&self) -> PageRef {
|
||||
let page = self.pager.allocate_page().unwrap();
|
||||
|
||||
@@ -1212,9 +1280,7 @@ impl BTreeCursor {
|
||||
page
|
||||
}
|
||||
|
||||
/*
|
||||
Allocate space for a cell on a page.
|
||||
*/
|
||||
/// Allocate space for a cell on a page.
|
||||
fn allocate_cell_space(&self, page_ref: &PageContent, amount: u16) -> u16 {
|
||||
let amount = amount as usize;
|
||||
|
||||
@@ -1236,24 +1302,25 @@ impl BTreeCursor {
|
||||
if gap + 2 + amount > top {
|
||||
// defragment
|
||||
self.defragment_page(page_ref, RefCell::borrow(&self.database_header));
|
||||
top = page_ref.read_u16(BTREE_HEADER_OFFSET_CELL_CONTENT) as usize;
|
||||
top = page_ref.read_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA) as usize;
|
||||
}
|
||||
|
||||
let db_header = RefCell::borrow(&self.database_header);
|
||||
top -= amount;
|
||||
|
||||
page_ref.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, top as u16);
|
||||
page_ref.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, top as u16);
|
||||
|
||||
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
|
||||
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
|
||||
assert!(top + amount <= usable_space);
|
||||
top as u16
|
||||
}
|
||||
|
||||
/// Defragment a page. This means packing all the cells to the end of the page.
|
||||
fn defragment_page(&self, page: &PageContent, db_header: Ref<DatabaseHeader>) {
|
||||
log::debug!("defragment_page");
|
||||
let cloned_page = page.clone();
|
||||
// TODO(pere): usable space should include offset probably
|
||||
let usable_space = (db_header.page_size - db_header.unused_space as u16) as u64;
|
||||
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as u64;
|
||||
let mut cbrk = usable_space;
|
||||
|
||||
// TODO: implement fast algorithm
|
||||
@@ -1330,24 +1397,33 @@ impl BTreeCursor {
|
||||
let write_buf = page.as_ptr();
|
||||
|
||||
// set new first byte of cell content
|
||||
page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cbrk as u16);
|
||||
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, cbrk as u16);
|
||||
// set free block to 0, unused spaced can be retrieved from gap between cell pointer end and content start
|
||||
page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0);
|
||||
page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0);
|
||||
// set unused space to 0
|
||||
let first_cell = cloned_page.cell_content_area() as u64;
|
||||
assert!(first_cell <= cbrk);
|
||||
write_buf[first_cell as usize..cbrk as usize].fill(0);
|
||||
}
|
||||
|
||||
// Free blocks can be zero, meaning the "real free space" that can be used to allocate is expected to be between first cell byte
|
||||
// and end of cell pointer area.
|
||||
/// Free blocks can be zero, meaning the "real free space" that can be used to allocate is expected to be between first cell byte
|
||||
/// and end of cell pointer area.
|
||||
#[allow(unused_assignments)]
|
||||
fn compute_free_space(&self, page: &PageContent, db_header: Ref<DatabaseHeader>) -> u16 {
|
||||
// TODO(pere): maybe free space is not calculated correctly with offset
|
||||
let buf = page.as_ptr();
|
||||
|
||||
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
|
||||
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
|
||||
let mut first_byte_in_cell_content = page.cell_content_area();
|
||||
// A zero value for the cell content area pointer is interpreted as 65536.
|
||||
// See https://www.sqlite.org/fileformat.html
|
||||
// The max page size for a sqlite database is 64kiB i.e. 65536 bytes.
|
||||
// 65536 is u16::MAX + 1, and since cell content grows from right to left, this means
|
||||
// the cell content area pointer is at the end of the page,
|
||||
// i.e.
|
||||
// 1. the page size is 64kiB
|
||||
// 2. there are no cells on the page
|
||||
// 3. there is no reserved space at the end of the page
|
||||
if first_byte_in_cell_content == 0 {
|
||||
first_byte_in_cell_content = u16::MAX;
|
||||
}
|
||||
@@ -1360,12 +1436,16 @@ impl BTreeCursor {
|
||||
let child_pointer_size = if page.is_leaf() { 0 } else { 4 };
|
||||
let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16;
|
||||
|
||||
// The amount of free space is the sum of:
|
||||
// 1. 0..first_byte_in_cell_content (everything to the left of the cell content area pointer is unused free space)
|
||||
// 2. fragmented_free_bytes.
|
||||
let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize;
|
||||
|
||||
let mut pc = free_block_pointer as usize;
|
||||
if pc > 0 {
|
||||
if pc < first_byte_in_cell_content as usize {
|
||||
// corrupt
|
||||
// Freeblocks exist in the cell content area e.g. after deletions
|
||||
// They should never exist in the unused area of the page.
|
||||
todo!("corrupted page");
|
||||
}
|
||||
|
||||
@@ -1399,6 +1479,8 @@ impl BTreeCursor {
|
||||
nfree as u16
|
||||
}
|
||||
|
||||
/// Fill in the cell payload with the record.
|
||||
/// If the record is too large to fit in the cell, it will spill onto overflow pages.
|
||||
fn fill_cell_payload(
|
||||
&self,
|
||||
page_type: PageType,
|
||||
@@ -1423,13 +1505,13 @@ impl BTreeCursor {
|
||||
write_varint_to_vec(record_buf.len() as u64, cell_payload);
|
||||
}
|
||||
|
||||
let max_local = self.max_local(page_type.clone());
|
||||
let payload_overflow_threshold_max = self.payload_overflow_threshold_max(page_type.clone());
|
||||
log::debug!(
|
||||
"fill_cell_payload(record_size={}, max_local={})",
|
||||
"fill_cell_payload(record_size={}, payload_overflow_threshold_max={})",
|
||||
record_buf.len(),
|
||||
max_local
|
||||
payload_overflow_threshold_max
|
||||
);
|
||||
if record_buf.len() <= max_local {
|
||||
if record_buf.len() <= payload_overflow_threshold_max {
|
||||
// enough allowed space to fit inside a btree page
|
||||
cell_payload.extend_from_slice(record_buf.as_slice());
|
||||
cell_payload.resize(cell_payload.len() + 4, 0);
|
||||
@@ -1437,11 +1519,13 @@ impl BTreeCursor {
|
||||
}
|
||||
log::debug!("fill_cell_payload(overflow)");
|
||||
|
||||
let min_local = self.min_local(page_type);
|
||||
let mut space_left = min_local + (record_buf.len() - min_local) % (self.usable_space() - 4);
|
||||
let payload_overflow_threshold_min = self.payload_overflow_threshold_min(page_type);
|
||||
// see e.g. https://github.com/sqlite/sqlite/blob/9591d3fe93936533c8c3b0dc4d025ac999539e11/src/dbstat.c#L371
|
||||
let mut space_left = payload_overflow_threshold_min
|
||||
+ (record_buf.len() - payload_overflow_threshold_min) % (self.usable_space() - 4);
|
||||
|
||||
if space_left > max_local {
|
||||
space_left = min_local;
|
||||
if space_left > payload_overflow_threshold_max {
|
||||
space_left = payload_overflow_threshold_min;
|
||||
}
|
||||
|
||||
// cell_size must be equal to first value of space_left as this will be the bytes copied to non-overflow page.
|
||||
@@ -1487,31 +1571,54 @@ impl BTreeCursor {
|
||||
assert_eq!(cell_size, cell_payload.len());
|
||||
}
|
||||
|
||||
fn max_local(&self, page_type: PageType) -> usize {
let usable_space = self.usable_space();
/// Returns the maximum payload size (X) that can be stored directly on a b-tree page without spilling to overflow pages.
///
/// For table leaf pages: X = usable_size - 35
/// For index pages: X = ((usable_size - 12) * 64/255) - 23
///
/// The usable size is the total page size less the reserved space at the end of each page.
/// These thresholds are designed to:
/// - Give a minimum fanout of 4 for index b-trees
/// - Ensure enough payload is on the b-tree page that the record header can usually be accessed
/// without consulting an overflow page
fn payload_overflow_threshold_max(&self, page_type: PageType) -> usize {
let usable_size = self.usable_space();
match page_type {
PageType::IndexInterior | PageType::TableInterior => {
(usable_space - 12) * 64 / 255 - 23
PageType::IndexInterior | PageType::IndexLeaf => {
((usable_size - 12) * 64 / 255) - 23 // Index page formula
}
PageType::TableInterior | PageType::TableLeaf => {
usable_size - 35 // Table leaf page formula
}
PageType::IndexLeaf | PageType::TableLeaf => usable_space - 35,
}
}

fn min_local(&self, page_type: PageType) -> usize {
let usable_space = self.usable_space();
match page_type {
PageType::IndexInterior | PageType::TableInterior => {
(usable_space - 12) * 32 / 255 - 23
}
PageType::IndexLeaf | PageType::TableLeaf => (usable_space - 12) * 32 / 255 - 23,
}
/// Returns the minimum payload size (M) that must be stored on the b-tree page before spilling to overflow pages is allowed.
///
/// For all page types: M = ((usable_size - 12) * 32/255) - 23
///
/// When payload size P exceeds max_local():
/// - If K = M + ((P-M) % (usable_size-4)) <= max_local(): store K bytes on page
/// - Otherwise: store M bytes on page
///
/// The remaining bytes are stored on overflow pages in both cases.
fn payload_overflow_threshold_min(&self, _page_type: PageType) -> usize {
let usable_size = self.usable_space();
// Same formula for all page types
((usable_size - 12) * 32 / 255) - 23
}

/// The "usable size" of a database page is the page size specified by the 2-byte integer at offset 16
/// in the header, minus the "reserved" space size recorded in the 1-byte integer at offset 20 in the header.
/// The usable size of a page might be an odd number. However, the usable size is not allowed to be less than 480.
/// In other words, if the page size is 512, then the reserved space size cannot exceed 32.
fn usable_space(&self) -> usize {
let db_header = RefCell::borrow(&self.database_header);
(db_header.page_size - db_header.unused_space as u16) as usize
(db_header.page_size - db_header.reserved_space as u16) as usize
}

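For intuition, the X/M/K arithmetic documented above can be reproduced in a few lines. The numbers below assume a 4096-byte page with no reserved space; the helper names are local to this example:

```rust
fn threshold_max_table_leaf(usable_size: usize) -> usize {
    usable_size - 35
}
fn threshold_max_index(usable_size: usize) -> usize {
    ((usable_size - 12) * 64 / 255) - 23
}
fn threshold_min(usable_size: usize) -> usize {
    ((usable_size - 12) * 32 / 255) - 23
}

fn main() {
    let usable_size = 4096; // page_size 4096, reserved_space 0
    let x_table = threshold_max_table_leaf(usable_size); // 4061
    let x_index = threshold_max_index(usable_size);      // 1002
    let m = threshold_min(usable_size);                  // 489
    assert_eq!((x_table, x_index, m), (4061, 1002, 489));

    // A 5000-byte table-leaf payload exceeds X, so it spills to overflow pages.
    let p = 5000usize;
    let k = m + (p - m) % (usable_size - 4); // 489 + 4511 % 4092 = 908
    assert_eq!(k, 908);
    // K <= X, so 908 bytes stay on the b-tree page; the remaining 4092 go to overflow.
    assert!(k <= x_table);
}
```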
/// Find the index of the cell in the page that contains the given rowid.
|
||||
/// BTree tables only.
|
||||
fn find_cell(&self, page: &PageContent, int_key: u64) -> usize {
|
||||
let mut cell_idx = 0;
|
||||
let cell_count = page.cell_count();
|
||||
@@ -1520,8 +1627,8 @@ impl BTreeCursor {
|
||||
.cell_get(
|
||||
cell_idx,
|
||||
self.pager.clone(),
|
||||
self.max_local(page.page_type()),
|
||||
self.min_local(page.page_type()),
|
||||
self.payload_overflow_threshold_max(page.page_type()),
|
||||
self.payload_overflow_threshold_min(page.page_type()),
|
||||
self.usable_space(),
|
||||
)
|
||||
.unwrap()
|
||||
@@ -1545,6 +1652,8 @@ impl BTreeCursor {
|
||||
}
|
||||
|
||||
impl PageStack {
|
||||
/// Push a new page onto the stack.
|
||||
/// This effectively means traversing to a child page.
|
||||
fn push(&self, page: PageRef) {
|
||||
debug!(
|
||||
"pagestack::push(current={}, new_page_id={})",
|
||||
@@ -1561,6 +1670,8 @@ impl PageStack {
|
||||
self.cell_indices.borrow_mut()[current as usize] = 0;
|
||||
}
|
||||
|
||||
/// Pop a page off the stack.
|
||||
/// This effectively means traversing back up to a parent page.
|
||||
fn pop(&self) {
|
||||
let current = *self.current_page.borrow();
|
||||
debug!("pagestack::pop(current={})", current);
|
||||
@@ -1569,6 +1680,8 @@ impl PageStack {
|
||||
*self.current_page.borrow_mut() -= 1;
|
||||
}
|
||||
|
||||
/// Get the top page on the stack.
|
||||
/// This is the page that is currently being traversed.
|
||||
fn top(&self) -> PageRef {
|
||||
let current = *self.current_page.borrow();
|
||||
let page = self.stack.borrow()[current as usize]
|
||||
@@ -1583,6 +1696,7 @@ impl PageStack {
|
||||
page
|
||||
}
|
||||
|
||||
/// Get the parent page of the current page.
|
||||
fn parent(&self) -> PageRef {
|
||||
let current = *self.current_page.borrow();
|
||||
self.stack.borrow()[current as usize - 1]
|
||||
@@ -1597,13 +1711,15 @@ impl PageStack {
|
||||
}
|
||||
|
||||
/// Cell index of the current page
|
||||
fn current_index(&self) -> i32 {
|
||||
fn current_cell_index(&self) -> i32 {
|
||||
let current = self.current();
|
||||
self.cell_indices.borrow()[current]
|
||||
}
|
||||
|
||||
fn curr_idx_out_of_begin(&self) -> bool {
|
||||
let cell_idx = self.current_index();
|
||||
/// Check if the current cell index is less than 0.
|
||||
/// This means we have been iterating backwards and have reached the start of the page.
|
||||
fn current_cell_index_less_than_min(&self) -> bool {
|
||||
let cell_idx = self.current_cell_index();
|
||||
cell_idx < 0
|
||||
}
|
||||
|
||||
@@ -1639,7 +1755,7 @@ fn find_free_cell(page_ref: &PageContent, db_header: Ref<DatabaseHeader>, amount
|
||||
|
||||
let buf = page_ref.as_ptr();
|
||||
|
||||
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
|
||||
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
|
||||
let maxpc = usable_space - amount;
|
||||
let mut found = false;
|
||||
while pc <= maxpc {
|
||||
@@ -1785,8 +1901,8 @@ impl Cursor for BTreeCursor {
|
||||
let equals = match &contents.cell_get(
|
||||
cell_idx,
|
||||
self.pager.clone(),
|
||||
self.max_local(contents.page_type()),
|
||||
self.min_local(contents.page_type()),
|
||||
self.payload_overflow_threshold_max(contents.page_type()),
|
||||
self.payload_overflow_threshold_min(contents.page_type()),
|
||||
self.usable_space(),
|
||||
)? {
|
||||
BTreeCell::TableLeafCell(l) => l._rowid == int_key,
|
||||
@@ -1823,15 +1939,18 @@ pub fn btree_init_page(
|
||||
let contents = contents.contents.as_mut().unwrap();
|
||||
contents.offset = offset;
|
||||
let id = page_type as u8;
|
||||
contents.write_u8(BTREE_HEADER_OFFSET_TYPE, id);
|
||||
contents.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0);
|
||||
contents.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0);
|
||||
contents.write_u8(PAGE_HEADER_OFFSET_PAGE_TYPE, id);
|
||||
contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0);
|
||||
contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0);
|
||||
|
||||
let cell_content_area_start = db_header.page_size - db_header.unused_space as u16;
|
||||
contents.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cell_content_area_start);
|
||||
let cell_content_area_start = db_header.page_size - db_header.reserved_space as u16;
|
||||
contents.write_u16(
|
||||
PAGE_HEADER_OFFSET_CELL_CONTENT_AREA,
|
||||
cell_content_area_start,
|
||||
);
|
||||
|
||||
contents.write_u8(BTREE_HEADER_OFFSET_FRAGMENTED, 0);
|
||||
contents.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, 0);
|
||||
contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0);
|
||||
contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, 0);
|
||||
}
|
||||
|
||||
fn to_static_buf(buf: &[u8]) -> &'static [u8] {
|
||||
|
||||
@@ -482,7 +482,7 @@ impl Pager {
|
||||
|
||||
pub fn usable_size(&self) -> usize {
|
||||
let db_header = self.db_header.borrow();
|
||||
(db_header.page_size - db_header.unused_space as u16) as usize
|
||||
(db_header.page_size - db_header.reserved_space as u16) as usize
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -64,30 +64,84 @@ const DEFAULT_CACHE_SIZE: i32 = -2000;
|
||||
// Minimum number of pages that cache can hold.
|
||||
pub const MIN_PAGE_CACHE_SIZE: usize = 10;
|
||||
|
||||
/// The database header.
|
||||
/// The first 100 bytes of the database file comprise the database file header.
|
||||
/// The database file header is divided into fields as shown by the table below.
|
||||
/// All multibyte fields in the database file header are stored with the most significant byte first (big-endian).
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DatabaseHeader {
|
||||
/// The header string: "SQLite format 3\0"
|
||||
magic: [u8; 16],
|
||||
|
||||
/// The database page size in bytes. Must be a power of two between 512 and 32768 inclusive,
|
||||
/// or the value 1 representing a page size of 65536.
|
||||
pub page_size: u16,
|
||||
|
||||
/// File format write version. 1 for legacy; 2 for WAL.
|
||||
write_version: u8,
|
||||
|
||||
/// File format read version. 1 for legacy; 2 for WAL.
|
||||
read_version: u8,
|
||||
pub unused_space: u8,
|
||||
|
||||
/// Bytes of unused "reserved" space at the end of each page. Usually 0.
|
||||
/// SQLite has the ability to set aside a small number of extra bytes at the end of every page for use by extensions.
|
||||
/// These extra bytes are used, for example, by the SQLite Encryption Extension to store a nonce and/or
|
||||
/// cryptographic checksum associated with each page.
|
||||
pub reserved_space: u8,
|
||||
|
||||
/// Maximum embedded payload fraction. Must be 64.
|
||||
max_embed_frac: u8,
|
||||
|
||||
/// Minimum embedded payload fraction. Must be 32.
|
||||
min_embed_frac: u8,
|
||||
|
||||
/// Leaf payload fraction. Must be 32.
|
||||
min_leaf_frac: u8,
|
||||
|
||||
/// File change counter, incremented when database is modified.
|
||||
change_counter: u32,
|
||||
|
||||
/// Size of the database file in pages. The "in-header database size".
|
||||
pub database_size: u32,
|
||||
|
||||
/// Page number of the first freelist trunk page.
|
||||
freelist_trunk_page: u32,
|
||||
|
||||
/// Total number of freelist pages.
|
||||
freelist_pages: u32,
|
||||
|
||||
/// The schema cookie. Incremented when the database schema changes.
|
||||
schema_cookie: u32,
|
||||
|
||||
/// The schema format number. Supported formats are 1, 2, 3, and 4.
|
||||
schema_format: u32,
|
||||
pub default_cache_size: i32,
|
||||
vacuum: u32,
|
||||
|
||||
/// Default page cache size.
|
||||
pub default_page_cache_size: i32,
|
||||
|
||||
/// The page number of the largest root b-tree page when in auto-vacuum or
|
||||
/// incremental-vacuum modes, or zero otherwise.
|
||||
vacuum_mode_largest_root_page: u32,
|
||||
|
||||
/// The database text encoding. 1=UTF-8, 2=UTF-16le, 3=UTF-16be.
|
||||
text_encoding: u32,
|
||||
|
||||
/// The "user version" as read and set by the user_version pragma.
|
||||
user_version: u32,
|
||||
incremental_vacuum: u32,
|
||||
|
||||
/// True (non-zero) for incremental-vacuum mode. False (zero) otherwise.
|
||||
incremental_vacuum_enabled: u32,
|
||||
|
||||
/// The "Application ID" set by PRAGMA application_id.
|
||||
application_id: u32,
|
||||
reserved: [u8; 20],
|
||||
|
||||
/// Reserved for expansion. Must be zero.
|
||||
reserved_for_expansion: [u8; 20],
|
||||
|
||||
/// The version-valid-for number.
|
||||
version_valid_for: u32,
|
||||
|
||||
/// SQLITE_VERSION_NUMBER
|
||||
pub version_number: u32,
|
||||
}
|
||||
|
||||
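Since all multibyte header fields are big-endian, decoding the fields listed in this struct from the first 100 bytes of the file is mechanical. An illustrative sketch covering just a few of them (not the crate's reader, and ignoring the special page_size value 1 that means 65536):

```rust
/// Illustrative sketch: decode a few database-header fields from the first
/// 100 bytes of a database file. All multibyte fields are big-endian.
fn peek_header(buf: &[u8; 100]) -> (u16, u8, usize) {
    let page_size = u16::from_be_bytes([buf[16], buf[17]]); // offset 16..18
    let reserved_space = buf[20];                           // offset 20
    // The usable size of every page is page_size minus the reserved bytes.
    // (The special page_size value 1, meaning 65536, is not handled here.)
    let usable_size = (page_size - reserved_space as u16) as usize;
    (page_size, reserved_space, usable_size)
}
```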
@@ -98,28 +152,62 @@ pub const WAL_FRAME_HEADER_SIZE: usize = 24;
|
||||
pub const WAL_MAGIC_LE: u32 = 0x377f0682;
|
||||
pub const WAL_MAGIC_BE: u32 = 0x377f0683;
|
||||
|
||||
/// The Write-Ahead Log (WAL) header.
|
||||
/// The first 32 bytes of a WAL file comprise the WAL header.
|
||||
/// The WAL header is divided into the following fields stored in big-endian order.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
#[repr(C)] // This helps with encoding because rust does not respect the order in structs, so in
|
||||
// this case we want to keep the order
|
||||
pub struct WalHeader {
|
||||
/// Magic number. 0x377f0682 or 0x377f0683
|
||||
/// If the LSB is 0, checksums are native byte order, else checksums are serialized
|
||||
pub magic: u32,
|
||||
|
||||
/// WAL format version. Currently 3007000
|
||||
pub file_format: u32,
|
||||
|
||||
/// Database page size in bytes. Power of two between 512 and 32768 inclusive
|
||||
pub page_size: u32,
|
||||
|
||||
/// Checkpoint sequence number. Increases with each checkpoint
|
||||
pub checkpoint_seq: u32,
|
||||
|
||||
/// Random value used for the first salt in checksum calculations
|
||||
pub salt_1: u32,
|
||||
|
||||
/// Random value used for the second salt in checksum calculations
|
||||
pub salt_2: u32,
|
||||
|
||||
/// First checksum value in the wal-header
|
||||
pub checksum_1: u32,
|
||||
|
||||
/// Second checksum value in the wal-header
|
||||
pub checksum_2: u32,
|
||||
}
|
||||
|
||||
/// Immediately following the wal-header are zero or more frames.
|
||||
/// Each frame consists of a 24-byte frame-header followed by <page-size> bytes of page data.
|
||||
/// The frame-header is six big-endian 32-bit unsigned integer values, as follows:
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Default)]
|
||||
pub struct WalFrameHeader {
|
||||
/// Page number
|
||||
page_number: u32,
|
||||
|
||||
/// For commit records, the size of the database file in pages after the commit.
|
||||
/// For all other records, zero.
|
||||
db_size: u32,
|
||||
|
||||
/// Salt-1 copied from the WAL header
|
||||
salt_1: u32,
|
||||
|
||||
/// Salt-2 copied from the WAL header
|
||||
salt_2: u32,
|
||||
|
||||
/// Checksum-1: Cumulative checksum up through and including this page
|
||||
checksum_1: u32,
|
||||
|
||||
/// Checksum-2: Second half of the cumulative checksum
|
||||
checksum_2: u32,
|
||||
}
|
||||
|
||||
@@ -130,7 +218,7 @@ impl Default for DatabaseHeader {
|
||||
page_size: 4096,
|
||||
write_version: 2,
|
||||
read_version: 2,
|
||||
unused_space: 0,
|
||||
reserved_space: 0,
|
||||
max_embed_frac: 64,
|
||||
min_embed_frac: 32,
|
||||
min_leaf_frac: 32,
|
||||
@@ -140,13 +228,13 @@ impl Default for DatabaseHeader {
|
||||
freelist_pages: 0,
|
||||
schema_cookie: 0,
|
||||
schema_format: 4, // latest format, new sqlite3 databases use this format
|
||||
default_cache_size: 500, // pages
|
||||
vacuum: 0,
|
||||
default_page_cache_size: 500, // pages
|
||||
vacuum_mode_largest_root_page: 0,
|
||||
text_encoding: 1, // utf-8
|
||||
user_version: 1,
|
||||
incremental_vacuum: 0,
|
||||
incremental_vacuum_enabled: 0,
|
||||
application_id: 0,
|
||||
reserved: [0; 20],
|
||||
reserved_for_expansion: [0; 20],
|
||||
version_valid_for: 3047000,
|
||||
version_number: 3047000,
|
||||
}
|
||||
@@ -180,7 +268,7 @@ fn finish_read_database_header(
|
||||
header.page_size = u16::from_be_bytes([buf[16], buf[17]]);
|
||||
header.write_version = buf[18];
|
||||
header.read_version = buf[19];
|
||||
header.unused_space = buf[20];
|
||||
header.reserved_space = buf[20];
|
||||
header.max_embed_frac = buf[21];
|
||||
header.min_embed_frac = buf[22];
|
||||
header.min_leaf_frac = buf[23];
|
||||
@@ -190,16 +278,16 @@ fn finish_read_database_header(
|
||||
header.freelist_pages = u32::from_be_bytes([buf[36], buf[37], buf[38], buf[39]]);
|
||||
header.schema_cookie = u32::from_be_bytes([buf[40], buf[41], buf[42], buf[43]]);
|
||||
header.schema_format = u32::from_be_bytes([buf[44], buf[45], buf[46], buf[47]]);
|
||||
header.default_cache_size = i32::from_be_bytes([buf[48], buf[49], buf[50], buf[51]]);
|
||||
if header.default_cache_size == 0 {
|
||||
header.default_cache_size = DEFAULT_CACHE_SIZE;
|
||||
header.default_page_cache_size = i32::from_be_bytes([buf[48], buf[49], buf[50], buf[51]]);
|
||||
if header.default_page_cache_size == 0 {
|
||||
header.default_page_cache_size = DEFAULT_CACHE_SIZE;
|
||||
}
|
||||
header.vacuum = u32::from_be_bytes([buf[52], buf[53], buf[54], buf[55]]);
|
||||
header.vacuum_mode_largest_root_page = u32::from_be_bytes([buf[52], buf[53], buf[54], buf[55]]);
|
||||
header.text_encoding = u32::from_be_bytes([buf[56], buf[57], buf[58], buf[59]]);
|
||||
header.user_version = u32::from_be_bytes([buf[60], buf[61], buf[62], buf[63]]);
|
||||
header.incremental_vacuum = u32::from_be_bytes([buf[64], buf[65], buf[66], buf[67]]);
|
||||
header.incremental_vacuum_enabled = u32::from_be_bytes([buf[64], buf[65], buf[66], buf[67]]);
|
||||
header.application_id = u32::from_be_bytes([buf[68], buf[69], buf[70], buf[71]]);
|
||||
header.reserved.copy_from_slice(&buf[72..92]);
|
||||
header.reserved_for_expansion.copy_from_slice(&buf[72..92]);
|
||||
header.version_valid_for = u32::from_be_bytes([buf[92], buf[93], buf[94], buf[95]]);
|
||||
header.version_number = u32::from_be_bytes([buf[96], buf[97], buf[98], buf[99]]);
|
||||
Ok(())
|
||||
@@ -258,7 +346,7 @@ fn write_header_to_buf(buf: &mut [u8], header: &DatabaseHeader) {
|
||||
buf[16..18].copy_from_slice(&header.page_size.to_be_bytes());
|
||||
buf[18] = header.write_version;
|
||||
buf[19] = header.read_version;
|
||||
buf[20] = header.unused_space;
|
||||
buf[20] = header.reserved_space;
|
||||
buf[21] = header.max_embed_frac;
|
||||
buf[22] = header.min_embed_frac;
|
||||
buf[23] = header.min_leaf_frac;
|
||||
@@ -268,15 +356,15 @@ fn write_header_to_buf(buf: &mut [u8], header: &DatabaseHeader) {
|
||||
buf[36..40].copy_from_slice(&header.freelist_pages.to_be_bytes());
|
||||
buf[40..44].copy_from_slice(&header.schema_cookie.to_be_bytes());
|
||||
buf[44..48].copy_from_slice(&header.schema_format.to_be_bytes());
|
||||
buf[48..52].copy_from_slice(&header.default_cache_size.to_be_bytes());
|
||||
buf[48..52].copy_from_slice(&header.default_page_cache_size.to_be_bytes());
|
||||
|
||||
buf[52..56].copy_from_slice(&header.vacuum.to_be_bytes());
|
||||
buf[52..56].copy_from_slice(&header.vacuum_mode_largest_root_page.to_be_bytes());
|
||||
buf[56..60].copy_from_slice(&header.text_encoding.to_be_bytes());
|
||||
buf[60..64].copy_from_slice(&header.user_version.to_be_bytes());
|
||||
buf[64..68].copy_from_slice(&header.incremental_vacuum.to_be_bytes());
|
||||
buf[64..68].copy_from_slice(&header.incremental_vacuum_enabled.to_be_bytes());
|
||||
|
||||
buf[68..72].copy_from_slice(&header.application_id.to_be_bytes());
|
||||
buf[72..92].copy_from_slice(&header.reserved);
|
||||
buf[72..92].copy_from_slice(&header.reserved_for_expansion);
|
||||
buf[92..96].copy_from_slice(&header.version_valid_for.to_be_bytes());
|
||||
buf[96..100].copy_from_slice(&header.version_number.to_be_bytes());
|
||||
}
|
||||
@@ -387,6 +475,12 @@ impl PageContent {
|
||||
buf[self.offset + pos..self.offset + pos + 4].copy_from_slice(&value.to_be_bytes());
|
||||
}
|
||||
|
||||
/// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page.
|
||||
/// A freeblock is a structure used to identify unallocated space within a b-tree page.
|
||||
/// Freeblocks are organized as a chain.
|
||||
///
|
||||
/// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead
|
||||
/// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions.
|
||||
pub fn first_freeblock(&self) -> u16 {
|
||||
self.read_u16(1)
|
||||
}
|
||||
@@ -395,10 +489,16 @@ impl PageContent {
|
||||
self.read_u16(3) as usize
|
||||
}
|
||||
|
||||
/// The start of the cell content area.
|
||||
/// SQLite strives to place cells as far toward the end of the b-tree page as it can,
|
||||
/// in order to leave space for future growth of the cell pointer array.
|
||||
/// = the cell content area pointer moves leftward as cells are added to the page
|
||||
pub fn cell_content_area(&self) -> u16 {
|
||||
self.read_u16(5)
|
||||
}
|
||||
|
||||
/// The total number of bytes in all fragments is stored in the fifth field of the b-tree page header.
|
||||
/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area.
|
||||
pub fn num_frag_free_bytes(&self) -> u8 {
|
||||
self.read_u8(7)
|
||||
}
|
||||
@@ -416,22 +516,24 @@ impl PageContent {
|
||||
&self,
|
||||
idx: usize,
|
||||
pager: Rc<Pager>,
|
||||
max_local: usize,
|
||||
min_local: usize,
|
||||
payload_overflow_threshold_max: usize,
|
||||
payload_overflow_threshold_min: usize,
|
||||
usable_size: usize,
|
||||
) -> Result<BTreeCell> {
|
||||
log::debug!("cell_get(idx={})", idx);
|
||||
let buf = self.as_ptr();
|
||||
|
||||
let ncells = self.cell_count();
|
||||
let cell_start = match self.page_type() {
|
||||
// the page header is 12 bytes for interior pages, 8 bytes for leaf pages
|
||||
// this is because the 4 last bytes in the interior page's header are used for the rightmost pointer.
|
||||
let cell_pointer_array_start = match self.page_type() {
|
||||
PageType::IndexInterior => 12,
|
||||
PageType::TableInterior => 12,
|
||||
PageType::IndexLeaf => 8,
|
||||
PageType::TableLeaf => 8,
|
||||
};
|
||||
assert!(idx < ncells, "cell_get: idx out of bounds");
|
||||
let cell_pointer = cell_start + (idx * 2);
|
||||
let cell_pointer = cell_pointer_array_start + (idx * 2);
|
||||
let cell_pointer = self.read_u16(cell_pointer) as usize;
|
||||
|
||||
read_btree_cell(
|
||||
@@ -439,13 +541,17 @@ impl PageContent {
|
||||
&self.page_type(),
|
||||
cell_pointer,
|
||||
pager,
|
||||
max_local,
|
||||
min_local,
|
||||
payload_overflow_threshold_max,
|
||||
payload_overflow_threshold_min,
|
||||
usable_size,
|
||||
)
|
||||
}
|
||||
|
||||
/// When using this fu
|
||||
/// The cell pointer array of a b-tree page immediately follows the b-tree page header.
|
||||
/// Let K be the number of cells on the btree.
|
||||
/// The cell pointer array consists of K 2-byte integer offsets to the cell contents.
|
||||
/// The cell pointers are arranged in key order with:
|
||||
/// - left-most cell (the cell with the smallest key) first and
|
||||
/// - the right-most cell (the cell with the largest key) last.
|
||||
pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) {
|
||||
let cell_start = match self.page_type() {
|
||||
PageType::IndexInterior => 12,
|
||||
@@ -460,27 +566,31 @@ impl PageContent {
|
||||
pub fn cell_get_raw_region(
|
||||
&self,
|
||||
idx: usize,
|
||||
max_local: usize,
|
||||
min_local: usize,
|
||||
payload_overflow_threshold_max: usize,
|
||||
payload_overflow_threshold_min: usize,
|
||||
usable_size: usize,
|
||||
) -> (usize, usize) {
|
||||
let buf = self.as_ptr();
|
||||
let ncells = self.cell_count();
|
||||
let cell_start = match self.page_type() {
|
||||
let cell_pointer_array_start = match self.page_type() {
|
||||
PageType::IndexInterior => 12,
|
||||
PageType::TableInterior => 12,
|
||||
PageType::IndexLeaf => 8,
|
||||
PageType::TableLeaf => 8,
|
||||
};
|
||||
assert!(idx < ncells, "cell_get: idx out of bounds");
|
||||
let cell_pointer = cell_start + (idx * 2);
|
||||
let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each
|
||||
let cell_pointer = self.read_u16(cell_pointer) as usize;
|
||||
let start = cell_pointer;
|
||||
let len = match self.page_type() {
|
||||
PageType::IndexInterior => {
|
||||
let (len_payload, n_payload) = read_varint(&buf[cell_pointer + 4..]).unwrap();
|
||||
let (overflows, to_read) =
|
||||
payload_overflows(len_payload as usize, max_local, min_local, usable_size);
|
||||
let (overflows, to_read) = payload_overflows(
|
||||
len_payload as usize,
|
||||
payload_overflow_threshold_max,
|
||||
payload_overflow_threshold_min,
|
||||
usable_size,
|
||||
);
|
||||
if overflows {
|
||||
4 + to_read + n_payload + 4
|
||||
} else {
|
||||
@@ -493,8 +603,12 @@ impl PageContent {
|
||||
}
|
||||
PageType::IndexLeaf => {
|
||||
let (len_payload, n_payload) = read_varint(&buf[cell_pointer..]).unwrap();
|
||||
let (overflows, to_read) =
|
||||
payload_overflows(len_payload as usize, max_local, min_local, usable_size);
|
||||
let (overflows, to_read) = payload_overflows(
|
||||
len_payload as usize,
|
||||
payload_overflow_threshold_max,
|
||||
payload_overflow_threshold_min,
|
||||
usable_size,
|
||||
);
|
||||
if overflows {
|
||||
to_read + n_payload + 4
|
||||
} else {
|
||||
@@ -504,8 +618,12 @@ impl PageContent {
|
||||
PageType::TableLeaf => {
|
||||
let (len_payload, n_payload) = read_varint(&buf[cell_pointer..]).unwrap();
|
||||
let (_, n_rowid) = read_varint(&buf[cell_pointer + n_payload..]).unwrap();
|
||||
let (overflows, to_read) =
|
||||
payload_overflows(len_payload as usize, max_local, min_local, usable_size);
|
||||
let (overflows, to_read) = payload_overflows(
|
||||
len_payload as usize,
|
||||
payload_overflow_threshold_max,
|
||||
payload_overflow_threshold_min,
|
||||
usable_size,
|
||||
);
|
||||
if overflows {
|
||||
to_read + n_payload + n_rowid
|
||||
} else {
|
||||
@@ -1170,28 +1288,46 @@ pub fn begin_write_wal_header(io: &Rc<dyn File>, header: &WalHeader) -> Result<(
Ok(())
}

/*
Checks if payload will overflow a cell based on max local and
it will return the min size that will be stored in that case,
including overflow pointer
*/
/// Checks if payload will overflow a cell based on the maximum allowed size.
/// It will return the min size that will be stored in that case,
/// including overflow pointer
/// see e.g. https://github.com/sqlite/sqlite/blob/9591d3fe93936533c8c3b0dc4d025ac999539e11/src/dbstat.c#L371
pub fn payload_overflows(
payload_size: usize,
max_local: usize,
min_local: usize,
payload_overflow_threshold_max: usize,
payload_overflow_threshold_min: usize,
usable_size: usize,
) -> (bool, usize) {
if payload_size <= max_local {
if payload_size <= payload_overflow_threshold_max {
return (false, 0);
}

let mut space_left = min_local + (payload_size - min_local) % (usable_size - 4);
if space_left > max_local {
space_left = min_local;
let mut space_left = payload_overflow_threshold_min
+ (payload_size - payload_overflow_threshold_min) % (usable_size - 4);
if space_left > payload_overflow_threshold_max {
space_left = payload_overflow_threshold_min;
}
(true, space_left + 4)
}

/// The checksum is computed by interpreting the input as an even number of unsigned 32-bit integers: x(0) through x(N).
/// The 32-bit integers are big-endian if the magic number in the first 4 bytes of the WAL header is 0x377f0683
/// and the integers are little-endian if the magic number is 0x377f0682.
/// The checksum values are always stored in the frame header in a big-endian format regardless of which byte order is used to compute the checksum.

/// The checksum algorithm only works for content which is a multiple of 8 bytes in length.
/// In other words, if the inputs are x(0) through x(N) then N must be odd.
/// The checksum algorithm is as follows:
///
/// s0 = s1 = 0
/// for i from 0 to n-1 step 2:
/// s0 += x(i) + s1;
/// s1 += x(i+1) + s0;
/// endfor
///
/// The outputs s0 and s1 are both weighted checksums using Fibonacci weights in reverse order.
/// (The largest Fibonacci weight occurs on the first element of the sequence being summed.)
/// The s1 value spans all 32-bit integer terms of the sequence whereas s0 omits the final term.
pub fn checksum_wal(
buf: &[u8],
_wal_header: &WalHeader,

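The pseudocode above translates directly into Rust. A self-contained sketch, assuming native little-endian word order and wrapping arithmetic (the real function selects the byte order from the WAL magic number):

```rust
/// Illustrative sketch of the WAL checksum described above: interpret `buf` as
/// pairs of 32-bit words and fold them into the running (s0, s1) pair.
/// `buf.len()` must be a multiple of 8.
fn wal_checksum(buf: &[u8], init: (u32, u32)) -> (u32, u32) {
    assert!(buf.len() % 8 == 0);
    let (mut s0, mut s1) = init;
    for pair in buf.chunks_exact(8) {
        let x0 = u32::from_le_bytes(pair[0..4].try_into().unwrap());
        let x1 = u32::from_le_bytes(pair[4..8].try_into().unwrap());
        s0 = s0.wrapping_add(x0).wrapping_add(s1); // s0 += x(i) + s1
        s1 = s1.wrapping_add(x1).wrapping_add(s0); // s1 += x(i+1) + s0
    }
    (s0, s1)
}
```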
@@ -386,7 +386,7 @@ fn query_pragma(
|
||||
match pragma {
|
||||
PragmaName::CacheSize => {
|
||||
program.emit_insn(Insn::Integer {
|
||||
value: database_header.borrow().default_cache_size.into(),
|
||||
value: database_header.borrow().default_page_cache_size.into(),
|
||||
dest: register,
|
||||
});
|
||||
}
|
||||
@@ -424,7 +424,7 @@ fn update_cache_size(value: i64, header: Rc<RefCell<DatabaseHeader>>, pager: Rc<
|
||||
}
|
||||
|
||||
// update in-memory header
|
||||
header.borrow_mut().default_cache_size = cache_size_unformatted
|
||||
header.borrow_mut().default_page_cache_size = cache_size_unformatted
|
||||
.try_into()
|
||||
.unwrap_or_else(|_| panic!("invalid value, too big for a i32 {}", value));
|
||||
|
||||
|
||||