use tracing::{instrument, Level};
use crate::{
schema::Index,
storage::{
header_accessor,
pager::{BtreePageAllocMode, Pager},
sqlite3_ondisk::{
read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell,
TableLeafCell, CELL_PTR_SIZE_BYTES, INTERIOR_PAGE_HEADER_SIZE_BYTES,
LEAF_PAGE_HEADER_SIZE_BYTES, LEFT_CHILD_PTR_SIZE_BYTES,
},
},
translate::plan::IterationDirection,
turso_assert,
types::{
find_compare, get_tie_breaker_from_seek_op, IndexInfo, ParseRecordState, RecordCompare,
RecordCursor, SeekResult,
},
MvCursor,
};
use crate::{
return_corrupt, return_if_io,
types::{compare_immutable, IOResult, ImmutableRecord, RefValue, SeekKey, SeekOp, Value},
LimboError, Result,
};
use super::{
pager::PageRef,
sqlite3_ondisk::{
write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, DATABASE_HEADER_SIZE,
MINIMUM_CELL_SIZE,
},
};
#[cfg(debug_assertions)]
use std::collections::HashSet;
use std::{
cell::{Cell, Ref, RefCell},
cmp::{Ordering, Reverse},
collections::BinaryHeap,
fmt::Debug,
ops::DerefMut,
pin::Pin,
rc::Rc,
sync::Arc,
};
/// The B-Tree page header is 12 bytes for interior pages and 8 bytes for leaf pages.
///
/// +--------+-----------------+-----------------+-----------------+--------+----- ..... ----+
/// | Page | First Freeblock | Cell Count | Cell Content | Frag. | Right-most |
/// | Type | Offset | | Area Start | Bytes | pointer |
/// +--------+-----------------+-----------------+-----------------+--------+----- ..... ----+
/// 0 1 2 3 4 5 6 7 8 11
///
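/// For example, the cell count lives at bytes 3..5 of the header. A minimal
/// sketch of reading it from a raw page buffer whose header starts at `hdr`
/// (the real accessors live on [PageContent]):
///
/// ```ignore
/// let cell_count = u16::from_be_bytes([
///     buf[hdr + offset::BTREE_CELL_COUNT],
///     buf[hdr + offset::BTREE_CELL_COUNT + 1],
/// ]);
/// ```
///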
pub mod offset {
/// Type of the B-Tree page (u8).
pub const BTREE_PAGE_TYPE: usize = 0;
/// A pointer to the first freeblock (u16).
///
/// This field of the B-Tree page header is an offset to the first freeblock, or zero if
/// there are no freeblocks on the page. A freeblock is a structure used to identify
/// unallocated space within a B-Tree page, organized as a chain.
///
/// Please note that freeblocks do not mean the regular unallocated free space to the left
/// of the cell content area pointer, but instead blocks of at least 4
/// bytes WITHIN the cell content area that are not in use due to e.g.
/// deletions.
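///
/// Each freeblock starts with a 4-byte header: a big-endian u16 offset to the
/// next freeblock in the chain (zero for the last one), followed by a
/// big-endian u16 holding the size of the freeblock itself, header included.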
pub const BTREE_FIRST_FREEBLOCK: usize = 1;
/// The number of cells in the page (u16).
pub const BTREE_CELL_COUNT: usize = 3;
/// A pointer to the first byte of allocated cell content, counted from the top of the page (u16).
///
/// A zero value for this integer is interpreted as 65,536.
/// If a page contains no cells (which is only possible for a root page of a table that
/// contains no rows) then the offset to the cell content area will equal the page size minus
/// the bytes of reserved space. If the database uses a 65536-byte page size and the
/// reserved space is zero (the usual value for reserved space) then the cell content offset of
/// an empty page wants to be 65,536.
///
/// SQLite strives to place cells as far toward the end of the b-tree page as it can, in
/// order to leave space for future growth of the cell pointer array. This means that the
/// cell content area pointer moves leftward as cells are added to the page.
pub const BTREE_CELL_CONTENT_AREA: usize = 5;
/// The number of fragmented bytes (u8).
///
/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area.
pub const BTREE_FRAGMENTED_BYTES_COUNT: usize = 7;
/// The right-most pointer (saved separately from cells) (u32)
pub const BTREE_RIGHTMOST_PTR: usize = 8;
}
/// Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than
/// this will be declared corrupt. This value is calculated based on a
/// maximum database size of 2^31 pages, a minimum fanout of 2 for a
/// root-node and 3 for all other internal nodes.
///
/// If a tree that appears to be taller than this is encountered, it is
/// assumed that the database is corrupt.
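///
/// Sanity check: a 21-level tree with these minimum fanouts would need at least
/// 2 * 3^19 (~2.3 billion) leaf pages, which already exceeds the 2^31 page limit.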
pub const BTCURSOR_MAX_DEPTH: usize = 20;
/// Maximum number of sibling pages that balancing is performed on.
pub const MAX_SIBLING_PAGES_TO_BALANCE: usize = 3;
/// We need at most 5 new pages to balance 3 pages, because cells from 3 pages are guaranteed to fit in 5 pages.
pub const MAX_NEW_SIBLING_PAGES_AFTER_BALANCE: usize = 5;
/// Return IO if the page is locked.
macro_rules! return_if_locked {
($expr:expr) => {{
if $expr.is_locked() {
return Ok(IOResult::IO);
}
}};
}
/// Validate that the cells in a page are in a valid state. Only runs in debug mode.
macro_rules! debug_validate_cells {
($page_contents:expr, $usable_space:expr) => {
#[cfg(debug_assertions)]
{
debug_validate_cells_core($page_contents, $usable_space);
}
};
}
/// Return IO if the page is locked. If the page is unlocked but not yet loaded, start loading it and return IO.
macro_rules! return_if_locked_maybe_load {
($pager:expr, $btree_page:expr) => {{
if $btree_page.get().is_locked() {
return Ok(IOResult::IO);
}
if !$btree_page.get().is_loaded() {
let page = $pager.read_page($btree_page.get().get().id)?;
$btree_page.page.replace(page);
return Ok(IOResult::IO);
}
}};
}
/// Wrapper around a page reference, so the reference can be swapped out if the page
/// was unloaded and had to be re-read.
pub struct BTreePageInner {
pub page: RefCell<PageRef>,
}
pub type BTreePage = Arc<BTreePageInner>;
unsafe impl Send for BTreePageInner {}
unsafe impl Sync for BTreePageInner {}
/// State machine of destroy operations.
/// Keeps track of traversal so that it can be resumed when IO is encountered.
#[derive(Debug, Clone)]
enum DestroyState {
Start,
LoadPage,
ProcessPage,
ClearOverflowPages { cell: BTreeCell },
FreePage,
}
struct DestroyInfo {
state: DestroyState,
}
#[derive(Debug, Clone)]
enum DeleteSavepoint {
Rowid(i64),
Payload(ImmutableRecord),
}
#[derive(Debug, Clone)]
enum DeleteState {
Start,
DeterminePostBalancingSeekKey,
LoadPage {
post_balancing_seek_key: Option<DeleteSavepoint>,
},
FindCell {
post_balancing_seek_key: Option<DeleteSavepoint>,
},
ClearOverflowPages {
cell_idx: usize,
cell: BTreeCell,
original_child_pointer: Option<u32>,
post_balancing_seek_key: Option<DeleteSavepoint>,
},
InteriorNodeReplacement {
page: PageRef,
/// the btree level of the page where the cell replacement happened.
/// if the replacement causes the page to overflow/underflow, we need to remember it and balance it
/// after the deletion process is otherwise complete.
btree_depth: usize,
cell_idx: usize,
original_child_pointer: Option<u32>,
post_balancing_seek_key: Option<DeleteSavepoint>,
},
CheckNeedsBalancing {
/// same as `InteriorNodeReplacement::btree_depth`
btree_depth: usize,
post_balancing_seek_key: Option<DeleteSavepoint>,
},
WaitForBalancingToComplete {
/// If provided, will also balance an ancestor page at depth `balance_ancestor_at_depth`.
/// If not provided, balancing will stop as soon as a level is encountered where no balancing is required.
balance_ancestor_at_depth: Option<usize>,
target_key: DeleteSavepoint,
},
SeekAfterBalancing {
target_key: DeleteSavepoint,
},
/// If the seek performed in [DeleteState::SeekAfterBalancing] returned a [SeekResult::TryAdvance] we need to call next()/prev() to get to the right location.
/// We need to have this separate state for re-entrancy as calling next()/prev() might yield on IO.
/// FIXME: refactor DeleteState not to have SeekAfterBalancing and instead use save_context() and restore_context()
TryAdvance,
}
#[derive(Clone)]
struct DeleteInfo {
state: DeleteState,
balance_write_info: Option<WriteInfo>,
}
/// State machine of a write operation.
/// May involve balancing due to overflow.
#[derive(Debug, Clone, Copy)]
enum WriteState {
Start,
BalanceStart,
BalanceFreePages {
curr_page: usize,
sibling_count_new: usize,
},
/// Choose which sibling pages to balance (max 3).
/// Generally, the siblings involved will be the page that triggered the balancing and its left and right siblings.
/// The exceptions are:
/// 1. If the leftmost page triggered balancing, up to 3 leftmost pages will be balanced.
/// 2. If the rightmost page triggered balancing, up to 3 rightmost pages will be balanced.
BalanceNonRootPickSiblings,
/// Perform the actual balancing. This will result in 1-5 pages depending on the number of total cells to be distributed
/// from the source pages.
BalanceNonRootDoBalancing,
Finish,
}
struct ReadPayloadOverflow {
payload: Vec<u8>,
next_page: u32,
remaining_to_read: usize,
page: BTreePage,
}
enum PayloadOverflowWithOffset {
SkipOverflowPages {
next_page: u32,
pages_left_to_skip: u32,
page_offset: u32,
amount: u32,
buffer_offset: usize,
is_write: bool,
},
ProcessPage {
next_page: u32,
remaining_to_read: u32,
page: BTreePage,
current_offset: usize,
buffer_offset: usize,
is_write: bool,
},
}
#[derive(Clone, Debug)]
pub enum BTreeKey<'a> {
TableRowId((i64, Option<&'a ImmutableRecord>)),
IndexKey(&'a ImmutableRecord),
}
impl BTreeKey<'_> {
/// Create a new table rowid key from a rowid and an optional immutable record.
/// The record is optional because it may not be available when the key is created.
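///
/// A minimal usage sketch:
///
/// ```ignore
/// // The record payload may not exist yet when the key is built:
/// let key = BTreeKey::new_table_rowid(42, None);
/// assert_eq!(key.maybe_rowid(), Some(42));
/// ```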
pub fn new_table_rowid(rowid: i64, record: Option<&ImmutableRecord>) -> BTreeKey<'_> {
BTreeKey::TableRowId((rowid, record))
}
/// Create a new index key from an immutable record.
pub fn new_index_key(record: &ImmutableRecord) -> BTreeKey<'_> {
BTreeKey::IndexKey(record)
}
/// Get the record, if present. An index key always has a record.
fn get_record(&self) -> Option<&'_ ImmutableRecord> {
match self {
BTreeKey::TableRowId((_, record)) => *record,
BTreeKey::IndexKey(record) => Some(record),
}
}
/// Get the rowid, if present. An index key never has a rowid.
fn maybe_rowid(&self) -> Option<i64> {
match self {
BTreeKey::TableRowId((rowid, _)) => Some(*rowid),
BTreeKey::IndexKey(_) => None,
}
}
/// Assert that the key is an integer rowid and return it.
fn to_rowid(&self) -> i64 {
match self {
BTreeKey::TableRowId((rowid, _)) => *rowid,
BTreeKey::IndexKey(_) => panic!("BTreeKey::to_rowid called on IndexKey"),
}
}
}
#[derive(Clone)]
struct BalanceInfo {
/// Old pages being balanced. At most 3 pages can be balanced at the same time.
pages_to_balance: [Option<BTreePage>; MAX_SIBLING_PAGES_TO_BALANCE],
/// Bookkeeping of the rightmost pointer so the offset::BTREE_RIGHTMOST_PTR can be updated.
rightmost_pointer: *mut u8,
/// Divider cells of old pages. There can be at most 2 divider cells, since at most 3 pages are balanced.
divider_cell_payloads: [Option<Vec<u8>>; MAX_SIBLING_PAGES_TO_BALANCE - 1],
/// Number of siblings being used to balance
sibling_count: usize,
/// First divider cell to remove that marks the first sibling
first_divider_cell: usize,
}
#[derive(Clone)]
struct WriteInfo {
/// State of the write operation state machine.
state: WriteState,
balance_info: RefCell<Option<BalanceInfo>>,
}
impl WriteInfo {
fn new() -> WriteInfo {
WriteInfo {
state: WriteState::Start,
balance_info: RefCell::new(None),
}
}
}
/// Holds the state machine for the operation that was in flight when the cursor
/// was suspended due to IO.
enum CursorState {
None,
ReadWritePayload(PayloadOverflowWithOffset),
Write(WriteInfo),
Destroy(DestroyInfo),
Delete(DeleteInfo),
}
impl CursorState {
fn write_info(&self) -> Option<&WriteInfo> {
match self {
CursorState::Write(x) => Some(x),
_ => None,
}
}
fn mut_write_info(&mut self) -> Option<&mut WriteInfo> {
match self {
CursorState::Write(x) => Some(x),
_ => None,
}
}
fn destroy_info(&self) -> Option<&DestroyInfo> {
match self {
CursorState::Destroy(x) => Some(x),
_ => None,
}
}
fn mut_destroy_info(&mut self) -> Option<&mut DestroyInfo> {
match self {
CursorState::Destroy(x) => Some(x),
_ => None,
}
}
fn delete_info(&self) -> Option<&DeleteInfo> {
match self {
CursorState::Delete(x) => Some(x),
_ => None,
}
}
fn mut_delete_info(&mut self) -> Option<&mut DeleteInfo> {
match self {
CursorState::Delete(x) => Some(x),
_ => None,
}
}
}
impl Debug for CursorState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Delete(..) => write!(f, "Delete"),
Self::Destroy(..) => write!(f, "Destroy"),
Self::None => write!(f, "None"),
Self::ReadWritePayload(..) => write!(f, "ReadWritePayload"),
Self::Write(..) => write!(f, "Write"),
}
}
}
enum OverflowState {
Start,
ProcessPage { next_page: u32 },
Done,
}
/// Holds a Record or RowId, so that these can be transformed into a SeekKey to restore
/// cursor position to its previous location.
pub enum CursorContext {
TableRowId(i64),
/// If we are in an index tree we can then reuse this field to save
/// our cursor information
IndexKeyRowId(ImmutableRecord),
}
/// In the future, we may expand these general validity states
#[derive(Debug, PartialEq, Eq)]
pub enum CursorValidState {
/// Cursor is pointing to an existing location/cell in the Btree
Valid,
/// Cursor may be pointing to a non-existent location/cell. This can happen after balancing operations
RequireSeek,
/// Cursor requires an advance after a seek
RequireAdvance(IterationDirection),
}
#[derive(Debug)]
/// State used for seeking
pub enum CursorSeekState {
Start,
MovingBetweenPages {
eq_seen: Cell<bool>,
},
InteriorPageBinarySearch {
min_cell_idx: Cell<isize>,
max_cell_idx: Cell<isize>,
nearest_matching_cell: Cell<Option<usize>>,
eq_seen: Cell<bool>,
},
FoundLeaf {
eq_seen: Cell<bool>,
},
LeafPageBinarySearch {
min_cell_idx: Cell<isize>,
max_cell_idx: Cell<isize>,
nearest_matching_cell: Cell<Option<usize>>,
/// Indicates if we have seen an exact match during the downwards traversal of the btree.
/// This is only needed in index seeks, in cases where we need to determine whether we call
/// an additional next()/prev() to fetch a matching record from an interior node. We will not
/// do that if both are true:
/// 1. We have not seen an EQ during the traversal
/// 2. We are looking for an exact match ([SeekOp::GE] or [SeekOp::LE] with eq_only: true)
eq_seen: Cell<bool>,
/// In multiple places, we do a seek that checks for an exact match (SeekOp::EQ) in the tree.
/// In those cases, we need to know where to land if we don't find an exact match in the leaf page.
/// For non-eq-only conditions (GT, LT, GE, LE), this is pretty simple:
/// - If we are looking for GT/GE and don't find a match, we should end up beyond the end of the page (idx=cell count).
/// - If we are looking for LT/LE and don't find a match, we should end up before the beginning of the page (idx=-1).
///
/// For eq-only conditions (GE { eq_only: true } or LE { eq_only: true }), we need to know where to land if we don't find an exact match.
/// For GE, we want to land at the first cell that is greater than the seek key.
/// For LE, we want to land at the last cell that is less than the seek key.
/// This is because e.g. when we attempt to insert rowid 666, we first check if it exists.
/// If it doesn't, we want to land in the place where rowid 666 WOULD be inserted.
target_cell_when_not_found: Cell<i32>,
},
}
pub struct BTreeCursor {
/// The multi-version cursor that is used to read and write to the database file.
mv_cursor: Option<Rc<RefCell<MvCursor>>>,
/// The pager that is used to read and write to the database file.
pager: Rc<Pager>,
/// Page id of the root page used to go back up fast.
root_page: usize,
/// Whether the cursor currently points at a record. Rowid and record are stored before being consumed.
has_record: Cell<bool>,
null_flag: bool,
/// Index interior pages are consumed on the way up, so we store a going-upwards flag in case
/// we just moved to a parent page and the parent is an interior index page that still needs
/// to be consumed.
going_upwards: bool,
/// Information maintained across execution attempts when an operation yields due to I/O.
state: CursorState,
/// Information maintained while freeing overflow pages. Maintained separately from cursor state since
/// any method could require freeing overflow pages
overflow_state: Option<OverflowState>,
/// Page stack used to traverse the btree.
/// Each cursor has a stack because each cursor traverses the btree independently.
stack: PageStack,
/// Reusable immutable record, used to allow better allocation strategy.
reusable_immutable_record: RefCell<Option<ImmutableRecord>>,
/// State machine used while parsing a record, kept across invocations for re-entrancy.
parse_record_state: RefCell<ParseRecordState>,
/// Information about the index key structure (sort order, collation, etc)
pub index_info: Option<IndexInfo>,
/// Maintain count of the number of records in the btree. Used for the `Count` opcode
count: usize,
/// Stores the cursor context before rebalancing so that a seek can be done later
context: Option<CursorContext>,
/// Whether the cursor is in a valid state, i.e. whether it is pointing to a valid cell index.
pub valid_state: CursorValidState,
seek_state: CursorSeekState,
/// Separate state to read a record with overflow pages. This separation from `state` is necessary as
/// we can be in a function that relies on `state`, but also needs to process overflow pages
read_overflow_state: RefCell<Option<ReadPayloadOverflow>>,
/// `RecordCursor` is used to parse SQLite record format data retrieved from B-tree
/// leaf pages. It provides incremental parsing, only deserializing the columns that are
/// actually accessed, which is crucial for performance when dealing with wide tables
/// where only a subset of columns are needed.
///
/// - Record parsing is logically a read operation from the caller's perspective
/// - But internally requires updating the cursor's cached parsing state
/// - Multiple methods may need to access different columns from the same record
///
/// # Lifecycle
///
/// The cursor is invalidated and reset when:
/// - Moving to a different record/row
/// - The underlying `ImmutableRecord` is modified
pub record_cursor: RefCell<RecordCursor>,
}
/// We store the cell index and cell count for each page in the stack.
/// The reason we store the cell count is because we need to know when we are at the end of the page,
/// without having to perform IO to get the ancestor pages.
#[derive(Debug, Clone, Copy, Default)]
struct BTreeNodeState {
cell_idx: i32,
cell_count: Option<i32>,
}
impl BTreeNodeState {
/// Check if the current cell index is at the end of the page.
/// This information is used to determine whether a child page should move up to its parent.
/// If the child page is the rightmost leaf page and it has reached the end, this means all of its ancestors have
/// already reached the end, so it should not go up because there are no more records to traverse.
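/// For example, on a page with 3 cells, cell_idx == 3 means the rightmost pointer
/// will be traversed next, while cell_idx == 4 means it has already been traversed,
/// i.e. the page is at its end.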
fn is_at_end(&self) -> bool {
let cell_count = self.cell_count.expect("cell_count is not set");
// cell_idx == cell_count means: we will traverse to the rightmost pointer next.
// cell_idx == cell_count + 1 means: we have already gone down to the rightmost pointer.
self.cell_idx == cell_count + 1
}
}
impl BTreeCursor {
pub fn new(
mv_cursor: Option<Rc<RefCell<MvCursor>>>,
pager: Rc<Pager>,
root_page: usize,
num_columns: usize,
) -> Self {
Self {
mv_cursor,
pager,
root_page,
has_record: Cell::new(false),
null_flag: false,
going_upwards: false,
state: CursorState::None,
overflow_state: None,
stack: PageStack {
current_page: Cell::new(-1),
node_states: RefCell::new([BTreeNodeState::default(); BTCURSOR_MAX_DEPTH + 1]),
stack: RefCell::new([const { None }; BTCURSOR_MAX_DEPTH + 1]),
},
reusable_immutable_record: RefCell::new(None),
index_info: None,
count: 0,
context: None,
valid_state: CursorValidState::Valid,
seek_state: CursorSeekState::Start,
read_overflow_state: RefCell::new(None),
parse_record_state: RefCell::new(ParseRecordState::Init),
record_cursor: RefCell::new(RecordCursor::with_capacity(num_columns)),
}
}
pub fn new_table(
mv_cursor: Option<Rc<RefCell<MvCursor>>>,
pager: Rc<Pager>,
root_page: usize,
num_columns: usize,
) -> Self {
Self::new(mv_cursor, pager, root_page, num_columns)
}
pub fn new_index(
mv_cursor: Option<Rc<RefCell<MvCursor>>>,
pager: Rc<Pager>,
root_page: usize,
index: &Index,
num_columns: usize,
) -> Self {
let mut cursor = Self::new(mv_cursor, pager, root_page, num_columns);
cursor.index_info = Some(IndexInfo::new_from_index(index));
cursor
}
pub fn has_rowid(&self) -> bool {
match &self.index_info {
Some(index_key_info) => index_key_info.has_rowid,
None => true, // currently we don't support WITHOUT ROWID tables
}
}
pub fn get_index_rowid_from_record(&self) -> Option<i64> {
if !self.has_rowid() {
return None;
}
let mut record_cursor_ref = self.record_cursor.borrow_mut();
let record_cursor = record_cursor_ref.deref_mut();
let rowid = match self
.get_immutable_record()
.as_ref()
.unwrap()
.last_value(record_cursor)
{
Some(Ok(RefValue::Integer(rowid))) => rowid,
_ => unreachable!(
"index where has_rowid() is true should have an integer rowid as the last value"
),
};
Some(rowid)
}
/// Check if the table is empty.
/// This is done by checking if the root page has no cells.
#[instrument(skip_all, level = Level::DEBUG)]
fn is_empty_table(&self) -> Result<IOResult<bool>> {
if let Some(mv_cursor) = &self.mv_cursor {
let mv_cursor = mv_cursor.borrow();
return Ok(IOResult::Done(mv_cursor.is_empty()));
}
let page = self.pager.read_page(self.root_page)?;
return_if_locked!(page);
let cell_count = page.get().contents.as_ref().unwrap().cell_count();
Ok(IOResult::Done(cell_count == 0))
}
/// Move the cursor to the previous record and return it.
/// Used in backwards iteration.
#[instrument(skip(self), level = Level::DEBUG, name = "prev")]
fn get_prev_record(&mut self) -> Result<IOResult<bool>> {
loop {
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
let cell_count = contents.cell_count();
let cell_idx = self.stack.current_cell_index();
// If we are at the end of the page and we haven't just come back from the right child,
// we now need to move to the rightmost child.
if self.stack.current_cell_index() == i32::MAX && !self.going_upwards {
let rightmost_pointer = contents.rightmost_pointer();
if let Some(rightmost_pointer) = rightmost_pointer {
let past_rightmost_pointer = cell_count as i32 + 1;
self.stack.set_cell_index(past_rightmost_pointer);
self.stack
.push_backwards(self.read_page(rightmost_pointer as usize)?);
continue;
}
}
if cell_idx >= cell_count as i32 {
self.stack.set_cell_index(cell_count as i32 - 1);
} else if !self.stack.current_cell_index_less_than_min() {
let is_index = page.is_index();
// skip retreat in case we still haven't visited this cell in index
let should_visit_internal_node = is_index && self.going_upwards; // we are going upwards, this means we still need to visit divider cell in an index
let page_type = contents.page_type();
if should_visit_internal_node {
self.going_upwards = false;
return Ok(IOResult::Done(true));
} else if matches!(
page_type,
PageType::IndexLeaf | PageType::TableLeaf | PageType::TableInterior
) {
self.stack.retreat();
}
}
// moved to beginning of current page
// todo: find a better way to flag moved to end or begin of page
if self.stack.current_cell_index_less_than_min() {
loop {
if self.stack.current_cell_index() >= 0 {
break;
}
if self.stack.has_parent() {
self.going_upwards = true;
self.stack.pop();
} else {
// moved to the beginning of the btree
return Ok(IOResult::Done(false));
}
}
// continue to next loop to get record from the new page
continue;
}
let cell_idx = self.stack.current_cell_index() as usize;
let cell = contents.cell_get(cell_idx, self.usable_space())?;
match cell {
BTreeCell::TableInteriorCell(TableInteriorCell {
left_child_page, ..
}) => {
let mem_page = self.read_page(left_child_page as usize)?;
self.stack.push_backwards(mem_page);
continue;
}
BTreeCell::TableLeafCell(TableLeafCell { .. }) => {
return Ok(IOResult::Done(true));
}
BTreeCell::IndexInteriorCell(IndexInteriorCell {
left_child_page, ..
}) => {
if !self.going_upwards {
// In backwards iteration, if we haven't just moved to this interior node from the
// right child, but instead are about to move to the left child, we need to retreat
// so that we don't come back to this node again.
// For example:
// this parent: key 666
// left child has: key 663, key 664, key 665
// we need to move to the previous parent (with e.g. key 662) when iterating backwards.
let mem_page = self.read_page(left_child_page as usize)?;
self.stack.retreat();
self.stack.push_backwards(mem_page);
continue;
}
// Going upwards = we just moved to an interior cell from the right child.
// On the first pass we must take the record from the interior cell (since unlike table btrees, index interior cells have payloads)
// We then mark going_upwards=false so that we go back down the tree on the next invocation.
self.going_upwards = false;
return Ok(IOResult::Done(true));
}
BTreeCell::IndexLeafCell(IndexLeafCell { .. }) => {
return Ok(IOResult::Done(true));
}
}
}
}
/// Reads the record of a cell that has overflow pages. This is a state machine that must be driven until completion, so everything
/// that calls this function must be reentrant.
#[instrument(skip_all, level = Level::DEBUG)]
fn process_overflow_read(
&self,
payload: &'static [u8],
start_next_page: u32,
payload_size: u64,
) -> Result<IOResult<()>> {
if self.read_overflow_state.borrow().is_none() {
let page = self.read_page(start_next_page as usize)?;
*self.read_overflow_state.borrow_mut() = Some(ReadPayloadOverflow {
payload: payload.to_vec(),
next_page: start_next_page,
remaining_to_read: payload_size as usize - payload.len(),
page,
});
return Ok(IOResult::IO);
}
let mut read_overflow_state = self.read_overflow_state.borrow_mut();
let ReadPayloadOverflow {
payload,
next_page,
remaining_to_read,
page: page_btree,
} = read_overflow_state.as_mut().unwrap();
if page_btree.get().is_locked() {
return Ok(IOResult::IO);
}
tracing::debug!(next_page, remaining_to_read, "reading overflow page");
let page = page_btree.get();
let contents = page.get_contents();
// The first four bytes of each overflow page are a big-endian integer which is the page number of the next page in the chain, or zero for the final page in the chain.
let next = contents.read_u32_no_offset(0);
let buf = contents.as_ptr();
let usable_space = self.pager.usable_space();
let to_read = (*remaining_to_read).min(usable_space - 4);
payload.extend_from_slice(&buf[4..4 + to_read]);
*remaining_to_read -= to_read;
if *remaining_to_read != 0 && next != 0 {
let new_page = self.pager.read_page(next as usize).map(|page| {
Arc::new(BTreePageInner {
page: RefCell::new(page),
})
})?;
*page_btree = new_page;
*next_page = next;
return Ok(IOResult::IO);
}
turso_assert!(
*remaining_to_read == 0 && next == 0,
"we can't have more pages to read while also have read everything"
);
let mut payload_swap = Vec::new();
std::mem::swap(payload, &mut payload_swap);
let mut reuse_immutable = self.get_immutable_record_or_create();
reuse_immutable.as_mut().unwrap().invalidate();
reuse_immutable
.as_mut()
.unwrap()
.start_serialization(&payload_swap);
self.record_cursor.borrow_mut().invalidate();
let _ = read_overflow_state.take();
Ok(IOResult::Done(()))
}
/// Calculates how much of a cell's payload should be stored locally vs in overflow pages
///
/// Parameters:
/// - payload_len: Total length of the payload data
/// - page_type: Type of the B-tree page (affects local storage thresholds)
///
/// Returns:
/// - A tuple of (n_local, payload_len) where:
/// - n_local: Amount of payload to store locally on the page
/// - payload_len: Total payload length (unchanged from input)
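///
/// # Example (illustrative, assuming SQLite's standard overflow thresholds)
///
/// With a 4096-byte usable size on a table leaf page, `max_local` is 4061 and
/// `min_local` is 489. A 5000-byte payload exceeds `max_local`, so:
///
/// ```text
/// surplus = 489 + (5000 - 489) % (4096 - 4) = 489 + 419 = 908
/// ```
///
/// Since 908 <= 4061, 908 bytes are stored locally and the remaining 4092
/// bytes spill to overflow pages.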
pub fn parse_cell_info(
&self,
payload_len: usize,
page_type: PageType,
usable_size: usize,
) -> Result<(usize, usize)> {
let max_local = payload_overflow_threshold_max(page_type, usable_size);
let min_local = payload_overflow_threshold_min(page_type, usable_size);
// This matches btreeParseCellAdjustSizeForOverflow logic
let n_local = if payload_len <= max_local {
// Common case - everything fits locally
payload_len
} else {
// For payloads that need overflow pages:
// Calculate how much should be stored locally using the following formula:
// surplus = min_local + (payload_len - min_local) % (usable_space - 4)
//
// This tries to minimize unused space on overflow pages while keeping
// the local storage between min_local and max_local thresholds.
// The (usable_space - 4) factor accounts for overhead in overflow pages.
let surplus = min_local + (payload_len - min_local) % (self.usable_space() - 4);
if surplus <= max_local {
surplus
} else {
min_local
}
};
Ok((n_local, payload_len))
}
/// This function is used to read/write into the payload of the cell that
/// the cursor is pointing to.
/// Parameters:
/// - offset: offset in the payload to start reading/writing
/// - buffer: buffer to read/write into
/// - amount: amount of bytes to read/write
/// - is_write: true if writing, false if reading
///
/// If the cell has overflow pages, it will skip ahead to the overflow page
/// that contains the given offset.
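///
/// A minimal sketch of driving this state machine to completion (the
/// `wait_for_pending_io` helper is hypothetical):
///
/// ```ignore
/// // Read 16 bytes starting at byte 100 of the current cell's payload.
/// let mut buf = Vec::new();
/// while let IOResult::IO = cursor.read_write_payload_with_offset(100, &mut buf, 16, false)? {
///     wait_for_pending_io()?; // hypothetical: block until page reads complete
/// }
/// ```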
#[instrument(skip_all, level = Level::DEBUG)]
pub fn read_write_payload_with_offset(
&mut self,
mut offset: u32,
buffer: &mut Vec<u8>,
mut amount: u32,
is_write: bool,
) -> Result<IOResult<()>> {
if let CursorState::ReadWritePayload(PayloadOverflowWithOffset::SkipOverflowPages {
..
})
| CursorState::ReadWritePayload(PayloadOverflowWithOffset::ProcessPage { .. }) =
&self.state
{
return self.continue_payload_overflow_with_offset(buffer, self.usable_space());
}
let page_btree = self.stack.top();
return_if_locked_maybe_load!(self.pager, page_btree);
let page = page_btree.get();
let contents = page.get().contents.as_ref().unwrap();
let cell_idx = self.stack.current_cell_index() as usize - 1;
if cell_idx >= contents.cell_count() {
return Err(LimboError::Corrupt("Invalid cell index".into()));
}
let usable_size = self.usable_space();
let cell = contents.cell_get(cell_idx, usable_size).unwrap();
let (payload, payload_size, first_overflow_page) = match cell {
BTreeCell::TableLeafCell(cell) => {
(cell.payload, cell.payload_size, cell.first_overflow_page)
}
BTreeCell::IndexLeafCell(cell) => {
(cell.payload, cell.payload_size, cell.first_overflow_page)
}
BTreeCell::IndexInteriorCell(cell) => {
(cell.payload, cell.payload_size, cell.first_overflow_page)
}
BTreeCell::TableInteriorCell(_) => {
return Err(LimboError::Corrupt(
"Cannot access payload of table interior cell".into(),
));
}
};
turso_assert!(
offset + amount <= payload_size as u32,
"offset + amount <= payload_size"
);
let (local_size, _) =
self.parse_cell_info(payload_size as usize, contents.page_type(), usable_size)?;
let mut bytes_processed: u32 = 0;
if offset < local_size as u32 {
let mut local_amount: u32 = amount;
if local_amount + offset > local_size as u32 {
local_amount = local_size as u32 - offset;
}
if is_write {
self.write_payload_to_page(
offset,
local_amount,
payload,
buffer,
page_btree.clone(),
);
} else {
self.read_payload_from_page(offset, local_amount, payload, buffer);
}
offset = 0;
amount -= local_amount;
bytes_processed += local_amount;
} else {
offset -= local_size as u32;
}
if amount > 0 {
if first_overflow_page.is_none() {
return Err(LimboError::Corrupt(
"Expected overflow page but none found".into(),
));
}
let overflow_size = usable_size - 4;
let pages_to_skip = offset / overflow_size as u32;
let page_offset = offset % overflow_size as u32;
self.state =
CursorState::ReadWritePayload(PayloadOverflowWithOffset::SkipOverflowPages {
next_page: first_overflow_page.unwrap(),
pages_left_to_skip: pages_to_skip,
page_offset,
amount,
buffer_offset: bytes_processed as usize,
is_write,
});
return Ok(IOResult::IO);
}
Ok(IOResult::Done(()))
}
#[instrument(skip_all, level = Level::DEBUG)]
pub fn continue_payload_overflow_with_offset(
&mut self,
buffer: &mut Vec<u8>,
usable_space: usize,
) -> Result<IOResult<()>> {
loop {
let mut state = std::mem::replace(&mut self.state, CursorState::None);
match &mut state {
CursorState::ReadWritePayload(PayloadOverflowWithOffset::SkipOverflowPages {
next_page,
pages_left_to_skip,
page_offset,
amount,
buffer_offset,
is_write,
}) => {
if *pages_left_to_skip == 0 {
let page = self.read_page(*next_page as usize)?;
return_if_locked_maybe_load!(self.pager, page);
self.state =
CursorState::ReadWritePayload(PayloadOverflowWithOffset::ProcessPage {
next_page: *next_page,
remaining_to_read: *amount,
page,
current_offset: *page_offset as usize,
buffer_offset: *buffer_offset,
is_write: *is_write,
});
continue;
}
let page = self.read_page(*next_page as usize)?;
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get_contents();
let next = contents.read_u32_no_offset(0);
if next == 0 {
return Err(LimboError::Corrupt(
"Overflow chain ends prematurely".into(),
));
}
*next_page = next;
*pages_left_to_skip -= 1;
self.state = CursorState::ReadWritePayload(
PayloadOverflowWithOffset::SkipOverflowPages {
next_page: next,
pages_left_to_skip: *pages_left_to_skip,
page_offset: *page_offset,
amount: *amount,
buffer_offset: *buffer_offset,
is_write: *is_write,
},
);
return Ok(IOResult::IO);
}
CursorState::ReadWritePayload(PayloadOverflowWithOffset::ProcessPage {
next_page,
remaining_to_read,
page: page_btree,
current_offset,
buffer_offset,
is_write,
}) => {
if page_btree.get().is_locked() {
self.state =
CursorState::ReadWritePayload(PayloadOverflowWithOffset::ProcessPage {
next_page: *next_page,
remaining_to_read: *remaining_to_read,
page: page_btree.clone(),
current_offset: *current_offset,
buffer_offset: *buffer_offset,
is_write: *is_write,
});
return Ok(IOResult::IO);
}
let page = page_btree.get();
let contents = page.get_contents();
let overflow_size = usable_space - 4;
let page_offset = *current_offset;
let bytes_to_process = std::cmp::min(
*remaining_to_read,
overflow_size as u32 - page_offset as u32,
);
let payload_offset = 4 + page_offset;
let page_payload = contents.as_ptr();
if *is_write {
self.write_payload_to_page(
payload_offset as u32,
bytes_to_process,
page_payload,
buffer,
page_btree.clone(),
);
} else {
self.read_payload_from_page(
payload_offset as u32,
bytes_to_process,
page_payload,
buffer,
);
}
*remaining_to_read -= bytes_to_process;
*buffer_offset += bytes_to_process as usize;
if *remaining_to_read == 0 {
self.state = CursorState::None;
return Ok(IOResult::Done(()));
}
let next = contents.read_u32_no_offset(0);
if next == 0 {
return Err(LimboError::Corrupt(
"Overflow chain ends prematurely".into(),
));
}
// Load next page
*next_page = next;
*current_offset = 0; // Reset offset for new page
*page_btree = self.read_page(next as usize)?;
// Return IO to allow other operations
return Ok(IOResult::IO);
}
_ => {
return Err(LimboError::InternalError(
"Invalid state for continue_payload_overflow_with_offset".into(),
))
}
}
}
}
fn read_payload_from_page(
&self,
payload_offset: u32,
num_bytes: u32,
payload: &[u8],
buffer: &mut Vec<u8>,
) {
buffer.extend_from_slice(
&payload[payload_offset as usize..(payload_offset + num_bytes) as usize],
);
}
/// This function writes from a buffer into a page.
/// SAFETY: This function uses unsafe in the write path to write to the page payload directly.
/// - Make sure the page is pointing to valid data, i.e. the page has not been evicted from the page cache.
fn write_payload_to_page(
&mut self,
payload_offset: u32,
num_bytes: u32,
payload: &[u8],
buffer: &mut [u8],
page: BTreePage,
) {
self.pager.add_dirty(&page.get());
// SAFETY: This is safe as long as the page is not evicted from the cache.
let payload_mut =
unsafe { std::slice::from_raw_parts_mut(payload.as_ptr() as *mut u8, payload.len()) };
payload_mut[payload_offset as usize..payload_offset as usize + num_bytes as usize]
.copy_from_slice(&buffer[..num_bytes as usize]);
}
/// Check if any ancestor pages still have cells to iterate.
/// If not, traversing back up to parent is of no use because we are at the end of the tree.
fn ancestor_pages_have_more_children(&self) -> bool {
let node_states = self.stack.node_states.borrow();
(0..self.stack.current())
.rev()
.any(|idx| !node_states[idx].is_at_end())
}
/// Move the cursor to the next record and return it.
/// Used in forwards iteration, which is the default.
#[instrument(skip(self), level = Level::DEBUG, name = "next")]
fn get_next_record(&mut self) -> Result<IOResult<bool>> {
if let Some(mv_cursor) = &self.mv_cursor {
let mut mv_cursor = mv_cursor.borrow_mut();
mv_cursor.forward();
let rowid = mv_cursor.current_row_id();
match rowid {
Some(_rowid) => {
return Ok(IOResult::Done(true));
}
None => return Ok(IOResult::Done(false)),
}
}
loop {
let mem_page_rc = self.stack.top();
return_if_locked_maybe_load!(self.pager, mem_page_rc);
let mem_page = mem_page_rc.get();
let contents = mem_page.get().contents.as_ref().unwrap();
let cell_count = contents.cell_count();
tracing::debug!(
id = mem_page_rc.get().get().id,
cell = self.stack.current_cell_index(),
cell_count,
"current_before_advance",
);
let is_index = mem_page_rc.get().is_index();
let should_skip_advance = is_index
&& self.going_upwards // we are going upwards, this means we still need to visit divider cell in an index
&& self.stack.current_cell_index() >= 0 && self.stack.current_cell_index() < cell_count as i32; // if we weren't on a
// valid cell then it means we will have to move upwards again or move to the right page;
// either way, we won't visit this invalid cell index
if should_skip_advance {
tracing::debug!(
going_upwards = self.going_upwards,
page = mem_page_rc.get().get().id,
cell_idx = self.stack.current_cell_index(),
"skipping advance",
);
self.going_upwards = false;
return Ok(IOResult::Done(true));
}
// Important to advance only after loading the page so that we don't advance more than once
self.stack.advance();
let cell_idx = self.stack.current_cell_index() as usize;
tracing::debug!(id = mem_page_rc.get().get().id, cell = cell_idx, "current");
if cell_idx >= cell_count {
let rightmost_already_traversed = cell_idx > cell_count;
match (contents.rightmost_pointer(), rightmost_already_traversed) {
(Some(right_most_pointer), false) => {
// do rightmost
self.stack.advance();
let mem_page = self.read_page(right_most_pointer as usize)?;
self.stack.push(mem_page);
continue;
}
_ => {
if self.ancestor_pages_have_more_children() {
tracing::trace!("moving simple upwards");
self.going_upwards = true;
self.stack.pop();
continue;
} else {
// If none of the ancestor pages have more children to iterate, that means we are at the end of the btree and should stop iterating.
return Ok(IOResult::Done(false));
}
}
}
}
turso_assert!(
cell_idx < contents.cell_count(),
"cell index out of bounds: cell_idx={}, cell_count={}, page_type={:?} page_id={}",
cell_idx,
contents.cell_count(),
contents.page_type(),
mem_page_rc.get().get().id
);
let cell = contents.cell_get(cell_idx, self.usable_space())?;
match &cell {
BTreeCell::TableInteriorCell(TableInteriorCell {
left_child_page, ..
}) => {
let mem_page = self.read_page(*left_child_page as usize)?;
self.stack.push(mem_page);
continue;
}
BTreeCell::TableLeafCell(TableLeafCell { .. }) => {
return Ok(IOResult::Done(true));
}
BTreeCell::IndexInteriorCell(IndexInteriorCell {
left_child_page, ..
}) => {
if self.going_upwards {
self.going_upwards = false;
return Ok(IOResult::Done(true));
} else {
let mem_page = self.read_page(*left_child_page as usize)?;
self.stack.push(mem_page);
continue;
}
}
BTreeCell::IndexLeafCell(IndexLeafCell { .. }) => {
return Ok(IOResult::Done(true));
}
}
}
}
/// Move the cursor to the record that matches the seek key and seek operation.
/// This may be used to seek to a specific record in a point query (e.g. SELECT * FROM table WHERE col = 10)
/// or e.g. find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10).
/// We don't include the rowid in the comparison, which is why the last value of the record is excluded.
fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result<IOResult<SeekResult>> {
let ret = return_if_io!(match key {
SeekKey::TableRowId(rowid) => {
self.tablebtree_seek(rowid, op)
}
SeekKey::IndexKey(index_key) => {
self.indexbtree_seek(index_key, op)
}
});
self.valid_state = CursorValidState::Valid;
Ok(IOResult::Done(ret))
}
/// Move the cursor to the root page of the btree.
#[instrument(skip_all, level = Level::DEBUG)]
fn move_to_root(&mut self) -> Result<()> {
self.seek_state = CursorSeekState::Start;
self.going_upwards = false;
tracing::trace!(root_page = self.root_page);
let mem_page = self.read_page(self.root_page)?;
self.stack.clear();
self.stack.push(mem_page);
Ok(())
}
/// Move the cursor to the rightmost record in the btree.
#[instrument(skip(self), level = Level::DEBUG)]
fn move_to_rightmost(&mut self) -> Result<IOResult<bool>> {
self.move_to_root()?;
loop {
let mem_page = self.stack.top();
let page_idx = mem_page.get().get().id;
let page = self.read_page(page_idx)?;
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
if contents.is_leaf() {
if contents.cell_count() > 0 {
self.stack.set_cell_index(contents.cell_count() as i32 - 1);
return Ok(IOResult::Done(true));
}
return Ok(IOResult::Done(false));
}
match contents.rightmost_pointer() {
Some(right_most_pointer) => {
self.stack.set_cell_index(contents.cell_count() as i32 + 1);
let mem_page = self.read_page(right_most_pointer as usize)?;
self.stack.push(mem_page);
continue;
}
None => {
unreachable!("interior page should have a rightmost pointer");
}
}
}
}
/// Specialized version of move_to() for table btrees.
#[instrument(skip(self), level = Level::DEBUG)]
fn tablebtree_move_to(&mut self, rowid: i64, seek_op: SeekOp) -> Result<IOResult<()>> {
'outer: loop {
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
if contents.is_leaf() {
self.seek_state = CursorSeekState::FoundLeaf {
eq_seen: Cell::new(false),
};
return Ok(IOResult::Done(()));
}
let cell_count = contents.cell_count();
if matches!(
self.seek_state,
CursorSeekState::Start | CursorSeekState::MovingBetweenPages { .. }
) {
let eq_seen = match &self.seek_state {
CursorSeekState::MovingBetweenPages { eq_seen } => eq_seen.get(),
_ => false,
};
let min_cell_idx = Cell::new(0);
let max_cell_idx = Cell::new(cell_count as isize - 1);
let nearest_matching_cell = Cell::new(None);
self.seek_state = CursorSeekState::InteriorPageBinarySearch {
min_cell_idx,
max_cell_idx,
nearest_matching_cell,
eq_seen: Cell::new(eq_seen),
};
}
let CursorSeekState::InteriorPageBinarySearch {
min_cell_idx,
max_cell_idx,
nearest_matching_cell,
eq_seen,
..
} = &self.seek_state
else {
unreachable!("we must be in an interior binary search state");
};
loop {
let min = min_cell_idx.get();
let max = max_cell_idx.get();
if min > max {
if let Some(nearest_matching_cell) = nearest_matching_cell.get() {
let left_child_page =
contents.cell_interior_read_left_child_page(nearest_matching_cell);
self.stack.set_cell_index(nearest_matching_cell as i32);
let mem_page = self.read_page(left_child_page as usize)?;
self.stack.push(mem_page);
self.seek_state = CursorSeekState::MovingBetweenPages {
eq_seen: Cell::new(eq_seen.get()),
};
continue 'outer;
}
self.stack.set_cell_index(cell_count as i32 + 1);
match contents.rightmost_pointer() {
Some(right_most_pointer) => {
let mem_page = self.read_page(right_most_pointer as usize)?;
self.stack.push(mem_page);
self.seek_state = CursorSeekState::MovingBetweenPages {
eq_seen: Cell::new(eq_seen.get()),
};
continue 'outer;
}
None => {
unreachable!("we shall not go back up! The only way is down the slope");
}
}
}
let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here.
let cell_rowid = contents.cell_table_interior_read_rowid(cur_cell_idx as usize)?;
// in sqlite btrees left child pages have <= keys.
// table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10,
// and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees)
// the left page may have a rowid=10.
// Logic table for determining if target leaf page is in left subtree
//
// Forwards iteration (looking for first match in tree):
// OP | Current Cell vs Seek Key | Action? | Explanation
// GT | > | go left | First > key is in left subtree
// GT | = or < | go right | First > key is in right subtree
// GE | > or = | go left | First >= key is in left subtree
// GE | < | go right | First >= key is in right subtree
//
// Backwards iteration (looking for last match in tree):
// OP | Current Cell vs Seek Key | Action? | Explanation
// LE | > or = | go left | Last <= key is in left subtree
// LE | < | go right | Last <= key is in right subtree
// LT | > or = | go left | Last < key is in left subtree
// LT | < | go right | Last < key is in right subtree, except if the cell rowid is exactly key-1 (then go left)
//
// No iteration (point query):
// EQ | > or = | go left | Last = key is in left subtree
// EQ | < | go right | Last = key is in right subtree
let is_on_left = match seek_op {
SeekOp::GT => cell_rowid > rowid,
SeekOp::GE { .. } => cell_rowid >= rowid,
SeekOp::LE { .. } => cell_rowid >= rowid,
SeekOp::LT => cell_rowid + 1 >= rowid,
};
if is_on_left {
nearest_matching_cell.set(Some(cur_cell_idx as usize));
max_cell_idx.set(cur_cell_idx - 1);
} else {
min_cell_idx.set(cur_cell_idx + 1);
}
}
}
}
/// Specialized version of move_to() for index btrees.
#[instrument(skip(self, index_key), level = Level::DEBUG)]
fn indexbtree_move_to(
&mut self,
index_key: &ImmutableRecord,
cmp: SeekOp,
) -> Result<IOResult<()>> {
let iter_dir = cmp.iteration_direction();
let key_values = index_key.get_values();
let record_comparer = {
let index_info = self
.index_info
.as_ref()
.expect("indexbtree_move_to without index_info");
find_compare(&key_values, index_info)
};
tracing::debug!("Using record comparison strategy: {:?}", record_comparer);
let tie_breaker = get_tie_breaker_from_seek_op(cmp);
'outer: loop {
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
if contents.is_leaf() {
let eq_seen = match &self.seek_state {
CursorSeekState::MovingBetweenPages { eq_seen } => eq_seen.get(),
_ => false,
};
self.seek_state = CursorSeekState::FoundLeaf {
eq_seen: Cell::new(eq_seen),
};
return Ok(IOResult::Done(()));
}
if matches!(
self.seek_state,
CursorSeekState::Start | CursorSeekState::MovingBetweenPages { .. }
) {
let eq_seen = match &self.seek_state {
CursorSeekState::MovingBetweenPages { eq_seen } => eq_seen.get(),
_ => false,
};
let cell_count = contents.cell_count();
let min_cell_idx = Cell::new(0);
let max_cell_idx = Cell::new(cell_count as isize - 1);
let nearest_matching_cell = Cell::new(None);
self.seek_state = CursorSeekState::InteriorPageBinarySearch {
min_cell_idx,
max_cell_idx,
nearest_matching_cell,
eq_seen: Cell::new(eq_seen),
};
}
let CursorSeekState::InteriorPageBinarySearch {
min_cell_idx,
max_cell_idx,
nearest_matching_cell,
eq_seen,
} = &self.seek_state
else {
unreachable!(
"we must be in an interior binary search state, got {:?}",
self.seek_state
);
};
loop {
let min = min_cell_idx.get();
let max = max_cell_idx.get();
if min > max {
let Some(leftmost_matching_cell) = nearest_matching_cell.get() else {
self.stack.set_cell_index(contents.cell_count() as i32 + 1);
match contents.rightmost_pointer() {
Some(right_most_pointer) => {
let mem_page = self.read_page(right_most_pointer as usize)?;
self.stack.push(mem_page);
self.seek_state = CursorSeekState::MovingBetweenPages {
eq_seen: Cell::new(eq_seen.get()),
};
continue 'outer;
}
None => {
unreachable!(
"we shall not go back up! The only way is down the slope"
);
}
}
};
let matching_cell =
contents.cell_get(leftmost_matching_cell, self.usable_space())?;
self.stack.set_cell_index(leftmost_matching_cell as i32);
// we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up.
// in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again.
// Example:
// this parent: key 666, and we found the target key in the left child.
// left child has: key 663, key 664, key 665
// we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again.
if iter_dir == IterationDirection::Backwards {
self.stack.retreat();
}
let BTreeCell::IndexInteriorCell(IndexInteriorCell {
left_child_page, ..
}) = &matching_cell
else {
unreachable!("unexpected cell type: {:?}", matching_cell);
};
turso_assert!(
page.get().id != *left_child_page as usize,
"corrupt: current page and left child page of cell {} are both {}",
leftmost_matching_cell,
page.get().id
);
let mem_page = self.read_page(*left_child_page as usize)?;
self.stack.push(mem_page);
self.seek_state = CursorSeekState::MovingBetweenPages {
eq_seen: Cell::new(eq_seen.get()),
};
continue 'outer;
}
let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here.
self.stack.set_cell_index(cur_cell_idx as i32);
let cell = contents.cell_get(cur_cell_idx as usize, self.usable_space())?;
let BTreeCell::IndexInteriorCell(IndexInteriorCell {
payload,
payload_size,
first_overflow_page,
..
}) = &cell
else {
unreachable!("unexpected cell type: {:?}", cell);
};
if let Some(next_page) = first_overflow_page {
return_if_io!(self.process_overflow_read(payload, *next_page, *payload_size))
} else {
self.get_immutable_record_or_create()
.as_mut()
.unwrap()
.invalidate();
self.get_immutable_record_or_create()
.as_mut()
.unwrap()
.start_serialization(payload);
self.record_cursor.borrow_mut().invalidate();
};
let (target_leaf_page_is_in_left_subtree, is_eq) = {
let record = self.get_immutable_record();
let record = record.as_ref().unwrap();
let interior_cell_vs_index_key = record_comparer
.compare(
record,
&key_values,
self.index_info
.as_ref()
.expect("indexbtree_move_to without index_info"),
0,
tie_breaker,
)
.unwrap();
// in sqlite btrees left child pages have <= keys.
// in general, in forwards iteration we want to find the first key that matches the seek condition.
// in backwards iteration we want to find the last key that matches the seek condition.
//
// Logic table for determining if target leaf page is in left subtree.
// For index b-trees this is a bit more complicated since the interior cells contain payloads (the key is the payload).
// and for non-unique indexes there might be several cells with the same key.
//
// Forwards iteration (looking for first match in tree):
// OP | Current Cell vs Seek Key | Action? | Explanation
// GT | > | go left | First > key could be exactly this one, or in left subtree
// GT | = or < | go right | First > key must be in right subtree
// GE | > | go left | First >= key could be exactly this one, or in left subtree
// GE | = | go left | First >= key could be exactly this one, or in left subtree
// GE | < | go right | First >= key must be in right subtree
//
// Backwards iteration (looking for last match in tree):
// OP | Current Cell vs Seek Key | Action? | Explanation
// LE | > | go left | Last <= key must be in left subtree
// LE | = | go right | Last <= key is either this one, or somewhere to the right of this one. So we need to go right to make sure
// LE | < | go right | Last <= key must be in right subtree
// LT | > | go left | Last < key must be in left subtree
// LT | = | go left | Last < key must be in left subtree since we want strictly less than
// LT | < | go right | Last < key could be exactly this one, or in right subtree
//
// No iteration (point query):
// EQ | > | go left | First = key must be in left subtree
// EQ | = | go left | First = key could be exactly this one, or in left subtree
// EQ | < | go right | First = key must be in right subtree
(
match cmp {
SeekOp::GT => interior_cell_vs_index_key.is_gt(),
SeekOp::GE { .. } => interior_cell_vs_index_key.is_ge(),
SeekOp::LE { .. } => interior_cell_vs_index_key.is_gt(),
SeekOp::LT => interior_cell_vs_index_key.is_ge(),
},
interior_cell_vs_index_key.is_eq(),
)
};
if is_eq {
eq_seen.set(true);
}
if target_leaf_page_is_in_left_subtree {
nearest_matching_cell.set(Some(cur_cell_idx as usize));
max_cell_idx.set(cur_cell_idx - 1);
} else {
min_cell_idx.set(cur_cell_idx + 1);
}
}
}
}
/// Specialized version of do_seek() for table btrees that uses binary search instead
/// of iterating cells in order.
#[instrument(skip_all, level = Level::DEBUG)]
fn tablebtree_seek(&mut self, rowid: i64, seek_op: SeekOp) -> Result<IOResult<SeekResult>> {
turso_assert!(
self.mv_cursor.is_none(),
"attempting to seek with MV cursor"
);
let iter_dir = seek_op.iteration_direction();
if matches!(
self.seek_state,
CursorSeekState::Start
| CursorSeekState::MovingBetweenPages { .. }
| CursorSeekState::InteriorPageBinarySearch { .. }
) {
// No need for another move_to_root; move_to() already moves to the root.
return_if_io!(self.move_to(SeekKey::TableRowId(rowid), seek_op));
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
turso_assert!(
contents.is_leaf(),
"tablebtree_seek() called on non-leaf page"
);
let cell_count = contents.cell_count();
if cell_count == 0 {
self.stack.set_cell_index(0);
return Ok(IOResult::Done(SeekResult::NotFound));
}
let min_cell_idx = Cell::new(0);
let max_cell_idx = Cell::new(cell_count as isize - 1);
// If iter dir is forwards, we want the first cell that matches;
// If iter dir is backwards, we want the last cell that matches.
let nearest_matching_cell = Cell::new(None);
self.seek_state = CursorSeekState::LeafPageBinarySearch {
min_cell_idx,
max_cell_idx,
nearest_matching_cell,
eq_seen: Cell::new(false), // not relevant for table btrees
target_cell_when_not_found: Cell::new(match seek_op.iteration_direction() {
IterationDirection::Forwards => cell_count as i32,
IterationDirection::Backwards => -1,
}),
};
}
let CursorSeekState::LeafPageBinarySearch {
min_cell_idx,
max_cell_idx,
nearest_matching_cell,
target_cell_when_not_found,
..
} = &self.seek_state
else {
unreachable!("we must be in a leaf binary search state");
};
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
loop {
let min = min_cell_idx.get();
let max = max_cell_idx.get();
if min > max {
if let Some(nearest_matching_cell) = nearest_matching_cell.get() {
self.stack.set_cell_index(nearest_matching_cell as i32);
self.has_record.set(true);
return Ok(IOResult::Done(SeekResult::Found));
} else {
// if !eq_only, a matching entry can exist in a neighbouring leaf page.
// This can happen if a key in the interior page was deleted but the divider was kept untouched;
// in that case the BTree can navigate to a leaf that no longer has a matching key for seek_op,
// and the caller must then advance the cursor if necessary.
return Ok(IOResult::Done(if seek_op.eq_only() {
let has_record = target_cell_when_not_found.get() >= 0
&& target_cell_when_not_found.get() < contents.cell_count() as i32;
self.has_record.set(has_record);
self.stack.set_cell_index(target_cell_when_not_found.get());
SeekResult::NotFound
} else {
// set cursor to the position that would hold the op boundary if it were present
self.stack.set_cell_index(target_cell_when_not_found.get());
SeekResult::TryAdvance
}));
};
}
let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here.
let cell_rowid = contents.cell_table_leaf_read_rowid(cur_cell_idx as usize)?;
let cmp = cell_rowid.cmp(&rowid);
let found = match seek_op {
SeekOp::GT => cmp.is_gt(),
SeekOp::GE { eq_only: true } => cmp.is_eq(),
SeekOp::GE { eq_only: false } => cmp.is_ge(),
SeekOp::LE { eq_only: true } => cmp.is_eq(),
SeekOp::LE { eq_only: false } => cmp.is_le(),
SeekOp::LT => cmp.is_lt(),
};
// rowids are unique, so we can return the rowid immediately
if found && seek_op.eq_only() {
self.stack.set_cell_index(cur_cell_idx as i32);
self.has_record.set(true);
return Ok(IOResult::Done(SeekResult::Found));
}
if found {
nearest_matching_cell.set(Some(cur_cell_idx as usize));
match iter_dir {
IterationDirection::Forwards => {
max_cell_idx.set(cur_cell_idx - 1);
}
IterationDirection::Backwards => {
min_cell_idx.set(cur_cell_idx + 1);
}
}
} else if cmp.is_gt() {
if matches!(seek_op, SeekOp::GE { eq_only: true }) {
target_cell_when_not_found
.set(target_cell_when_not_found.get().min(cur_cell_idx as i32));
}
max_cell_idx.set(cur_cell_idx - 1);
} else if cmp.is_lt() {
if matches!(seek_op, SeekOp::LE { eq_only: true }) {
target_cell_when_not_found
.set(target_cell_when_not_found.get().max(cur_cell_idx as i32));
}
min_cell_idx.set(cur_cell_idx + 1);
} else {
match iter_dir {
IterationDirection::Forwards => {
min_cell_idx.set(cur_cell_idx + 1);
}
IterationDirection::Backwards => {
max_cell_idx.set(cur_cell_idx - 1);
}
}
}
}
}
#[instrument(skip_all, level = Level::DEBUG)]
fn indexbtree_seek(
&mut self,
key: &ImmutableRecord,
seek_op: SeekOp,
) -> Result<IOResult<SeekResult>> {
let key_values = key.get_values();
let record_comparer = {
let index_info = self
.index_info
.as_ref()
.expect("indexbtree_seek without index_info");
find_compare(&key_values, index_info)
};
tracing::debug!(
"Using record comparison strategy for seek: {:?}",
record_comparer
);
if matches!(
self.seek_state,
CursorSeekState::Start
| CursorSeekState::MovingBetweenPages { .. }
| CursorSeekState::InteriorPageBinarySearch { .. }
) {
            // No need for another move_to_root here: move_to() already moves to the root.
return_if_io!(self.move_to(SeekKey::IndexKey(key), seek_op));
let CursorSeekState::FoundLeaf { eq_seen } = &self.seek_state else {
unreachable!(
"We must still be in FoundLeaf state after move_to, got: {:?}",
self.seek_state
);
};
let eq_seen = eq_seen.get();
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
let cell_count = contents.cell_count();
if cell_count == 0 {
return Ok(IOResult::Done(SeekResult::NotFound));
}
let min = Cell::new(0);
let max = Cell::new(cell_count as isize - 1);
// If iter dir is forwards, we want the first cell that matches;
// If iter dir is backwards, we want the last cell that matches.
let nearest_matching_cell = Cell::new(None);
self.seek_state = CursorSeekState::LeafPageBinarySearch {
min_cell_idx: min,
max_cell_idx: max,
nearest_matching_cell,
eq_seen: Cell::new(eq_seen),
target_cell_when_not_found: Cell::new(match seek_op.iteration_direction() {
IterationDirection::Forwards => cell_count as i32,
IterationDirection::Backwards => -1,
}),
};
}
let CursorSeekState::LeafPageBinarySearch {
min_cell_idx,
max_cell_idx,
nearest_matching_cell,
eq_seen,
target_cell_when_not_found,
} = &self.seek_state
else {
unreachable!(
"we must be in a leaf binary search state, got: {:?}",
self.seek_state
);
};
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
let iter_dir = seek_op.iteration_direction();
loop {
let min = min_cell_idx.get();
let max = max_cell_idx.get();
if min > max {
if let Some(nearest_matching_cell) = nearest_matching_cell.get() {
self.stack.set_cell_index(nearest_matching_cell as i32);
self.has_record.set(true);
return Ok(IOResult::Done(SeekResult::Found));
} else {
                    // set cursor to the position that would hold the op-boundary if it were present
let target_cell = target_cell_when_not_found.get();
self.stack.set_cell_index(target_cell);
let has_record = target_cell >= 0 && target_cell < contents.cell_count() as i32;
self.has_record.set(has_record);
                    // Similar logic to tablebtree_seek(), but for indexes.
                    // The difference is that since index keys are not necessarily unique, we need to TryAdvance
                    // even when eq_only=true, if we have seen an EQ match higher up in the tree in an interior node.
if seek_op.eq_only() && !eq_seen.get() {
return Ok(IOResult::Done(SeekResult::NotFound));
}
return Ok(IOResult::Done(SeekResult::TryAdvance));
};
}
let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here.
self.stack.set_cell_index(cur_cell_idx as i32);
let cell = contents.cell_get(cur_cell_idx as usize, self.usable_space())?;
let BTreeCell::IndexLeafCell(IndexLeafCell {
payload,
first_overflow_page,
payload_size,
}) = &cell
else {
unreachable!("unexpected cell type: {:?}", cell);
};
if let Some(next_page) = first_overflow_page {
return_if_io!(self.process_overflow_read(payload, *next_page, *payload_size))
} else {
self.get_immutable_record_or_create()
.as_mut()
.unwrap()
.invalidate();
self.get_immutable_record_or_create()
.as_mut()
.unwrap()
.start_serialization(payload);
self.record_cursor.borrow_mut().invalidate();
};
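            // At this point the full cell payload (including any overflow chain) has been
            // loaded into the cursor's reusable record buffer, ready for the key comparison
            // below.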
let (cmp, found) = self.compare_with_current_record(
key_values.as_slice(),
seek_op,
&record_comparer,
self.index_info
.as_ref()
.expect("indexbtree_seek without index_info"),
);
if found {
nearest_matching_cell.set(Some(cur_cell_idx as usize));
match iter_dir {
IterationDirection::Forwards => {
max_cell_idx.set(cur_cell_idx - 1);
}
IterationDirection::Backwards => {
min_cell_idx.set(cur_cell_idx + 1);
}
}
} else if cmp.is_gt() {
if matches!(seek_op, SeekOp::GE { eq_only: true }) {
target_cell_when_not_found
.set(target_cell_when_not_found.get().min(cur_cell_idx as i32));
}
max_cell_idx.set(cur_cell_idx - 1);
} else if cmp.is_lt() {
if matches!(seek_op, SeekOp::LE { eq_only: true }) {
target_cell_when_not_found
.set(target_cell_when_not_found.get().max(cur_cell_idx as i32));
}
min_cell_idx.set(cur_cell_idx + 1);
} else {
match iter_dir {
IterationDirection::Forwards => {
min_cell_idx.set(cur_cell_idx + 1);
}
IterationDirection::Backwards => {
max_cell_idx.set(cur_cell_idx - 1);
}
}
}
}
}
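    /// Compares the record currently loaded into the cursor against the seek key.
    /// Returns the raw ordering (record vs. key, used to steer the binary search)
    /// together with whether the record satisfies `seek_op`.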
fn compare_with_current_record(
&self,
key_values: &[RefValue],
seek_op: SeekOp,
record_comparer: &RecordCompare,
index_info: &IndexInfo,
) -> (Ordering, bool) {
let record = self.get_immutable_record();
let record = record.as_ref().unwrap();
let tie_breaker = get_tie_breaker_from_seek_op(seek_op);
let cmp = record_comparer
.compare(record, key_values, index_info, 0, tie_breaker)
.unwrap();
let found = match seek_op {
SeekOp::GT => cmp.is_gt(),
SeekOp::GE { eq_only: true } => cmp.is_eq(),
SeekOp::GE { eq_only: false } => cmp.is_ge(),
SeekOp::LE { eq_only: true } => cmp.is_eq(),
SeekOp::LE { eq_only: false } => cmp.is_le(),
SeekOp::LT => cmp.is_lt(),
};
(cmp, found)
}
#[instrument(skip_all, level = Level::INFO)]
pub fn move_to(&mut self, key: SeekKey<'_>, cmp: SeekOp) -> Result<IOResult<()>> {
turso_assert!(
self.mv_cursor.is_none(),
"attempting to move with MV cursor"
);
tracing::trace!(?key, ?cmp);
// For a table with N rows, we can find any row by row id in O(log(N)) time by starting at the root page and following the B-tree pointers.
// B-trees consist of interior pages and leaf pages. Interior pages contain pointers to other pages, while leaf pages contain the actual row data.
//
        // Conceptually, each interior cell in an interior page has a rowid and a left child node, and the page itself has a right-most child node.
// Example: consider an interior page that contains cells C1(rowid=10), C2(rowid=20), C3(rowid=30).
// - All rows with rowids <= 10 are in the left child node of C1.
// - All rows with rowids > 10 and <= 20 are in the left child node of C2.
// - All rows with rowids > 20 and <= 30 are in the left child node of C3.
// - All rows with rowids > 30 are in the right-most child node of the page.
//
// There will generally be multiple levels of interior pages before we reach a leaf page,
// so we need to follow the interior page pointers until we reach the leaf page that contains the row we are looking for (if it exists).
//
// Here's a high-level overview of the algorithm:
// 1. Since we start at the root page, its cells are all interior cells.
// 2. We scan the interior cells until we find a cell whose rowid is greater than or equal to the rowid we are looking for.
// 3. Follow the left child pointer of the cell we found in step 2.
// a. In case none of the cells in the page have a rowid greater than or equal to the rowid we are looking for,
// we follow the right-most child pointer of the page instead (since all rows with rowids greater than the rowid we are looking for are in the right-most child node).
// 4. We are now at a new page. If it's another interior page, we repeat the process from step 2. If it's a leaf page, we continue to step 5.
// 5. We scan the leaf cells in the leaf page until we find the cell whose rowid is equal to the rowid we are looking for.
// This cell contains the actual data we are looking for.
// 6. If we find the cell, we return the record. Otherwise, we return an empty result.
// If we are at the beginning/end of seek state, start a new move from the root.
if matches!(
self.seek_state,
// these are stages that happen at the leaf page, so we can consider that the previous seek finished and we can start a new one.
CursorSeekState::LeafPageBinarySearch { .. } | CursorSeekState::FoundLeaf { .. }
) {
self.seek_state = CursorSeekState::Start;
}
if matches!(self.seek_state, CursorSeekState::Start) {
self.move_to_root()?;
}
let ret = match key {
SeekKey::TableRowId(rowid_key) => self.tablebtree_move_to(rowid_key, cmp),
SeekKey::IndexKey(index_key) => self.indexbtree_move_to(index_key, cmp),
};
return_if_io!(ret);
Ok(IOResult::Done(()))
}
/// Insert a record into the btree.
/// If the insert operation overflows the page, it will be split and the btree will be balanced.
#[instrument(skip_all, level = Level::DEBUG)]
fn insert_into_page(&mut self, bkey: &BTreeKey) -> Result<IOResult<()>> {
let record = bkey
.get_record()
.expect("expected record present on insert");
let record_values = record.get_values();
if let CursorState::None = &self.state {
self.state = CursorState::Write(WriteInfo::new());
}
let ret = loop {
let write_state = {
let write_info = self
.state
.mut_write_info()
.expect("can't insert while counting");
write_info.state
};
match write_state {
WriteState::Start => {
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
// get page and find cell
let cell_idx = {
return_if_locked!(page.get());
let page = page.get();
self.pager.add_dirty(&page);
self.stack.current_cell_index()
};
if cell_idx == -1 {
// This might be a brand new table and the cursor hasn't moved yet. Let's advance it to the first slot.
self.stack.set_cell_index(0);
}
let cell_idx = self.stack.current_cell_index() as usize;
tracing::debug!(cell_idx);
                    // If the cell index is less than the total cell count, check whether this is an
                    // existing rowid; if so, we are going to update / overwrite the cell.
if cell_idx < page.get().get_contents().cell_count() {
let cell = page
.get()
.get_contents()
.cell_get(cell_idx, self.usable_space())?;
match cell {
BTreeCell::TableLeafCell(tbl_leaf) => {
if tbl_leaf.rowid == bkey.to_rowid() {
tracing::debug!("TableLeafCell: found exact match with cell_idx={cell_idx}, overwriting");
self.overwrite_cell(page.clone(), cell_idx, record)?;
let write_info = self
.state
.mut_write_info()
.expect("expected write info");
if page.get().get_contents().overflow_cells.is_empty() {
write_info.state = WriteState::Finish;
} else {
write_info.state = WriteState::BalanceStart;
// If we balance, we must save the cursor position and seek to it later.
// FIXME: we shouldn't have both DeleteState::SeekAfterBalancing and
                                        // save_context()/restore_context(), they are practically the same thing.
self.save_context(CursorContext::TableRowId(bkey.to_rowid()));
}
continue;
}
}
BTreeCell::IndexLeafCell(..) | BTreeCell::IndexInteriorCell(..) => {
return_if_io!(self.record());
let cmp = compare_immutable(
record_values.as_slice(),
self.get_immutable_record()
.as_ref()
.unwrap()
.get_values().as_slice(),
&self.index_info.as_ref().unwrap().key_info,
);
if cmp == Ordering::Equal {
tracing::debug!("IndexLeafCell: found exact match with cell_idx={cell_idx}, overwriting");
self.has_record.set(true);
self.overwrite_cell(page.clone(), cell_idx, record)?;
let write_info = self
.state
.mut_write_info()
.expect("expected write info");
if page.get().get_contents().overflow_cells.is_empty() {
write_info.state = WriteState::Finish;
} else {
write_info.state = WriteState::BalanceStart;
// If we balance, we must save the cursor position and seek to it later.
// FIXME: we shouldn't have both DeleteState::SeekAfterBalancing and
                                        // save_context()/restore_context(), they are practically the same thing.
self.save_context(CursorContext::IndexKeyRowId((*record).clone()));
}
continue;
} else {
turso_assert!(
!matches!(cell, BTreeCell::IndexInteriorCell(..)),
"we should not be inserting a new index interior cell. the only valid operation on an index interior cell is an overwrite!"
);
}
}
                            other => panic!("unexpected cell type, expected TableLeaf, IndexLeaf or IndexInterior, found: {other:?}"),
}
}
// insert cell
let mut cell_payload: Vec<u8> = Vec::with_capacity(record_values.len() + 4);
fill_cell_payload(
page.get().get().contents.as_ref().unwrap(),
bkey.maybe_rowid(),
&mut cell_payload,
cell_idx,
record,
self.usable_space(),
self.pager.clone(),
);
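                    // fill_cell_payload serializes the record into on-disk cell format; payloads
                    // too large to fit locally spill onto overflow pages, which is presumably why
                    // the pager handle is passed in.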
// insert
let overflow = {
let page = page.get();
let contents = page.get().contents.as_mut().unwrap();
tracing::debug!(name: "overflow", cell_count = contents.cell_count());
insert_into_cell(
contents,
cell_payload.as_slice(),
cell_idx,
self.usable_space() as u16,
)?;
!contents.overflow_cells.is_empty()
};
self.stack.set_cell_index(cell_idx as i32);
if overflow {
// A balance will happen so save the key we were inserting
tracing::debug!(page = page.get().get().id, cell_idx, "balance triggered:");
self.save_context(match bkey {
BTreeKey::TableRowId(rowid) => CursorContext::TableRowId(rowid.0),
BTreeKey::IndexKey(record) => {
CursorContext::IndexKeyRowId((*record).clone())
}
});
let write_info = self
.state
.mut_write_info()
.expect("can't count while inserting");
write_info.state = WriteState::BalanceStart;
} else {
let write_info = self
.state
.mut_write_info()
.expect("can't count while inserting");
write_info.state = WriteState::Finish;
}
}
WriteState::BalanceStart
| WriteState::BalanceFreePages { .. }
| WriteState::BalanceNonRootPickSiblings
| WriteState::BalanceNonRootDoBalancing => {
return_if_io!(self.balance(None));
}
WriteState::Finish => {
break Ok(IOResult::Done(()));
}
};
};
if matches!(self.state.write_info().unwrap().state, WriteState::Finish) {
// if there was a balance triggered, the cursor position is invalid.
// it's probably not the greatest idea in the world to do this eagerly here,
// but at least it works.
return_if_io!(self.restore_context());
}
self.state = CursorState::None;
ret
}
/// Balance a leaf page.
/// Balancing is done when a page overflows.
/// see e.g. https://en.wikipedia.org/wiki/B-tree
///
/// This is a naive algorithm that doesn't try to distribute cells evenly by content.
/// It will try to split the page in half by keys not by content.
/// Sqlite tries to have a page at least 40% full.
///
/// `balance_ancestor_at_depth` specifies whether to balance an ancestor page at a specific depth.
/// If `None`, balancing stops when a level is encountered that doesn't need balancing.
/// If `Some(depth)`, the page on the stack at depth `depth` will be rebalanced after balancing the current page.
#[instrument(skip(self), level = Level::DEBUG)]
fn balance(&mut self, balance_ancestor_at_depth: Option<usize>) -> Result<IOResult<()>> {
turso_assert!(
matches!(self.state, CursorState::Write(_)),
"Cursor must be in balancing state"
);
loop {
let state = self.state.write_info().expect("must be balancing").state;
match state {
WriteState::BalanceStart => {
assert!(
self.state
.write_info()
.unwrap()
.balance_info
.borrow()
.is_none(),
"BalanceInfo should be empty on start"
);
let current_page = self.stack.top();
let next_balance_depth =
balance_ancestor_at_depth.unwrap_or(self.stack.current());
{
                        // Check whether this level needs balancing at all.
                        // Stop if the current page has no overflow cells AND either:
                        // - it is the root page, OR
                        // - its free space is at most 2/3rds of the total usable space
                        //   on the page
                        //
// https://github.com/sqlite/sqlite/blob/0aa95099f5003dc99f599ab77ac0004950b281ef/src/btree.c#L9064-L9071
let current_page = current_page.get();
let page = current_page.get().contents.as_mut().unwrap();
let usable_space = self.usable_space();
let free_space = compute_free_space(page, usable_space as u16);
let this_level_is_already_balanced = page.overflow_cells.is_empty()
&& (!self.stack.has_parent()
|| free_space as usize * 3 <= usable_space * 2);
if this_level_is_already_balanced {
if self.stack.current() > next_balance_depth {
while self.stack.current() > next_balance_depth {
// Even though this level is already balanced, we know there's an upper level that needs balancing.
// So we pop the stack and continue.
self.stack.pop();
}
continue;
}
// Otherwise, we're done.
let write_info = self.state.mut_write_info().unwrap();
write_info.state = WriteState::Finish;
return Ok(IOResult::Done(()));
}
}
if !self.stack.has_parent() {
self.balance_root()?;
}
let write_info = self.state.mut_write_info().unwrap();
write_info.state = WriteState::BalanceNonRootPickSiblings;
self.stack.pop();
return_if_io!(self.balance_non_root());
}
WriteState::BalanceNonRootPickSiblings
| WriteState::BalanceNonRootDoBalancing
| WriteState::BalanceFreePages { .. } => {
return_if_io!(self.balance_non_root());
}
WriteState::Finish => return Ok(IOResult::Done(())),
_ => panic!("unexpected state on balance {state:?}"),
}
}
}
/// Balance a non root page by trying to balance cells between a maximum of 3 siblings that should be neighboring the page that overflowed/underflowed.
#[instrument(skip_all, level = Level::DEBUG)]
fn balance_non_root(&mut self) -> Result<IOResult<()>> {
turso_assert!(
matches!(self.state, CursorState::Write(_)),
"Cursor must be in balancing state"
);
let state = self.state.write_info().expect("must be balancing").state;
tracing::debug!(?state);
let (next_write_state, result) = match state {
WriteState::Start => todo!(),
WriteState::BalanceStart => todo!(),
WriteState::BalanceNonRootPickSiblings => {
let parent_page = self.stack.top();
return_if_locked_maybe_load!(self.pager, parent_page);
let parent_page = parent_page.get();
let parent_contents = parent_page.get_contents();
let page_type = parent_contents.page_type();
turso_assert!(
matches!(page_type, PageType::IndexInterior | PageType::TableInterior),
"expected index or table interior page"
);
let number_of_cells_in_parent =
parent_contents.cell_count() + parent_contents.overflow_cells.len();
                // If `seek` moved to the rightmost page, the cell index will be out of bounds, i.e. cell_count + 1.
                // In any other case, `seek` leaves the cursor at the correct index.
let past_rightmost_pointer =
self.stack.current_cell_index() as usize == number_of_cells_in_parent + 1;
if past_rightmost_pointer {
self.stack.retreat();
} else if !parent_contents.overflow_cells.is_empty() {
                    // The ONLY way we can have an overflow cell in the parent is if we replaced an interior cell with a cell from the child, and that replacement did not fit.
// This can only happen on index btrees.
if matches!(page_type, PageType::IndexInterior) {
turso_assert!(parent_contents.overflow_cells.len() == 1, "index interior page must have no more than 1 overflow cell, as a result of InteriorNodeReplacement");
} else {
turso_assert!(false, "{page_type:?} must have no overflow cells");
}
let overflow_cell = parent_contents.overflow_cells.first().unwrap();
let parent_page_cell_idx = self.stack.current_cell_index() as usize;
// Parent page must be positioned at the divider cell that overflowed due to the replacement.
turso_assert!(
overflow_cell.index == parent_page_cell_idx,
"overflow cell index must be the result of InteriorNodeReplacement that leaves both child and parent (id={}) unbalanced, and hence parent page's position must = overflow_cell.index. Instead got: parent_page_cell_idx={parent_page_cell_idx} overflow_cell.index={}",
parent_page.get().id,
overflow_cell.index
);
}
self.pager.add_dirty(&parent_page);
let parent_contents = parent_page.get().contents.as_ref().unwrap();
let page_to_balance_idx = self.stack.current_cell_index() as usize;
tracing::debug!(
"balance_non_root(parent_id={} page_to_balance_idx={})",
parent_page.get().id,
page_to_balance_idx
);
// Part 1: Find the sibling pages to balance
let mut pages_to_balance: [Option<BTreePage>; MAX_SIBLING_PAGES_TO_BALANCE] =
[const { None }; MAX_SIBLING_PAGES_TO_BALANCE];
turso_assert!(
page_to_balance_idx <= parent_contents.cell_count(),
"page_to_balance_idx={page_to_balance_idx} is out of bounds for parent cell count {number_of_cells_in_parent}"
);
                // As there will be at most 3 pages used to balance:
                // sibling_pointer is the index representing one of those 3 pages, and we initialize it to the last possible page.
                // next_divider is the first divider that points to the first page of the 3 pages.
let (sibling_pointer, first_cell_divider) = match number_of_cells_in_parent {
n if n < 2 => (number_of_cells_in_parent, 0),
2 => (2, 0),
                    // Here we will have at least 2 cells and one right pointer, therefore we can get 3 siblings.
                    // In the case of 2, we will take all pages to balance.
_ => {
// In case of > 3 we have to check which ones to get
let next_divider = if page_to_balance_idx == 0 {
// first cell, take first 3
0
} else if page_to_balance_idx == number_of_cells_in_parent {
// Page corresponds to right pointer, so take last 3
number_of_cells_in_parent - 2
} else {
// Some cell in the middle, so we want to take sibling on left and right.
page_to_balance_idx - 1
};
(2, next_divider)
}
};
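                // E.g. with 5 cells in the parent and page_to_balance_idx == 3, this picks
                // first_cell_divider = 2 and sibling_pointer = 2: the three siblings are the
                // left children of divider cells 2, 3 and 4 (the page being balanced plus one
                // neighbour on each side).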
let sibling_count = sibling_pointer + 1;
let last_sibling_is_right_pointer = sibling_pointer + first_cell_divider
- parent_contents.overflow_cells.len()
== parent_contents.cell_count();
// Get the right page pointer that we will need to update later
let right_pointer = if last_sibling_is_right_pointer {
parent_contents.rightmost_pointer_raw().unwrap()
} else {
let max_overflow_cells = if matches!(page_type, PageType::IndexInterior) {
1
} else {
0
};
turso_assert!(
parent_contents.overflow_cells.len() <= max_overflow_cells,
"must have at most {max_overflow_cells} overflow cell in the parent"
);
// OVERFLOW CELL ADJUSTMENT:
// Let there be parent with cells [0,1,2,3,4].
// Let's imagine the cell at idx 2 gets replaced with a new payload that causes it to overflow.
// See handling of InteriorNodeReplacement in btree.rs.
//
// In this case the rightmost divider is going to be 3 (2 is the middle one and we pick neighbors 1-3).
// drop_cell(): [0,1,2,3,4] -> [0,1,3,4] <-- cells on right side get shifted left!
// insert_into_cell(): [0,1,3,4] -> [0,1,3,4] + overflow cell (2) <-- crucially, no physical shifting happens, overflow cell is stored separately
//
// This means '3' is actually physically located at index '2'.
// So IF the parent has an overflow cell, we need to subtract 1 to get the actual rightmost divider cell idx to physically read from.
// The formula for the actual cell idx is:
// first_cell_divider + sibling_pointer - parent_contents.overflow_cells.len()
// so in the above case:
// actual_cell_idx = 1 + 2 - 1 = 2
//
// In the case where the last divider cell is the overflow cell, there would be no left-shifting of cells in drop_cell(),
// because they are still positioned correctly (imagine .pop() from a vector).
// However, note that we are always looking for the _rightmost_ child page pointer between the (max 2) dividers, and for any case where the last divider cell is the overflow cell,
// the 'last_sibling_is_right_pointer' condition will also be true (since the overflow cell's left child will be the middle page), so we won't enter this code branch.
//
// Hence: when we enter this branch with overflow_cells.len() == 1, we know that left-shifting has happened and we need to subtract 1.
let actual_cell_idx =
first_cell_divider + sibling_pointer - parent_contents.overflow_cells.len();
let (start_of_cell, _) =
parent_contents.cell_get_raw_region(actual_cell_idx, self.usable_space());
let buf = parent_contents.as_ptr().as_mut_ptr();
unsafe { buf.add(start_of_cell) }
};
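                // Note that right_pointer is a raw pointer into the parent page's buffer, so it
                // is only valid until the parent is defragmented or has cells inserted; this is
                // why the rightmost pointer is rewritten before divider cells are re-inserted
                // later on.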
// load sibling pages
// start loading right page first
let mut pgno: u32 = unsafe { right_pointer.cast::<u32>().read().swap_bytes() };
let current_sibling = sibling_pointer;
for i in (0..=current_sibling).rev() {
let page = self.read_page(pgno as usize)?;
{
// mark as dirty
let sibling_page = page.get();
self.pager.add_dirty(&sibling_page);
}
#[cfg(debug_assertions)]
{
return_if_locked!(page.get());
debug_validate_cells!(
&page.get().get_contents(),
self.usable_space() as u16
);
}
pages_to_balance[i].replace(page);
if i == 0 {
break;
}
let next_cell_divider = i + first_cell_divider - 1;
let divider_is_overflow_cell = parent_contents
.overflow_cells
.first()
.is_some_and(|overflow_cell| overflow_cell.index == next_cell_divider);
if divider_is_overflow_cell {
turso_assert!(
matches!(parent_contents.page_type(), PageType::IndexInterior),
"expected index interior page, got {:?}",
parent_contents.page_type()
);
turso_assert!(
parent_contents.overflow_cells.len() == 1,
"must have a single overflow cell in the parent, as a result of InteriorNodeReplacement"
);
let overflow_cell = parent_contents.overflow_cells.first().unwrap();
pgno = u32::from_be_bytes(overflow_cell.payload[0..4].try_into().unwrap());
} else {
// grep for 'OVERFLOW CELL ADJUSTMENT' for explanation.
                        // here we only subtract 1 if the divider cell has been shifted left, i.e. the overflow cell was placed to the left of
                        // this cell.
let actual_cell_idx =
if let Some(overflow_cell) = parent_contents.overflow_cells.first() {
if next_cell_divider < overflow_cell.index {
next_cell_divider
} else {
next_cell_divider - 1
}
} else {
next_cell_divider
};
pgno =
match parent_contents.cell_get(actual_cell_idx, self.usable_space())? {
BTreeCell::TableInteriorCell(TableInteriorCell {
left_child_page,
..
})
| BTreeCell::IndexInteriorCell(IndexInteriorCell {
left_child_page,
..
}) => left_child_page,
other => {
crate::bail_corrupt_error!(
"expected interior cell, got {:?}",
other
)
}
};
}
}
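                // All (up to 3) sibling pages have now been requested, rightmost first. The
                // reads may complete asynchronously, so the next state re-checks that every
                // sibling is loaded before the actual balancing starts.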
#[cfg(debug_assertions)]
{
let page_type_of_siblings = pages_to_balance[0]
.as_ref()
.unwrap()
.get()
.get_contents()
.page_type();
for page in pages_to_balance.iter().take(sibling_count) {
return_if_locked_maybe_load!(self.pager, page.as_ref().unwrap());
let page = page.as_ref().unwrap().get();
let contents = page.get_contents();
debug_validate_cells!(&contents, self.usable_space() as u16);
assert_eq!(contents.page_type(), page_type_of_siblings);
}
}
self.state
.write_info()
.unwrap()
.balance_info
.replace(Some(BalanceInfo {
pages_to_balance,
rightmost_pointer: right_pointer,
divider_cell_payloads: [const { None }; MAX_SIBLING_PAGES_TO_BALANCE - 1],
sibling_count,
first_divider_cell: first_cell_divider,
}));
(WriteState::BalanceNonRootDoBalancing, Ok(IOResult::IO))
}
WriteState::BalanceNonRootDoBalancing => {
// Ensure all involved pages are in memory.
let write_info = self.state.write_info().unwrap();
let mut balance_info = write_info.balance_info.borrow_mut();
let balance_info = balance_info.as_mut().unwrap();
for page in balance_info
.pages_to_balance
.iter()
.take(balance_info.sibling_count)
{
let page = page.as_ref().unwrap();
return_if_locked_maybe_load!(self.pager, page);
}
// Start balancing.
let parent_page_btree = self.stack.top();
let parent_page = parent_page_btree.get();
let parent_contents = parent_page.get_contents();
let parent_is_root = !self.stack.has_parent();
// 1. Collect cell data from divider cells, and count the total number of cells to be distributed.
// The count includes: all cells and overflow cells from the sibling pages, and divider cells from the parent page,
// excluding the rightmost divider, which will not be dropped from the parent; instead it will be updated at the end.
let mut total_cells_to_redistribute = 0;
let mut pages_to_balance_new: [Option<BTreePage>;
MAX_NEW_SIBLING_PAGES_AFTER_BALANCE] =
[const { None }; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE];
for i in (0..balance_info.sibling_count).rev() {
let sibling_page = balance_info.pages_to_balance[i].as_ref().unwrap();
let sibling_page = sibling_page.get();
turso_assert!(sibling_page.is_loaded(), "sibling page is not loaded");
let sibling_contents = sibling_page.get_contents();
total_cells_to_redistribute += sibling_contents.cell_count();
total_cells_to_redistribute += sibling_contents.overflow_cells.len();
                    // The right pointer is not dropped; we simply update it at the end. This could be a divider cell that points
                    // to the last page in the list of pages to balance, or it could be the parent page's rightmost pointer.
let is_last_sibling = i == balance_info.sibling_count - 1;
if is_last_sibling {
continue;
}
// Since we know we have a left sibling, take the divider that points to left sibling of this page
let cell_idx = balance_info.first_divider_cell + i;
let divider_is_overflow_cell = parent_contents
.overflow_cells
.first()
.is_some_and(|overflow_cell| overflow_cell.index == cell_idx);
let cell_buf = if divider_is_overflow_cell {
turso_assert!(
matches!(parent_contents.page_type(), PageType::IndexInterior),
"expected index interior page, got {:?}",
parent_contents.page_type()
);
turso_assert!(
parent_contents.overflow_cells.len() == 1,
"must have a single overflow cell in the parent, as a result of InteriorNodeReplacement"
);
let overflow_cell = parent_contents.overflow_cells.first().unwrap();
&overflow_cell.payload
} else {
// grep for 'OVERFLOW CELL ADJUSTMENT' for explanation.
// here we can subtract overflow_cells.len() every time, because we are iterating right-to-left,
// so if we are to the left of the overflow cell, it has already been cleared from the parent and overflow_cells.len() is 0.
let actual_cell_idx = cell_idx - parent_contents.overflow_cells.len();
let (cell_start, cell_len) = parent_contents
.cell_get_raw_region(actual_cell_idx, self.usable_space());
let buf = parent_contents.as_ptr();
&buf[cell_start..cell_start + cell_len]
};
// Count the divider cell itself (which will be dropped from the parent)
total_cells_to_redistribute += 1;
tracing::debug!(
"balance_non_root(drop_divider_cell, first_divider_cell={}, divider_cell={}, left_pointer={})",
balance_info.first_divider_cell,
i,
read_u32(cell_buf, 0)
);
// TODO(pere): make this reference and not copy
balance_info.divider_cell_payloads[i].replace(cell_buf.to_vec());
if divider_is_overflow_cell {
tracing::debug!(
"clearing overflow cells from parent cell_idx={}",
cell_idx
);
parent_contents.overflow_cells.clear();
} else {
// grep for 'OVERFLOW CELL ADJUSTMENT' for explanation.
// here we can subtract overflow_cells.len() every time, because we are iterating right-to-left,
// so if we are to the left of the overflow cell, it has already been cleared from the parent and overflow_cells.len() is 0.
let actual_cell_idx = cell_idx - parent_contents.overflow_cells.len();
tracing::trace!(
"dropping divider cell from parent cell_idx={} count={}",
actual_cell_idx,
parent_contents.cell_count()
);
drop_cell(parent_contents, actual_cell_idx, self.usable_space() as u16)?;
}
}
/* 2. Initialize CellArray with all the cells used for distribution, this includes divider cells if !leaf. */
let mut cell_array = CellArray {
cell_payloads: Vec::with_capacity(total_cells_to_redistribute),
cell_count_per_page_cumulative: [0; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE],
};
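                // cell_count_per_page_cumulative is a prefix sum over the flat cell_payloads
                // array: entry i is one past the index of the last cell assigned to sibling i.
                // E.g. if three siblings end up with 3, 4 and 2 cells, the array reads [3, 7, 9].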
let cells_capacity_start = cell_array.cell_payloads.capacity();
let mut total_cells_inserted = 0;
// This is otherwise identical to CellArray.cell_count_per_page_cumulative,
// but we exclusively track what the prefix sums were _before_ we started redistributing cells.
let mut old_cell_count_per_page_cumulative: [u16;
MAX_NEW_SIBLING_PAGES_AFTER_BALANCE] = [0; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE];
let page_type = balance_info.pages_to_balance[0]
.as_ref()
.unwrap()
.get()
.get_contents()
.page_type();
tracing::debug!("balance_non_root(page_type={:?})", page_type);
let is_table_leaf = matches!(page_type, PageType::TableLeaf);
let is_leaf = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf);
for (i, old_page) in balance_info
.pages_to_balance
.iter()
.take(balance_info.sibling_count)
.enumerate()
{
let old_page = old_page.as_ref().unwrap().get();
let old_page_contents = old_page.get_contents();
debug_validate_cells!(&old_page_contents, self.usable_space() as u16);
for cell_idx in 0..old_page_contents.cell_count() {
let (cell_start, cell_len) =
old_page_contents.cell_get_raw_region(cell_idx, self.usable_space());
let buf = old_page_contents.as_ptr();
let cell_buf = &mut buf[cell_start..cell_start + cell_len];
// TODO(pere): make this reference and not copy
cell_array.cell_payloads.push(to_static_buf(cell_buf));
}
// Insert overflow cells into correct place
let offset = total_cells_inserted;
for overflow_cell in old_page_contents.overflow_cells.iter_mut() {
cell_array.cell_payloads.insert(
offset + overflow_cell.index,
to_static_buf(&mut Pin::as_mut(&mut overflow_cell.payload)),
);
}
old_cell_count_per_page_cumulative[i] = cell_array.cell_payloads.len() as u16;
let mut cells_inserted =
old_page_contents.cell_count() + old_page_contents.overflow_cells.len();
let is_last_sibling = i == balance_info.sibling_count - 1;
if !is_last_sibling && !is_table_leaf {
                        // If we are an index page or an interior table page we need to take the divider cell too.
                        // But we don't need the last divider, as it will remain the same.
let mut divider_cell = balance_info.divider_cell_payloads[i]
.as_mut()
.unwrap()
.as_mut_slice();
                        // TODO(pere): if the old pages are leaf pages (i.e. index leaf pages), we need to strip the page pointers
                        // from the divider cells in the index interior page (parent), because those should not be included.
cells_inserted += 1;
if !is_leaf {
// This divider cell needs to be updated with new left pointer,
let right_pointer = old_page_contents.rightmost_pointer().unwrap();
divider_cell[..LEFT_CHILD_PTR_SIZE_BYTES]
.copy_from_slice(&right_pointer.to_be_bytes());
} else {
// index leaf
turso_assert!(
divider_cell.len() >= LEFT_CHILD_PTR_SIZE_BYTES,
"divider cell is too short"
);
// let's strip the page pointer
divider_cell = &mut divider_cell[LEFT_CHILD_PTR_SIZE_BYTES..];
}
cell_array.cell_payloads.push(to_static_buf(divider_cell));
}
total_cells_inserted += cells_inserted;
}
turso_assert!(
cell_array.cell_payloads.capacity() == cells_capacity_start,
"calculation of max cells was wrong"
);
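                // If cell_payloads never grew past its initial capacity, then
                // total_cells_to_redistribute was a correct upper bound on the number of
                // redistributed cells (including divider and overflow cells).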
// Let's copy all cells for later checks
#[cfg(debug_assertions)]
let mut cells_debug = Vec::new();
#[cfg(debug_assertions)]
{
for cell in &cell_array.cell_payloads {
cells_debug.push(cell.to_vec());
if is_leaf {
assert!(cell[0] != 0)
}
}
}
#[cfg(debug_assertions)]
validate_cells_after_insertion(&cell_array, is_table_leaf);
                /* 3. Initialize the current size of every page, including overflow cells and divider cells that might be included. */
let mut new_page_sizes: [i64; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE] =
[0; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE];
let header_size = if is_leaf {
LEAF_PAGE_HEADER_SIZE_BYTES
} else {
INTERIOR_PAGE_HEADER_SIZE_BYTES
};
                // Number of usable bytes beyond the page header; different from the global
                // usable space, which includes the header.
let usable_space = self.usable_space() - header_size;
for i in 0..balance_info.sibling_count {
cell_array.cell_count_per_page_cumulative[i] =
old_cell_count_per_page_cumulative[i];
let page = &balance_info.pages_to_balance[i].as_ref().unwrap();
let page = page.get();
let page_contents = page.get_contents();
let free_space = compute_free_space(page_contents, self.usable_space() as u16);
new_page_sizes[i] = usable_space as i64 - free_space as i64;
for overflow in &page_contents.overflow_cells {
                        // 2 bytes to account for the cell pointer
new_page_sizes[i] += 2 + overflow.payload.len() as i64;
}
let is_last_sibling = i == balance_info.sibling_count - 1;
if !is_leaf && !is_last_sibling {
// Account for divider cell which is included in this page.
new_page_sizes[i] += cell_array.cell_payloads
[cell_array.cell_count_up_to_page(i)]
.len() as i64;
}
}
                /* 4. Now let's try to move cells to the left, stacking them without exceeding the maximum size of a page.
                There are two cases:
                * If the current page has too many cells, it will move them to the next page.
                * If it still has space and can take a cell from the right, it will take it.
                There is a caveat here. Taking a cell from the right might take cells from page i+1, i+2 or i+3, so not
                necessarily an adjacent page. But we decrease the size of the adjacent page when we move from the right.
                This might cause an intermittent state where a page can have size < 0.
                This also calculates how many pages are required to balance the cells, stored in sibling_count_new.
                */
// Try to pack as many cells to the left
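                // A rough sketch of this packing pass, assuming usable_space = 1000 and initial
                // sibling sizes [1200, 300, 400]: page 0 sheds trailing cells to page 1 until it
                // fits, page 1 may in turn shed to page 2, and if everything still overflows a
                // new sibling is appended (at most 5 in total, per the assert below).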
let mut sibling_count_new = balance_info.sibling_count;
let mut i = 0;
while i < sibling_count_new {
// First try to move cells to the right if they do not fit
while new_page_sizes[i] > usable_space as i64 {
let needs_new_page = i + 1 >= sibling_count_new;
if needs_new_page {
sibling_count_new = i + 2;
turso_assert!(
sibling_count_new <= 5,
"it is corrupt to require more than 5 pages to balance 3 siblings"
);
new_page_sizes[sibling_count_new - 1] = 0;
cell_array.cell_count_per_page_cumulative[sibling_count_new - 1] =
cell_array.cell_payloads.len() as u16;
}
let size_of_cell_to_remove_from_left =
2 + cell_array.cell_payloads[cell_array.cell_count_up_to_page(i) - 1]
.len() as i64;
new_page_sizes[i] -= size_of_cell_to_remove_from_left;
let size_of_cell_to_move_right = if !is_table_leaf {
if cell_array.cell_count_per_page_cumulative[i]
< cell_array.cell_payloads.len() as u16
{
// This means we move to the right page the divider cell and we
// promote left cell to divider
CELL_PTR_SIZE_BYTES as i64
+ cell_array.cell_payloads[cell_array.cell_count_up_to_page(i)]
.len() as i64
} else {
0
}
} else {
size_of_cell_to_remove_from_left
};
new_page_sizes[i + 1] += size_of_cell_to_move_right;
cell_array.cell_count_per_page_cumulative[i] -= 1;
}
// Now try to take from the right if we didn't have enough
while cell_array.cell_count_per_page_cumulative[i]
< cell_array.cell_payloads.len() as u16
{
let size_of_cell_to_remove_from_right = CELL_PTR_SIZE_BYTES as i64
+ cell_array.cell_payloads[cell_array.cell_count_up_to_page(i)].len()
as i64;
                        let would_overflow = new_page_sizes[i] + size_of_cell_to_remove_from_right
                            > usable_space as i64;
                        if would_overflow {
                            break;
                        }
new_page_sizes[i] += size_of_cell_to_remove_from_right;
cell_array.cell_count_per_page_cumulative[i] += 1;
let size_of_cell_to_remove_from_right = if !is_table_leaf {
if cell_array.cell_count_per_page_cumulative[i]
< cell_array.cell_payloads.len() as u16
{
CELL_PTR_SIZE_BYTES as i64
+ cell_array.cell_payloads[cell_array.cell_count_up_to_page(i)]
.len() as i64
} else {
0
}
} else {
size_of_cell_to_remove_from_right
};
new_page_sizes[i + 1] -= size_of_cell_to_remove_from_right;
}
                    // Check if this page already covers up to the last cell. If so, we only need pages up to this one,
                    // so update the number of new pages to i + 1.
let page_completes_all_cells = cell_array.cell_count_per_page_cumulative[i]
>= cell_array.cell_payloads.len() as u16;
if page_completes_all_cells {
sibling_count_new = i + 1;
break;
}
i += 1;
if i >= sibling_count_new {
break;
}
}
tracing::debug!(
"balance_non_root(sibling_count={}, sibling_count_new={}, cells={})",
balance_info.sibling_count,
sibling_count_new,
cell_array.cell_payloads.len()
);
/* 5. Balance pages starting from a left stacked cell state and move them to right trying to maintain a balanced state
where we only move from left to right if it will not unbalance both pages, meaning moving left to right won't make
right page bigger than left page.
*/
// Comment borrowed from SQLite src/btree.c
// The packing computed by the previous block is biased toward the siblings
// on the left side (siblings with smaller keys). The left siblings are
// always nearly full, while the right-most sibling might be nearly empty.
// The next block of code attempts to adjust the packing of siblings to
// get a better balance.
//
// This adjustment is more than an optimization. The packing above might
// be so out of balance as to be illegal. For example, the right-most
// sibling might be completely empty. This adjustment is not optional.
for i in (1..sibling_count_new).rev() {
let mut size_right_page = new_page_sizes[i];
let mut size_left_page = new_page_sizes[i - 1];
let mut cell_left = cell_array.cell_count_per_page_cumulative[i - 1] - 1;
// When table leaves are being balanced, divider cells are not part of the balancing,
                    // because table dividers, unlike index dividers, don't have payloads.
// Hence:
// - For table leaves: the same cell that is removed from left is added to right.
// - For all other page types: the divider cell is added to right, and the last non-divider cell is removed from left;
// the cell removed from the left will later become a new divider cell in the parent page.
// TABLE LEAVES BALANCING:
// =======================
// Before balancing:
// LEFT RIGHT
// +-----+-----+-----+-----+ +-----+-----+
// | C1 | C2 | C3 | C4 | | C5 | C6 |
// +-----+-----+-----+-----+ +-----+-----+
// ^ ^
// (too full) (has space)
// After balancing:
// LEFT RIGHT
// +-----+-----+-----+ +-----+-----+-----+
// | C1 | C2 | C3 | | C4 | C5 | C6 |
// +-----+-----+-----+ +-----+-----+-----+
// ^
// (C4 moved directly)
//
                    // (C3's rowid also becomes the divider cell's rowid in the parent page)
//
// OTHER PAGE TYPES BALANCING:
// ===========================
// Before balancing:
// PARENT: [...|D1|...]
// |
// LEFT RIGHT
// +-----+-----+-----+-----+ +-----+-----+
// | K1 | K2 | K3 | K4 | | K5 | K6 |
// +-----+-----+-----+-----+ +-----+-----+
// ^ ^
// (too full) (has space)
// After balancing:
// PARENT: [...|K4|...] <-- K4 becomes new divider
// |
// LEFT RIGHT
// +-----+-----+-----+ +-----+-----+-----+
// | K1 | K2 | K3 | | D1 | K5 | K6 |
// +-----+-----+-----+ +-----+-----+-----+
// ^
// (old divider D1 added to right)
// Legend:
// - C# = Cell (table leaf)
// - K# = Key cell (index/internal node)
// - D# = Divider cell
let mut cell_right = if is_table_leaf {
cell_left
} else {
cell_left + 1
};
loop {
let cell_left_size = cell_array.cell_size_bytes(cell_left as usize) as i64;
let cell_right_size =
cell_array.cell_size_bytes(cell_right as usize) as i64;
// TODO: add assert nMaxCells
let is_last_sibling = i == sibling_count_new - 1;
let pointer_size = if is_last_sibling {
0
} else {
CELL_PTR_SIZE_BYTES as i64
};
// As mentioned, this step rebalances the siblings so that cells are moved from left to right, since the previous step just
// packed as much as possible to the left. However, if the right-hand-side page would become larger than the left-hand-side page,
// we stop.
let would_not_improve_balance =
size_right_page + cell_right_size + (CELL_PTR_SIZE_BYTES as i64)
> size_left_page - (cell_left_size + pointer_size);
if size_right_page != 0 && would_not_improve_balance {
break;
}
size_left_page -= cell_left_size + (CELL_PTR_SIZE_BYTES as i64);
size_right_page += cell_right_size + (CELL_PTR_SIZE_BYTES as i64);
cell_array.cell_count_per_page_cumulative[i - 1] = cell_left;
if cell_left == 0 {
break;
}
cell_left -= 1;
cell_right -= 1;
}
new_page_sizes[i] = size_right_page;
new_page_sizes[i - 1] = size_left_page;
assert!(
cell_array.cell_count_per_page_cumulative[i - 1]
> if i > 1 {
cell_array.cell_count_per_page_cumulative[i - 2]
} else {
0
}
);
}
// Allocate pages or set dirty if not needed
for i in 0..sibling_count_new {
if i < balance_info.sibling_count {
let page = balance_info.pages_to_balance[i].as_ref().unwrap();
turso_assert!(
page.get().is_dirty(),
"sibling page must be already marked dirty"
);
pages_to_balance_new[i].replace(page.clone());
} else {
// FIXME: handle page cache is full
let page = self.allocate_page(page_type, 0)?;
pages_to_balance_new[i].replace(page);
                        // Since this page didn't exist before, set its old prefix sum to the total
                        // number of cells; as a prefix sum, this marks the page as previously empty.
old_cell_count_per_page_cumulative[i] =
cell_array.cell_payloads.len() as u16;
}
}
// Reassign page numbers in increasing order
{
let mut page_numbers: [usize; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE] =
[0; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE];
for (i, page) in pages_to_balance_new
.iter()
.take(sibling_count_new)
.enumerate()
{
page_numbers[i] = page.as_ref().unwrap().get().get().id;
}
page_numbers.sort();
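                    // Reassigning the existing page numbers in ascending key order keeps sibling
                    // pages roughly sequential on disk (as SQLite does), improving locality;
                    // whenever a page's id changes, its page-cache entry must be rekeyed below.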
for (page, new_id) in pages_to_balance_new
.iter()
.take(sibling_count_new)
.rev()
.zip(page_numbers.iter().rev().take(sibling_count_new))
{
let page = page.as_ref().unwrap();
if *new_id != page.get().get().id {
page.get().get().id = *new_id;
self.pager
.update_dirty_loaded_page_in_cache(*new_id, page.get())?;
}
}
#[cfg(debug_assertions)]
{
tracing::debug!(
"balance_non_root(parent page_id={})",
parent_page.get().id
);
for page in pages_to_balance_new.iter().take(sibling_count_new) {
tracing::debug!(
"balance_non_root(new_sibling page_id={})",
page.as_ref().unwrap().get().get().id
);
}
}
}
                // pages_pointed_to helps us verify in debug builds that we did in fact create divider cells pointing to all the new pages,
                // and that the rightmost pointer points to the last page.
#[cfg(debug_assertions)]
let mut pages_pointed_to = HashSet::new();
                // Write the right pointer in the parent page to point to the new rightmost page. Keep in mind
                // we update the rightmost pointer first because inserting cells could defragment the parent page,
                // therefore invalidating the pointer.
let right_page_id = pages_to_balance_new[sibling_count_new - 1]
.as_ref()
.unwrap()
.get()
.get()
.id as u32;
let rightmost_pointer = balance_info.rightmost_pointer;
let rightmost_pointer =
unsafe { std::slice::from_raw_parts_mut(rightmost_pointer, 4) };
rightmost_pointer[0..4].copy_from_slice(&right_page_id.to_be_bytes());
#[cfg(debug_assertions)]
pages_pointed_to.insert(right_page_id);
tracing::debug!(
"balance_non_root(rightmost_pointer_update, rightmost_pointer={})",
right_page_id
);
                /* 6. Update parent pointers. Update the right pointer and insert divider cells for the newly created distribution of cells. */
                // Ensure the right-child pointer of the right-most new sibling page points to the page
                // that was originally in that place.
let is_leaf_page = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf);
if !is_leaf_page {
let last_sibling_idx = balance_info.sibling_count - 1;
let last_page = balance_info.pages_to_balance[last_sibling_idx]
.as_ref()
.unwrap();
let right_pointer = last_page.get().get_contents().rightmost_pointer().unwrap();
let new_last_page = pages_to_balance_new[sibling_count_new - 1]
.as_ref()
.unwrap();
new_last_page
.get()
.get_contents()
.write_u32(offset::BTREE_RIGHTMOST_PTR, right_pointer);
}
turso_assert!(
parent_contents.overflow_cells.is_empty(),
"parent page overflow cells should be empty before divider cell reinsertion"
);
// TODO: pointer map update (vacuum support)
// Update divider cells in parent
for (sibling_page_idx, page) in pages_to_balance_new
.iter()
.enumerate()
.take(sibling_count_new - 1)
/* do not take last page */
{
let page = page.as_ref().unwrap();
// e.g. if we have 3 pages and the leftmost child page has 3 cells,
// then the divider cell idx is 3 in the flat cell array.
let divider_cell_idx = cell_array.cell_count_up_to_page(sibling_page_idx);
let mut divider_cell = &mut cell_array.cell_payloads[divider_cell_idx];
                    // FIXME: don't use auxiliary space, could be done without allocations
let mut new_divider_cell = Vec::new();
if !is_leaf_page {
// Interior
// Make this page's rightmost pointer point to pointer of divider cell before modification
let previous_pointer_divider = read_u32(divider_cell, 0);
page.get()
.get_contents()
.write_u32(offset::BTREE_RIGHTMOST_PTR, previous_pointer_divider);
// divider cell now points to this page
new_divider_cell
.extend_from_slice(&(page.get().get().id as u32).to_be_bytes());
// now copy the rest of the divider cell:
// Table Interior page:
// * varint rowid
// Index Interior page:
// * varint payload size
// * payload
// * first overflow page (u32 optional)
new_divider_cell.extend_from_slice(&divider_cell[4..]);
} else if is_table_leaf {
// For table leaves, divider_cell_idx effectively points to the last cell of the old left page.
// The new divider cell's rowid becomes the second-to-last cell's rowid.
// i.e. in the diagram above, the new divider cell's rowid becomes the rowid of C3.
// FIXME: not needed conversion
// FIXME: need to update cell size in order to free correctly?
// insert into cell with correct range should be enough
divider_cell = &mut cell_array.cell_payloads[divider_cell_idx - 1];
let (_, n_bytes_payload) = read_varint(divider_cell)?;
let (rowid, _) = read_varint(&divider_cell[n_bytes_payload..])?;
new_divider_cell
.extend_from_slice(&(page.get().get().id as u32).to_be_bytes());
write_varint_to_vec(rowid, &mut new_divider_cell);
} else {
// Leaf index
new_divider_cell
.extend_from_slice(&(page.get().get().id as u32).to_be_bytes());
new_divider_cell.extend_from_slice(divider_cell);
}
let left_pointer = read_u32(&new_divider_cell[..LEFT_CHILD_PTR_SIZE_BYTES], 0);
turso_assert!(
left_pointer != parent_page.get().id as u32,
"left pointer is the same as parent page id"
);
#[cfg(debug_assertions)]
pages_pointed_to.insert(left_pointer);
tracing::debug!(
"balance_non_root(insert_divider_cell, first_divider_cell={}, divider_cell={}, left_pointer={})",
balance_info.first_divider_cell,
sibling_page_idx,
left_pointer
);
turso_assert!(
left_pointer == page.get().get().id as u32,
"left pointer is not the same as page id"
);
// FIXME: remove this lock
let database_size = header_accessor::get_database_size(&self.pager)?;
turso_assert!(
left_pointer <= database_size,
"invalid page number divider left pointer {} > database number of pages {}",
left_pointer,
database_size
);
// FIXME: defragment shouldn't be needed
// defragment_page(parent_contents, self.usable_space() as u16);
let divider_cell_insert_idx_in_parent =
balance_info.first_divider_cell + sibling_page_idx;
let overflow_cell_count_before = parent_contents.overflow_cells.len();
insert_into_cell(
parent_contents,
&new_divider_cell,
divider_cell_insert_idx_in_parent,
self.usable_space() as u16,
)?;
let overflow_cell_count_after = parent_contents.overflow_cells.len();
let divider_cell_is_overflow_cell =
overflow_cell_count_after > overflow_cell_count_before;
#[cfg(debug_assertions)]
self.validate_balance_non_root_divider_cell_insertion(
balance_info,
parent_contents,
divider_cell_insert_idx_in_parent,
divider_cell_is_overflow_cell,
&page.get(),
);
}
tracing::debug!(
"balance_non_root(parent_overflow={})",
parent_contents.overflow_cells.len()
);
#[cfg(debug_assertions)]
{
// Let's ensure every page is pointed to by the divider cell or the rightmost pointer.
for page in pages_to_balance_new.iter().take(sibling_count_new) {
let page = page.as_ref().unwrap();
assert!(
pages_pointed_to.contains(&(page.get().get().id as u32)),
"page {} not pointed to by divider cell or rightmost pointer",
page.get().get().id
);
}
}
/* 7. Start real movement of cells. Next comment is borrowed from SQLite: */
/* Now update the actual sibling pages. The order in which they are updated
** is important, as this code needs to avoid disrupting any page from which
** cells may still to be read. In practice, this means:
**
** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1])
** then it is not safe to update page apNew[iPg] until after
** the left-hand sibling apNew[iPg-1] has been updated.
**
** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1])
** then it is not safe to update page apNew[iPg] until after
** the right-hand sibling apNew[iPg+1] has been updated.
**
** If neither of the above apply, the page is safe to update.
**
** The iPg value in the following loop starts at nNew-1 goes down
** to 0, then back up to nNew-1 again, thus making two passes over
** the pages. On the initial downward pass, only condition (1) above
** needs to be tested because (2) will always be true from the previous
** step. On the upward pass, both conditions are always true, so the
** upwards pass simply processes pages that were missed on the downward
** pass.
*/
let mut done = [false; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE];
let rightmost_page_negative_idx = 1 - sibling_count_new as i64;
let rightmost_page_positive_idx = sibling_count_new as i64 - 1;
for i in rightmost_page_negative_idx..=rightmost_page_positive_idx {
// As mentioned above, we do two passes over the pages:
// 1. Downward pass: Process pages in decreasing order
// 2. Upward pass: Process pages in increasing order
// Hence if we have 3 siblings:
// the order of 'i' will be: -2, -1, 0, 1, 2.
// and the page processing order is: 2, 1, 0, 1, 2.
let page_idx = i.unsigned_abs() as usize;
if done[page_idx] {
continue;
}
// As outlined above, this condition ensures we process pages in the correct order to avoid disrupting cells that still need to be read.
// 1. i >= 0 handles the upward pass where we process any pages not processed in the downward pass.
// - condition (1) is not violated: if cells are moving right-to-left, righthand sibling has not been updated yet.
// - condition (2) is not violated: if cells are moving left-to-right, righthand sibling has already been updated in the downward pass.
// 2. The second condition checks if it's safe to process a page during the downward pass.
// - condition (1) is not violated: if cells are moving right-to-left, we do nothing.
// - condition (2) is not violated: if cells are moving left-to-right, we are allowed to update.
if i >= 0
|| old_cell_count_per_page_cumulative[page_idx - 1]
>= cell_array.cell_count_per_page_cumulative[page_idx - 1]
{
let (start_old_cells, start_new_cells, number_new_cells) = if page_idx == 0
{
(0, 0, cell_array.cell_count_up_to_page(0))
} else {
let this_was_old_page = page_idx < balance_info.sibling_count;
                        // We add !is_table_leaf because we want to skip the divider cell that is encountered between assigned pages.
let start_old_cells = if this_was_old_page {
old_cell_count_per_page_cumulative[page_idx - 1] as usize
+ (!is_table_leaf) as usize
} else {
cell_array.cell_payloads.len()
};
let start_new_cells = cell_array.cell_count_up_to_page(page_idx - 1)
+ (!is_table_leaf) as usize;
(
start_old_cells,
start_new_cells,
cell_array.cell_count_up_to_page(page_idx) - start_new_cells,
)
};
let page = pages_to_balance_new[page_idx].as_ref().unwrap();
let page = page.get();
tracing::debug!("pre_edit_page(page={})", page.get().id);
let page_contents = page.get_contents();
edit_page(
page_contents,
start_old_cells,
start_new_cells,
number_new_cells,
&cell_array,
self.usable_space() as u16,
)?;
debug_validate_cells!(page_contents, self.usable_space() as u16);
tracing::trace!(
"edit_page page={} cells={}",
page.get().id,
page_contents.cell_count()
);
page_contents.overflow_cells.clear();
done[page_idx] = true;
}
}
// TODO: vacuum support
let first_child_page = pages_to_balance_new[0].as_ref().unwrap();
let first_child_page = first_child_page.get();
let first_child_contents = first_child_page.get_contents();
if parent_is_root
&& parent_contents.cell_count() == 0
                    // this check makes sure we do not end up with negative free space
&& parent_contents.offset
<= compute_free_space(first_child_contents, self.usable_space() as u16)
as usize
{
// From SQLite:
// The root page of the b-tree now contains no cells. The only sibling
// page is the right-child of the parent. Copy the contents of the
// child page into the parent, decreasing the overall height of the
// b-tree structure by one. This is described as the "balance-shallower"
// sub-algorithm in some documentation.
assert!(sibling_count_new == 1);
let parent_offset = if parent_page.get().id == 1 {
DATABASE_HEADER_SIZE
} else {
0
};
// From SQLite:
// It is critical that the child page be defragmented before being
// copied into the parent, because if the parent is page 1 then it will
                    // be smaller than the child due to the database header, and so
// all the free space needs to be up front.
defragment_page(first_child_contents, self.usable_space() as u16);
let child_top = first_child_contents.cell_content_area() as usize;
let parent_buf = parent_contents.as_ptr();
let child_buf = first_child_contents.as_ptr();
let content_size = self.usable_space() - child_top;
// Copy cell contents
parent_buf[child_top..child_top + content_size]
.copy_from_slice(&child_buf[child_top..child_top + content_size]);
// Copy header and pointer
// NOTE: don't use .cell_pointer_array_offset_and_size() because of different
// header size
let header_and_pointer_size = first_child_contents.header_size()
+ first_child_contents.cell_pointer_array_size();
parent_buf[parent_offset..parent_offset + header_and_pointer_size]
.copy_from_slice(
&child_buf[first_child_contents.offset
..first_child_contents.offset + header_and_pointer_size],
);
self.stack.set_cell_index(0); // reset cell index, top is already parent
sibling_count_new -= 1; // decrease sibling count for debugging and free at the end
assert!(sibling_count_new < balance_info.sibling_count);
}
#[cfg(debug_assertions)]
self.post_balance_non_root_validation(
&parent_page_btree,
balance_info,
parent_contents,
pages_to_balance_new,
page_type,
is_table_leaf,
cells_debug,
sibling_count_new,
right_page_id,
);
(
WriteState::BalanceFreePages {
curr_page: sibling_count_new,
sibling_count_new,
},
Ok(IOResult::Done(())),
)
}
WriteState::BalanceFreePages {
curr_page,
sibling_count_new,
} => {
let write_info = self.state.write_info().unwrap();
let mut balance_info: std::cell::RefMut<'_, Option<BalanceInfo>> =
write_info.balance_info.borrow_mut();
let balance_info = balance_info.as_mut().unwrap();
// We have to free pages that are not used anymore
if !((sibling_count_new..balance_info.sibling_count).contains(&curr_page)) {
(WriteState::BalanceStart, Ok(IOResult::Done(())))
} else {
let page = balance_info.pages_to_balance[curr_page].as_ref().unwrap();
return_if_io!(self
.pager
.free_page(Some(page.get().clone()), page.get().get().id));
(
WriteState::BalanceFreePages {
curr_page: curr_page + 1,
sibling_count_new,
},
Ok(IOResult::Done(())),
)
}
}
WriteState::Finish => todo!(),
};
if matches!(next_write_state, WriteState::BalanceStart) {
// reset balance state
let _ = self.state.mut_write_info().unwrap().balance_info.take();
}
let write_info = self.state.mut_write_info().unwrap();
write_info.state = next_write_state;
result
}
/// Validates that a divider cell was correctly inserted into the parent page
/// during B-tree balancing and that it points to the correct child page.
#[cfg(debug_assertions)]
fn validate_balance_non_root_divider_cell_insertion(
&self,
balance_info: &mut BalanceInfo,
parent_contents: &mut PageContent,
divider_cell_insert_idx_in_parent: usize,
divider_cell_is_overflow_cell: bool,
child_page: &std::sync::Arc<crate::Page>,
) {
let left_pointer = if divider_cell_is_overflow_cell {
parent_contents.overflow_cells
.iter()
.find(|cell| cell.index == divider_cell_insert_idx_in_parent)
.map(|cell| read_u32(&cell.payload, 0))
.unwrap_or_else(|| {
panic!(
"overflow cell with divider cell was not found (divider_cell_idx={}, balance_info.first_divider_cell={}, overflow_cells.len={})",
divider_cell_insert_idx_in_parent,
balance_info.first_divider_cell,
parent_contents.overflow_cells.len(),
)
})
} else if divider_cell_insert_idx_in_parent < parent_contents.cell_count() {
let (cell_start, cell_len) = parent_contents
.cell_get_raw_region(divider_cell_insert_idx_in_parent, self.usable_space());
read_u32(
&parent_contents.as_ptr()[cell_start..cell_start + cell_len],
0,
)
} else {
panic!(
"divider cell is not in the parent page (divider_cell_idx={}, balance_info.first_divider_cell={}, overflow_cells.len={})",
divider_cell_insert_idx_in_parent,
balance_info.first_divider_cell,
parent_contents.overflow_cells.len(),
)
};
// Verify the left pointer points to the correct page
assert_eq!(
left_pointer,
child_page.get().id as u32,
"the cell we just inserted doesn't point to the correct page. points to {}, should point to {}",
left_pointer,
child_page.get().id as u32
);
}
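/// Debug-only sanity check run after balancing a non-root page: verifies that no divider cell in
/// the parent points back at the parent itself, that every cell captured in `cells_debug` before
/// balancing ended up either in a sibling page or as a divider cell in the parent, and that
/// left-child and rightmost pointers reference the expected pages. Panics if any check fails.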
#[cfg(debug_assertions)]
#[allow(clippy::too_many_arguments)]
fn post_balance_non_root_validation(
&self,
parent_page: &BTreePage,
balance_info: &mut BalanceInfo,
parent_contents: &mut PageContent,
pages_to_balance_new: [Option<BTreePage>; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE],
page_type: PageType,
is_table_leaf: bool,
mut cells_debug: Vec<Vec<u8>>,
sibling_count_new: usize,
right_page_id: u32,
) {
let mut valid = true;
let mut current_index_cell = 0;
for cell_idx in 0..parent_contents.cell_count() {
let cell = parent_contents
.cell_get(cell_idx, self.usable_space())
.unwrap();
match cell {
BTreeCell::TableInteriorCell(table_interior_cell) => {
let left_child_page = table_interior_cell.left_child_page;
if left_child_page == parent_page.get().get().id as u32 {
tracing::error!("balance_non_root(parent_divider_points_to_same_page, page_id={}, cell_left_child_page={})",
parent_page.get().get().id,
left_child_page,
);
valid = false;
}
}
BTreeCell::IndexInteriorCell(index_interior_cell) => {
let left_child_page = index_interior_cell.left_child_page;
if left_child_page == parent_page.get().get().id as u32 {
tracing::error!("balance_non_root(parent_divider_points_to_same_page, page_id={}, cell_left_child_page={})",
parent_page.get().get().id,
left_child_page,
);
valid = false;
}
}
_ => {}
}
}
// Now do an in-depth check that we in fact added every cell somewhere and none were lost
for (page_idx, page) in pages_to_balance_new
.iter()
.take(sibling_count_new)
.enumerate()
{
let page = page.as_ref().unwrap();
let page = page.get();
let contents = page.get_contents();
debug_validate_cells!(contents, self.usable_space() as u16);
// Cells are distributed in order
for cell_idx in 0..contents.cell_count() {
let (cell_start, cell_len) =
contents.cell_get_raw_region(cell_idx, self.usable_space());
let buf = contents.as_ptr();
let cell_buf = to_static_buf(&mut buf[cell_start..cell_start + cell_len]);
let cell_buf_in_array = &cells_debug[current_index_cell];
if cell_buf != cell_buf_in_array {
tracing::error!("balance_non_root(cell_not_found_debug, page_id={}, cell_in_cell_array_idx={})",
page.get().id,
current_index_cell,
);
valid = false;
}
let cell = crate::storage::sqlite3_ondisk::read_btree_cell(
cell_buf,
contents,
0,
self.usable_space(),
)
.unwrap();
match &cell {
BTreeCell::TableInteriorCell(table_interior_cell) => {
let left_child_page = table_interior_cell.left_child_page;
if left_child_page == page.get().id as u32 {
tracing::error!("balance_non_root(child_page_points_same_page, page_id={}, cell_left_child_page={}, page_idx={})",
page.get().id,
left_child_page,
page_idx
);
valid = false;
}
if left_child_page == parent_page.get().get().id as u32 {
tracing::error!("balance_non_root(child_page_points_parent_of_child, page_id={}, cell_left_child_page={}, page_idx={})",
page.get().id,
left_child_page,
page_idx
);
valid = false;
}
}
BTreeCell::IndexInteriorCell(index_interior_cell) => {
let left_child_page = index_interior_cell.left_child_page;
if left_child_page == page.get().id as u32 {
tracing::error!("balance_non_root(child_page_points_same_page, page_id={}, cell_left_child_page={}, page_idx={})",
page.get().id,
left_child_page,
page_idx
);
valid = false;
}
if left_child_page == parent_page.get().get().id as u32 {
tracing::error!("balance_non_root(child_page_points_parent_of_child, page_id={}, cell_left_child_page={}, page_idx={})",
page.get().id,
left_child_page,
page_idx
);
valid = false;
}
}
_ => {}
}
current_index_cell += 1;
}
// Now check divider cells and their pointers.
let parent_buf = parent_contents.as_ptr();
let cell_divider_idx = balance_info.first_divider_cell + page_idx;
if sibling_count_new == 0 {
// Balance-shallower case
// We need to check data in parent page
debug_validate_cells!(parent_contents, self.usable_space() as u16);
if pages_to_balance_new[0].is_none() {
tracing::error!(
"balance_non_root(balance_shallower_incorrect_page, page_idx={})",
0
);
valid = false;
}
for (i, value) in pages_to_balance_new
.iter()
.enumerate()
.take(sibling_count_new)
.skip(1)
{
if value.is_some() {
tracing::error!(
"balance_non_root(balance_shallower_incorrect_page, page_idx={})",
i
);
valid = false;
}
}
if current_index_cell != cells_debug.len()
|| cells_debug.len() != contents.cell_count()
|| contents.cell_count() != parent_contents.cell_count()
{
tracing::error!("balance_non_root(balance_shallower_incorrect_cell_count, current_index_cell={}, cells_debug={}, cell_count={}, parent_cell_count={})",
current_index_cell,
cells_debug.len(),
contents.cell_count(),
parent_contents.cell_count()
);
valid = false;
}
if right_page_id == page.get().id as u32
|| right_page_id == parent_page.get().get().id as u32
{
tracing::error!("balance_non_root(balance_shallower_rightmost_pointer, page_id={}, parent_page_id={}, rightmost={})",
page.get().id,
parent_page.get().get().id,
right_page_id,
);
valid = false;
}
if let Some(rm) = contents.rightmost_pointer() {
if rm != right_page_id {
tracing::error!("balance_non_root(balance_shallower_rightmost_pointer, page_rightmost={}, rightmost={})",
rm,
right_page_id,
);
valid = false;
}
}
if let Some(rm) = parent_contents.rightmost_pointer() {
if rm != right_page_id {
tracing::error!("balance_non_root(balance_shallower_rightmost_pointer, parent_rightmost={}, rightmost={})",
rm,
right_page_id,
);
valid = false;
}
}
if parent_contents.page_type() != page_type {
tracing::error!("balance_non_root(balance_shallower_parent_page_type, page_type={:?}, parent_page_type={:?})",
page_type,
parent_contents.page_type()
);
valid = false;
}
for (parent_cell_idx, cell_buf_in_array) in
cells_debug.iter().enumerate().take(contents.cell_count())
{
let (parent_cell_start, parent_cell_len) =
parent_contents.cell_get_raw_region(parent_cell_idx, self.usable_space());
let (cell_start, cell_len) =
contents.cell_get_raw_region(parent_cell_idx, self.usable_space());
let buf = contents.as_ptr();
let cell_buf = to_static_buf(&mut buf[cell_start..cell_start + cell_len]);
let parent_cell_buf = to_static_buf(
&mut parent_buf[parent_cell_start..parent_cell_start + parent_cell_len],
);
if cell_buf != cell_buf_in_array || cell_buf != parent_cell_buf {
tracing::error!("balance_non_root(balance_shallower_cell_not_found_debug, page_id={}, cell_in_cell_array_idx={})",
page.get().id,
parent_cell_idx,
);
valid = false;
}
}
} else if page_idx == sibling_count_new - 1 {
// We only validate the rightmost pointer of the parent page here. If the divider is a regular cell
// rather than the last pointer, we skip it, because insert_into_cell could've defragmented the page
// and invalidated the pointer. For the rightmost pointer, we just check that it points to this page.
if cell_divider_idx == parent_contents.cell_count()
&& right_page_id != page.get().id as u32
{
tracing::error!("balance_non_root(cell_divider_right_pointer, should point to {}, but points to {})",
page.get().id,
right_page_id
);
valid = false;
}
} else {
// divider cell might be an overflow cell
let mut was_overflow = false;
for overflow_cell in &parent_contents.overflow_cells {
if overflow_cell.index == cell_divider_idx {
let left_pointer = read_u32(&overflow_cell.payload, 0);
if left_pointer != page.get().id as u32 {
tracing::error!("balance_non_root(cell_divider_left_pointer_overflow, should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})",
page.get().id,
left_pointer,
page_idx,
parent_contents.overflow_cells.len()
);
valid = false;
}
was_overflow = true;
break;
}
}
if was_overflow {
if !is_table_leaf {
// remember to advance the cell index if this cell was moved to the parent
current_index_cell += 1;
}
continue;
}
// The divider was not an overflow cell, so read its left-child pointer directly from the parent page.
let (cell_start, cell_len) =
parent_contents.cell_get_raw_region(cell_divider_idx, self.usable_space());
let cell_left_pointer = read_u32(&parent_buf[cell_start..cell_start + cell_len], 0);
if cell_left_pointer != page.get().id as u32 {
tracing::error!("balance_non_root(cell_divider_left_pointer, should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})",
page.get().id,
cell_left_pointer,
page_idx,
parent_contents.overflow_cells.len()
);
valid = false;
}
if is_table_leaf {
// If we are in a table leaf page, we just need to check that the cell acting as the divider is present in the parent.
// Leaf cells were already checked above but the parent's copy was not, so we don't advance current_index_cell.
let last_sibling_idx = balance_info.sibling_count - 1;
if page_idx >= last_sibling_idx {
// This means we are in the last page and we don't need to check anything
continue;
}
let cell_buf: &'static mut [u8] =
to_static_buf(&mut cells_debug[current_index_cell - 1]);
let cell = crate::storage::sqlite3_ondisk::read_btree_cell(
cell_buf,
contents,
0,
self.usable_space(),
)
.unwrap();
let parent_cell = parent_contents
.cell_get(cell_divider_idx, self.usable_space())
.unwrap();
let rowid = match cell {
BTreeCell::TableLeafCell(table_leaf_cell) => table_leaf_cell.rowid,
_ => unreachable!(),
};
let rowid_parent = match parent_cell {
BTreeCell::TableInteriorCell(table_interior_cell) => {
table_interior_cell.rowid
}
_ => unreachable!(),
};
if rowid_parent != rowid {
tracing::error!("balance_non_root(cell_divider_rowid, page_id={}, cell_divider_idx={}, rowid_parent={}, rowid={})",
page.get().id,
cell_divider_idx,
rowid_parent,
rowid
);
valid = false;
}
} else {
// In any other case, we need to check that this cell was moved to the parent as a divider cell
let mut was_overflow = false;
for overflow_cell in &parent_contents.overflow_cells {
if overflow_cell.index == cell_divider_idx {
let left_pointer = read_u32(&overflow_cell.payload, 0);
if left_pointer != page.get().id as u32 {
tracing::error!("balance_non_root(cell_divider_divider_cell_overflow should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})",
page.get().id,
left_pointer,
page_idx,
parent_contents.overflow_cells.len()
);
valid = false;
}
was_overflow = true;
break;
}
}
if was_overflow {
if !is_table_leaf {
// remember to advance the cell index if this cell was moved to the parent
current_index_cell += 1;
}
continue;
}
let (parent_cell_start, parent_cell_len) =
parent_contents.cell_get_raw_region(cell_divider_idx, self.usable_space());
let cell_buf_in_array = &cells_debug[current_index_cell];
let left_pointer = read_u32(
&parent_buf[parent_cell_start..parent_cell_start + parent_cell_len],
0,
);
if left_pointer != page.get().id as u32 {
tracing::error!("balance_non_root(divider_cell_left_pointer_interior should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})",
page.get().id,
left_pointer,
page_idx,
parent_contents.overflow_cells.len()
);
valid = false;
}
match page_type {
PageType::TableInterior | PageType::IndexInterior => {
let parent_cell_buf =
&parent_buf[parent_cell_start..parent_cell_start + parent_cell_len];
if parent_cell_buf[4..] != cell_buf_in_array[4..] {
tracing::error!("balance_non_root(cell_divider_cell, page_id={}, cell_divider_idx={})",
page.get().id,
cell_divider_idx,
);
valid = false;
}
}
PageType::IndexLeaf => {
let parent_cell_buf =
&parent_buf[parent_cell_start..parent_cell_start + parent_cell_len];
if parent_cell_buf[4..] != cell_buf_in_array[..] {
tracing::error!("balance_non_root(cell_divider_cell_index_leaf, page_id={}, cell_divider_idx={})",
page.get().id,
cell_divider_idx,
);
valid = false;
}
}
_ => {
unreachable!()
}
}
current_index_cell += 1;
}
}
}
assert!(
valid,
"corrupted database, cells were not balanced properly"
);
}
/// Balance the root page.
/// This is done when the root page overflows, and we need to create a new root page.
/// See e.g. https://en.wikipedia.org/wiki/B-tree
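/// A rough sketch of the transformation ("balance deeper"): the old root's cells move into a
/// freshly allocated child, and the root becomes an empty interior page whose rightmost pointer
/// references that child:
/// ```text
/// [root] (overflowing)        [root] (empty interior, rightmost -> child)
///                       ==>      |
///                             [child] (old root's cells and overflow cells)
/// ```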
fn balance_root(&mut self) -> Result<()> {
/* todo: balance deeper, create child and copy contents of root there. Then split root */
/* if we are in root page then we just need to create a new root and push key there */
let is_page_1 = {
let current_root = self.stack.top();
current_root.get().get().id == 1
};
let offset = if is_page_1 { DATABASE_HEADER_SIZE } else { 0 };
let root_btree = self.stack.top();
let root = root_btree.get();
let root_contents = root.get_contents();
// FIXME: handle the case where the page cache is full
let child_btree =
self.pager
.do_allocate_page(root_contents.page_type(), 0, BtreePageAllocMode::Any)?;
tracing::debug!(
"balance_root(root={}, rightmost={}, page_type={:?})",
root.get().id,
child_btree.get().get().id,
root.get_contents().page_type()
);
turso_assert!(root.is_dirty(), "root must be marked dirty");
turso_assert!(
child_btree.get().is_dirty(),
"child must be marked dirty as freshly allocated page"
);
let root_buf = root_contents.as_ptr();
let child = child_btree.get();
let child_contents = child.get_contents();
let child_buf = child_contents.as_ptr();
let (root_pointer_start, root_pointer_len) =
root_contents.cell_pointer_array_offset_and_size();
let (child_pointer_start, _) = child.get_contents().cell_pointer_array_offset_and_size();
let top = root_contents.cell_content_area() as usize;
// 1. Modify child
// Copy pointers
child_buf[child_pointer_start..child_pointer_start + root_pointer_len]
.copy_from_slice(&root_buf[root_pointer_start..root_pointer_start + root_pointer_len]);
// Copy cell contents
child_buf[top..].copy_from_slice(&root_buf[top..]);
// Copy header
child_buf[0..root_contents.header_size()]
.copy_from_slice(&root_buf[offset..offset + root_contents.header_size()]);
// Copy overflow cells
std::mem::swap(
&mut child_contents.overflow_cells,
&mut root_contents.overflow_cells,
);
root_contents.overflow_cells.clear();
// 2. Modify root
let new_root_page_type = match root_contents.page_type() {
PageType::IndexLeaf => PageType::IndexInterior,
PageType::TableLeaf => PageType::TableInterior,
other => other,
} as u8;
// set new page type
root_contents.write_u8(offset::BTREE_PAGE_TYPE, new_root_page_type);
root_contents.write_u32(offset::BTREE_RIGHTMOST_PTR, child.get().id as u32);
root_contents.write_u16(offset::BTREE_CELL_CONTENT_AREA, self.usable_space() as u16);
root_contents.write_u16(offset::BTREE_CELL_COUNT, 0);
root_contents.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0);
root_contents.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0);
root_contents.overflow_cells.clear();
self.root_page = root.get().id;
self.stack.clear();
self.stack.push(root_btree.clone());
self.stack.set_cell_index(0); // leave parent pointing at the rightmost pointer (in this case 0, as there are no cells), since we will be balancing the rightmost child page.
self.stack.push(child_btree.clone());
Ok(())
}
fn usable_space(&self) -> usize {
self.pager.usable_space()
}
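/// Position the cursor one past the last cell of the rightmost leaf page, so that a subsequent
/// insert appends to the end of the tree.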
pub fn seek_end(&mut self) -> Result<IOResult<()>> {
assert!(self.mv_cursor.is_none()); // unsure about this -_-
self.move_to_root()?;
loop {
let mem_page = self.stack.top();
let page_id = mem_page.get().get().id;
let page = self.read_page(page_id)?;
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
if contents.is_leaf() {
// set cursor just past the last cell to append
self.stack.set_cell_index(contents.cell_count() as i32);
return Ok(IOResult::Done(()));
}
match contents.rightmost_pointer() {
Some(right_most_pointer) => {
self.stack.set_cell_index(contents.cell_count() as i32 + 1); // invalid on interior
let child = self.read_page(right_most_pointer as usize)?;
self.stack.push(child);
}
None => unreachable!("interior page must have rightmost pointer"),
}
}
}
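/// Move the cursor to the last (rightmost) record of the b-tree; on an empty table the cursor is
/// left without a record.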
#[instrument(skip_all, level = Level::DEBUG)]
pub fn seek_to_last(&mut self) -> Result<IOResult<()>> {
assert!(self.mv_cursor.is_none());
let has_record = return_if_io!(self.move_to_rightmost());
self.invalidate_record();
self.has_record.replace(has_record);
if !has_record {
let is_empty = return_if_io!(self.is_empty_table());
assert!(is_empty);
return Ok(IOResult::Done(()));
}
Ok(IOResult::Done(()))
}
pub fn is_empty(&self) -> bool {
!self.has_record.get()
}
pub fn root_page(&self) -> usize {
self.root_page
}
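/// Position the cursor on the first record of the b-tree (or on the first row of the MVCC cursor
/// when MVCC is enabled).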
#[instrument(skip_all, level = Level::DEBUG)]
pub fn rewind(&mut self) -> Result<IOResult<()>> {
if let Some(mv_cursor) = &self.mv_cursor {
{
let mut mv_cursor = mv_cursor.borrow_mut();
mv_cursor.rewind();
}
let cursor_has_record = return_if_io!(self.get_next_record());
self.invalidate_record();
self.has_record.replace(cursor_has_record);
} else {
self.move_to_root()?;
let cursor_has_record = return_if_io!(self.get_next_record());
self.invalidate_record();
self.has_record.replace(cursor_has_record);
}
Ok(IOResult::Done(()))
}
#[instrument(skip_all, level = Level::DEBUG)]
pub fn last(&mut self) -> Result<IOResult<()>> {
assert!(self.mv_cursor.is_none());
let cursor_has_record = return_if_io!(self.move_to_rightmost());
self.has_record.replace(cursor_has_record);
self.invalidate_record();
Ok(IOResult::Done(()))
}
#[instrument(skip_all, level = Level::DEBUG)]
pub fn next(&mut self) -> Result<IOResult<bool>> {
return_if_io!(self.restore_context());
let cursor_has_record = return_if_io!(self.get_next_record());
self.has_record.replace(cursor_has_record);
self.invalidate_record();
Ok(IOResult::Done(cursor_has_record))
}
fn invalidate_record(&mut self) {
self.get_immutable_record_or_create()
.as_mut()
.unwrap()
.invalidate();
self.record_cursor.borrow_mut().invalidate();
}
#[instrument(skip_all, level = Level::DEBUG)]
pub fn prev(&mut self) -> Result<IOResult<bool>> {
assert!(self.mv_cursor.is_none());
return_if_io!(self.restore_context());
let cursor_has_record = return_if_io!(self.get_prev_record());
self.has_record.replace(cursor_has_record);
self.invalidate_record();
Ok(IOResult::Done(cursor_has_record))
}
#[instrument(skip(self), level = Level::DEBUG)]
pub fn rowid(&mut self) -> Result<IOResult<Option<i64>>> {
if let Some(mv_cursor) = &self.mv_cursor {
if self.has_record.get() {
let mv_cursor = mv_cursor.borrow();
return Ok(IOResult::Done(
mv_cursor.current_row_id().map(|rowid| rowid.row_id),
));
} else {
return Ok(IOResult::Done(None));
}
}
if self.has_record.get() {
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
// load record
let _ = return_if_io!(self.record());
let page_type = page.get().get_contents().page_type();
let page = page.get();
let contents = page.get_contents();
let cell_idx = self.stack.current_cell_index();
let cell = contents.cell_get(cell_idx as usize, self.usable_space())?;
if page_type.is_table() {
let BTreeCell::TableLeafCell(TableLeafCell { rowid, .. }) = cell else {
unreachable!(
"BTreeCursor::rowid(): unexpected page_type: {:?}",
page_type
);
};
Ok(IOResult::Done(Some(rowid)))
} else {
Ok(IOResult::Done(self.get_index_rowid_from_record()))
}
} else {
Ok(IOResult::Done(None))
}
}
#[instrument(skip(self), level = Level::DEBUG)]
pub fn seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result<IOResult<SeekResult>> {
assert!(self.mv_cursor.is_none());
// Empty trace to capture the span information
tracing::trace!("");
// We need to clear the null flag for the table cursor before seeking,
// because it might have been set by an unmatched left-join row during the previous iteration
// of the outer loop.
self.set_null_flag(false);
let seek_result = return_if_io!(self.do_seek(key, op));
self.invalidate_record();
// Reset seek state
self.seek_state = CursorSeekState::Start;
self.valid_state = CursorValidState::Valid;
Ok(IOResult::Done(seek_result))
}
/// Return a reference to the record the cursor is currently pointing to.
/// If the record has not been parsed yet, we parse it here, yielding control
/// back on I/O.
#[instrument(skip(self), level = Level::DEBUG)]
pub fn record(&self) -> Result<IOResult<Option<Ref<ImmutableRecord>>>> {
if !self.has_record.get() {
return Ok(IOResult::Done(None));
}
let invalidated = self
.reusable_immutable_record
.borrow()
.as_ref()
.is_none_or(|record| record.is_invalidated());
if !invalidated {
*self.parse_record_state.borrow_mut() = ParseRecordState::Init;
let record_ref =
Ref::filter_map(self.reusable_immutable_record.borrow(), |opt| opt.as_ref())
.unwrap();
return Ok(IOResult::Done(Some(record_ref)));
}
if self.mv_cursor.is_some() {
let mv_cursor = self.mv_cursor.as_ref().unwrap().borrow();
let row = mv_cursor.current_row().unwrap().unwrap();
self.get_immutable_record_or_create()
.as_mut()
.unwrap()
.invalidate();
self.get_immutable_record_or_create()
.as_mut()
.unwrap()
.start_serialization(&row.data);
self.record_cursor.borrow_mut().invalidate();
let record_ref =
Ref::filter_map(self.reusable_immutable_record.borrow(), |opt| opt.as_ref())
.unwrap();
return Ok(IOResult::Done(Some(record_ref)));
}
if *self.parse_record_state.borrow() == ParseRecordState::Init {
*self.parse_record_state.borrow_mut() = ParseRecordState::Parsing {
payload: Vec::new(),
};
}
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get_contents();
let cell_idx = self.stack.current_cell_index();
let cell = contents.cell_get(cell_idx as usize, self.usable_space())?;
let (payload, payload_size, first_overflow_page) = match cell {
BTreeCell::TableLeafCell(TableLeafCell {
payload,
payload_size,
first_overflow_page,
..
}) => (payload, payload_size, first_overflow_page),
BTreeCell::IndexInteriorCell(IndexInteriorCell {
payload,
payload_size,
first_overflow_page,
..
}) => (payload, payload_size, first_overflow_page),
BTreeCell::IndexLeafCell(IndexLeafCell {
payload,
first_overflow_page,
payload_size,
}) => (payload, payload_size, first_overflow_page),
_ => unreachable!("unexpected page_type"),
};
if let Some(next_page) = first_overflow_page {
return_if_io!(self.process_overflow_read(payload, next_page, payload_size))
} else {
self.get_immutable_record_or_create()
.as_mut()
.unwrap()
.invalidate();
self.get_immutable_record_or_create()
.as_mut()
.unwrap()
.start_serialization(payload);
self.record_cursor.borrow_mut().invalidate();
};
*self.parse_record_state.borrow_mut() = ParseRecordState::Init;
let record_ref =
Ref::filter_map(self.reusable_immutable_record.borrow(), |opt| opt.as_ref()).unwrap();
Ok(IOResult::Done(Some(record_ref)))
}
#[instrument(skip(self), level = Level::DEBUG)]
pub fn insert(
&mut self,
key: &BTreeKey,
// Indicate whether it's necessary to traverse to find the leaf page
// FIXME: refactor this out into a state machine, these ad-hoc state
// variables are very hard to reason about
mut moved_before: bool,
) -> Result<IOResult<()>> {
tracing::debug!(valid_state = ?self.valid_state, cursor_state = ?self.state, is_write_in_progress = self.is_write_in_progress());
match &self.mv_cursor {
Some(mv_cursor) => match key.maybe_rowid() {
Some(rowid) => {
let row_id = crate::mvcc::database::RowID::new(self.table_id() as u64, rowid);
let record_buf = key.get_record().unwrap().get_payload().to_vec();
let row = crate::mvcc::database::Row::new(row_id, record_buf);
mv_cursor.borrow_mut().insert(row).unwrap();
}
None => todo!("Support mvcc inserts with index btrees"),
},
None => {
match (&self.valid_state, self.is_write_in_progress()) {
(CursorValidState::Valid, _) => {
// consider the current position valid unless the caller explicitly asks us to seek.
}
(CursorValidState::RequireSeek, false) => {
// we must seek.
moved_before = false;
}
(CursorValidState::RequireSeek, true) => {
// illegal to seek during a write no matter what CursorValidState or caller says -- we might e.g. move to the wrong page during balancing
moved_before = true;
}
(CursorValidState::RequireAdvance(direction), _) => {
// FIXME: this is a hack to support the case where we need to advance the cursor after a seek.
// We should have a proper state machine for this.
return_if_io!(match direction {
IterationDirection::Forwards => self.next(),
IterationDirection::Backwards => self.prev(),
});
self.valid_state = CursorValidState::Valid;
self.seek_state = CursorSeekState::Start;
moved_before = true;
}
};
if !moved_before {
let seek_result = match key {
BTreeKey::IndexKey(_) => {
return_if_io!(self.seek(
SeekKey::IndexKey(key.get_record().unwrap()),
SeekOp::GE { eq_only: true }
))
}
BTreeKey::TableRowId(_) => {
return_if_io!(self.seek(
SeekKey::TableRowId(key.to_rowid()),
SeekOp::GE { eq_only: true }
))
}
};
if SeekResult::TryAdvance == seek_result {
self.valid_state =
CursorValidState::RequireAdvance(IterationDirection::Forwards);
return_if_io!(self.next());
}
self.context.take(); // we know where we wanted to move so if there was any saved context, discard it.
self.valid_state = CursorValidState::Valid;
self.seek_state = CursorSeekState::Start;
tracing::debug!(
"seeked to the right place, page is now {:?}",
self.stack.top().get().get().id
);
}
return_if_io!(self.insert_into_page(key));
if key.maybe_rowid().is_some() {
self.has_record.replace(true);
}
}
};
Ok(IOResult::Done(()))
}
/// Delete state machine flow:
/// 1. Start -> check if the rowid to be deleted is present in the page. If not, we return early.
/// 2. DeterminePostBalancingSeekKey -> determine the key to seek to after balancing.
/// 3. LoadPage -> load the page.
/// 4. FindCell -> find the cell to be deleted in the page.
/// 5. ClearOverflowPages -> clear any overflow pages before dropping the cell. If we are in a leaf page, we just drop the cell in place;
///    if we are in an interior page, we need to rotate keys in order to replace the current cell (InteriorNodeReplacement).
/// 6. InteriorNodeReplacement -> copy the left subtree's leaf cell into the deleted interior cell's place.
/// 7. CheckNeedsBalancing / WaitForBalancingToComplete -> perform balancing if needed.
/// 8. SeekAfterBalancing -> adjust the cursor to a node that is closer to the deleted value. Go to Finish.
/// 9. Finish -> the delete operation is done. Return Ok(IOResult::Done(())).
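/// A rough sketch of the transitions (bracketed states are entered only when needed):
/// ```text
/// Start -> DeterminePostBalancingSeekKey -> LoadPage -> FindCell -> ClearOverflowPages
///   -> [InteriorNodeReplacement] -> CheckNeedsBalancing
///   -> [WaitForBalancingToComplete -> SeekAfterBalancing -> [TryAdvance]] -> done
/// ```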
#[instrument(skip(self), level = Level::DEBUG)]
pub fn delete(&mut self) -> Result<IOResult<()>> {
assert!(self.mv_cursor.is_none());
if let CursorState::None = &self.state {
self.state = CursorState::Delete(DeleteInfo {
state: DeleteState::Start,
balance_write_info: None,
})
}
loop {
let delete_state = {
let delete_info = self.state.delete_info().expect("cannot get delete info");
delete_info.state.clone()
};
tracing::debug!(?delete_state);
match delete_state {
DeleteState::Start => {
let page = self.stack.top();
self.pager.add_dirty(&page.get());
if matches!(
page.get().get_contents().page_type(),
PageType::TableLeaf | PageType::TableInterior
) {
if return_if_io!(self.rowid()).is_none() {
self.state = CursorState::None;
return Ok(IOResult::Done(()));
}
} else if self.reusable_immutable_record.borrow().is_none() {
self.state = CursorState::None;
return Ok(IOResult::Done(()));
}
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::DeterminePostBalancingSeekKey;
}
DeleteState::DeterminePostBalancingSeekKey => {
// FIXME: skip this work if we determine the deletion won't result in balancing.
// Right now we calculate the key every time for simplicity/debugging,
// since it doesn't affect correctness, which is more important.
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let target_key = if page.get().is_index() {
let record = match return_if_io!(self.record()) {
Some(record) => record.clone(),
None => unreachable!("there should've been a record"),
};
DeleteSavepoint::Payload(record)
} else {
let Some(rowid) = return_if_io!(self.rowid()) else {
panic!("cursor should be pointing to a record with a rowid");
};
DeleteSavepoint::Rowid(rowid)
};
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::LoadPage {
post_balancing_seek_key: Some(target_key),
};
}
DeleteState::LoadPage {
post_balancing_seek_key,
} => {
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::FindCell {
post_balancing_seek_key,
};
}
DeleteState::FindCell {
post_balancing_seek_key,
} => {
let page = self.stack.top();
let cell_idx = self.stack.current_cell_index() as usize;
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
if cell_idx >= contents.cell_count() {
return_corrupt!(format!(
"Corrupted page: cell index {} is out of bounds for page with {} cells",
cell_idx,
contents.cell_count()
));
}
tracing::debug!(
"DeleteState::FindCell: page_id: {}, cell_idx: {}",
page.get().id,
cell_idx
);
let cell = contents.cell_get(cell_idx, self.usable_space())?;
let original_child_pointer = match &cell {
BTreeCell::TableInteriorCell(interior) => Some(interior.left_child_page),
BTreeCell::IndexInteriorCell(interior) => Some(interior.left_child_page),
_ => None,
};
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::ClearOverflowPages {
cell_idx,
cell,
original_child_pointer,
post_balancing_seek_key,
};
}
DeleteState::ClearOverflowPages {
cell_idx,
cell,
original_child_pointer,
post_balancing_seek_key,
} => {
return_if_io!(self.clear_overflow_pages(&cell));
let page = self.stack.top();
let page = page.get();
let contents = page.get_contents();
let delete_info = self.state.mut_delete_info().unwrap();
if !contents.is_leaf() {
delete_info.state = DeleteState::InteriorNodeReplacement {
page: page.clone(),
btree_depth: self.stack.current(),
cell_idx,
original_child_pointer,
post_balancing_seek_key,
};
} else {
drop_cell(contents, cell_idx, self.usable_space() as u16)?;
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::CheckNeedsBalancing {
btree_depth: self.stack.current(),
post_balancing_seek_key,
};
}
}
DeleteState::InteriorNodeReplacement {
page,
btree_depth,
cell_idx,
original_child_pointer,
post_balancing_seek_key,
} => {
// This is an interior node, we need to handle deletion differently.
// 1. Move cursor to the largest key in the left subtree.
// 2. Replace the cell in the interior (parent) node with that key.
// 3. Delete that key from the child page.
// Step 1: Move cursor to the largest key in the left subtree.
// The largest key is always in a leaf, and so this traversal may involve going multiple pages downwards,
// so we store the page we are currently on.
// avoid calling prev() because it internally calls restore_context() which may cause unintended behavior.
return_if_io!(self.get_prev_record());
// Ensure we keep the parent page at the same position as before the replacement.
self.stack
.node_states
.borrow_mut()
.get_mut(btree_depth)
.expect("parent page should be on the stack")
.cell_idx = cell_idx as i32;
let (cell_payload, leaf_cell_idx) = {
let leaf_page_ref = self.stack.top();
let leaf_page = leaf_page_ref.get();
let leaf_contents = leaf_page.get().contents.as_ref().unwrap();
assert!(leaf_contents.is_leaf());
assert!(leaf_contents.cell_count() > 0);
let leaf_cell_idx = leaf_contents.cell_count() - 1;
let last_cell_on_child_page =
leaf_contents.cell_get(leaf_cell_idx, self.usable_space())?;
let mut cell_payload: Vec<u8> = Vec::new();
let child_pointer =
original_child_pointer.expect("there should be a pointer");
// Rewrite the old leaf cell as an interior cell depending on type.
match last_cell_on_child_page {
BTreeCell::TableLeafCell(leaf_cell) => {
// Table interior cells contain the left child pointer and the rowid as varint.
cell_payload.extend_from_slice(&child_pointer.to_be_bytes());
write_varint_to_vec(leaf_cell.rowid as u64, &mut cell_payload);
}
BTreeCell::IndexLeafCell(leaf_cell) => {
// Index interior cells contain:
// 1. The left child pointer
// 2. The payload size as varint
// 3. The payload
// 4. The first overflow page as varint, omitted if no overflow.
cell_payload.extend_from_slice(&child_pointer.to_be_bytes());
write_varint_to_vec(leaf_cell.payload_size, &mut cell_payload);
cell_payload.extend_from_slice(leaf_cell.payload);
if let Some(first_overflow_page) = leaf_cell.first_overflow_page {
cell_payload
.extend_from_slice(&first_overflow_page.to_be_bytes());
}
}
_ => unreachable!("Expected table leaf cell"),
}
(cell_payload, leaf_cell_idx)
};
let leaf_page = self.stack.top();
self.pager.add_dirty(&page);
self.pager.add_dirty(&leaf_page.get());
// Step 2: Replace the cell in the parent (interior) page.
{
let parent_contents = page.get_contents();
let parent_page_id = page.get().id;
let left_child_page = u32::from_be_bytes(
cell_payload[..4].try_into().expect("invalid cell payload"),
);
turso_assert!(
left_child_page as usize != parent_page_id,
"corrupt: current page and left child page of cell {} are both {}",
left_child_page,
parent_page_id
);
// First, drop the old cell that is being replaced.
drop_cell(parent_contents, cell_idx, self.usable_space() as u16)?;
// Then, insert the new cell (the predecessor) in its place.
insert_into_cell(
parent_contents,
&cell_payload,
cell_idx,
self.usable_space() as u16,
)?;
}
// Step 3: Delete the predecessor cell from the leaf page.
{
let leaf_page_ref = leaf_page.get();
let leaf_contents = leaf_page_ref.get_contents();
drop_cell(leaf_contents, leaf_cell_idx, self.usable_space() as u16)?;
}
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::CheckNeedsBalancing {
btree_depth,
post_balancing_seek_key,
};
}
DeleteState::CheckNeedsBalancing {
btree_depth,
post_balancing_seek_key,
} => {
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
// Check if either the leaf page we took the replacement cell from underflows, or if the interior page we inserted it into overflows OR underflows.
// If the latter is true, we must always balance that level regardless of whether the leaf page (or any ancestor pages in between) need balancing.
let leaf_underflows = {
let leaf_page = page.get();
let leaf_contents = leaf_page.get_contents();
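// Underflow heuristic: the leaf is considered to underflow when more than
// two thirds of its usable space is free.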
let free_space =
compute_free_space(leaf_contents, self.usable_space() as u16);
free_space as usize * 3 > self.usable_space() * 2
};
let interior_overflows_or_underflows = {
// Invariant: ancestor pages on the stack are pinned to the page cache,
// so we don't need return_if_locked_maybe_load! any ancestor,
// and we already loaded the current page above.
let interior_page = self
.stack
.get_page_at_level(btree_depth)
.expect("ancestor page should be on the stack");
let interior_page = interior_page.get();
let interior_contents = interior_page.get_contents();
let overflows = !interior_contents.overflow_cells.is_empty();
if overflows {
true
} else {
let free_space =
compute_free_space(interior_contents, self.usable_space() as u16);
free_space as usize * 3 > self.usable_space() * 2
}
};
let needs_balancing = leaf_underflows || interior_overflows_or_underflows;
if needs_balancing {
let delete_info = self.state.mut_delete_info().unwrap();
if delete_info.balance_write_info.is_none() {
let mut write_info = WriteInfo::new();
write_info.state = WriteState::BalanceStart;
delete_info.balance_write_info = Some(write_info);
}
let balance_only_ancestor =
!leaf_underflows && interior_overflows_or_underflows;
if balance_only_ancestor {
// Only need to balance the ancestor page; move there immediately.
while self.stack.current() > btree_depth {
self.stack.pop();
}
}
let balance_both = leaf_underflows && interior_overflows_or_underflows;
delete_info.state = DeleteState::WaitForBalancingToComplete {
balance_ancestor_at_depth: if balance_both {
Some(btree_depth)
} else {
None
},
target_key: post_balancing_seek_key.unwrap(),
}
} else {
// No balancing needed, we're done
self.stack.retreat();
self.state = CursorState::None;
return Ok(IOResult::Done(()));
}
}
DeleteState::WaitForBalancingToComplete {
target_key,
balance_ancestor_at_depth,
} => {
let delete_info = self.state.mut_delete_info().unwrap();
// Switch the CursorState to Write state for balancing
let write_info = delete_info.balance_write_info.take().unwrap();
self.state = CursorState::Write(write_info);
match self.balance(balance_ancestor_at_depth)? {
IOResult::Done(()) => {
let write_info = match &self.state {
CursorState::Write(wi) => wi.clone(),
_ => unreachable!("Balance operation changed cursor state"),
};
// Move to seek state
self.state = CursorState::Delete(DeleteInfo {
state: DeleteState::SeekAfterBalancing { target_key },
balance_write_info: Some(write_info),
});
}
IOResult::IO => {
// Move to seek state
// Save balance progress and return IO
let write_info = match &self.state {
CursorState::Write(wi) => wi.clone(),
_ => unreachable!("Balance operation changed cursor state"),
};
self.state = CursorState::Delete(DeleteInfo {
state: DeleteState::WaitForBalancingToComplete {
target_key,
balance_ancestor_at_depth,
},
balance_write_info: Some(write_info),
});
return Ok(IOResult::IO);
}
}
}
DeleteState::SeekAfterBalancing { target_key } => {
let key = match &target_key {
DeleteSavepoint::Rowid(rowid) => SeekKey::TableRowId(*rowid),
DeleteSavepoint::Payload(immutable_record) => {
SeekKey::IndexKey(immutable_record)
}
};
// We want to end up pointing at the row to the left of the position of the row we deleted, so
// that after we call next() in the loop, the next row we delete will again be in the same position as this one.
let seek_result = return_if_io!(self.seek(key, SeekOp::LT));
if let SeekResult::TryAdvance = seek_result {
let CursorState::Delete(delete_info) = &self.state else {
unreachable!("expected delete state");
};
self.state = CursorState::Delete(DeleteInfo {
state: DeleteState::TryAdvance,
balance_write_info: delete_info.balance_write_info.clone(),
});
continue;
}
self.state = CursorState::None;
return Ok(IOResult::Done(()));
}
DeleteState::TryAdvance => {
// we use LT always for post-delete seeks, which uses backwards iteration, so we always call prev() here.
return_if_io!(self.prev());
self.state = CursorState::None;
return Ok(IOResult::Done(()));
}
}
}
}
/// In outer joins, whenever the right-side table has no matching row, the query must still return a row
/// for each left-side row. In order to achieve this, we set the null flag on the right-side table cursor
/// so that it returns NULL for all columns until cleared.
#[inline(always)]
pub fn set_null_flag(&mut self, flag: bool) {
self.null_flag = flag;
}
#[inline(always)]
pub fn get_null_flag(&self) -> bool {
self.null_flag
}
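/// Check whether a table row with the given integer key exists by performing an exact-match seek.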
#[instrument(skip_all, level = Level::DEBUG)]
pub fn exists(&mut self, key: &Value) -> Result<IOResult<bool>> {
assert!(self.mv_cursor.is_none());
let int_key = match key {
Value::Integer(i) => i,
_ => unreachable!("btree tables are indexed by integers!"),
};
let seek_result =
return_if_io!(self.seek(SeekKey::TableRowId(*int_key), SeekOp::GE { eq_only: true }));
let exists = matches!(seek_result, SeekResult::Found);
self.invalidate_record();
Ok(IOResult::Done(exists))
}
/// Clear the overflow pages linked to a specific cell.
/// Uses a state machine to keep track of its operations so that traversal can be
/// resumed from the last point after an IO interruption.
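/// A rough sketch of the chain being freed, where the first 4 bytes of each overflow page hold
/// the number of the next page and 0 terminates the chain:
/// ```text
/// cell -> overflow page -> overflow page -> ... -> 0
/// ```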
#[instrument(skip_all, level = Level::DEBUG)]
fn clear_overflow_pages(&mut self, cell: &BTreeCell) -> Result<IOResult<()>> {
loop {
let state = self.overflow_state.take().unwrap_or(OverflowState::Start);
match state {
OverflowState::Start => {
let first_overflow_page = match cell {
BTreeCell::TableLeafCell(leaf_cell) => leaf_cell.first_overflow_page,
BTreeCell::IndexLeafCell(leaf_cell) => leaf_cell.first_overflow_page,
BTreeCell::IndexInteriorCell(interior_cell) => {
interior_cell.first_overflow_page
}
BTreeCell::TableInteriorCell(_) => return Ok(IOResult::Done(())), // No overflow pages
};
if let Some(page) = first_overflow_page {
self.overflow_state = Some(OverflowState::ProcessPage { next_page: page });
continue;
} else {
self.overflow_state = Some(OverflowState::Done);
}
}
OverflowState::ProcessPage { next_page } => {
if next_page < 2
|| next_page as usize
> header_accessor::get_database_size(&self.pager)? as usize
{
self.overflow_state = None;
return Err(LimboError::Corrupt("Invalid overflow page number".into()));
}
let page = self.read_page(next_page as usize)?;
return_if_locked_maybe_load!(self.pager, page);
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
let next = contents.read_u32(0);
return_if_io!(self.pager.free_page(Some(page), next_page as usize));
if next != 0 {
self.overflow_state = Some(OverflowState::ProcessPage { next_page: next });
} else {
self.overflow_state = Some(OverflowState::Done);
}
}
OverflowState::Done => {
self.overflow_state = None;
return Ok(IOResult::Done(()));
}
};
}
}
/// Destroys a B-tree by freeing all its pages in an iterative depth-first order.
/// This ensures child pages are freed before their parents
/// Uses a state machine to keep track of the operation to ensure IO doesn't cause repeated traversals
///
/// # Example
/// For a B-tree with this structure (where 4' is an overflow page):
/// ```text
/// 1 (root)
/// / \
/// 2 3
/// / \ / \
/// 4' <- 4 5 6 7
/// ```
///
/// The destruction order would be: [4',4,5,2,6,7,3,1]
#[instrument(skip(self), level = Level::DEBUG)]
pub fn btree_destroy(&mut self) -> Result<IOResult<Option<usize>>> {
if let CursorState::None = &self.state {
self.move_to_root()?;
self.state = CursorState::Destroy(DestroyInfo {
state: DestroyState::Start,
});
}
loop {
let destroy_state = {
let destroy_info = self
.state
.destroy_info()
.expect("unable to get a mut reference to destroy state in cursor");
destroy_info.state.clone()
};
match destroy_state {
DestroyState::Start => {
let destroy_info = self
.state
.mut_destroy_info()
.expect("unable to get a mut reference to destroy state in cursor");
destroy_info.state = DestroyState::LoadPage;
}
DestroyState::LoadPage => {
let page = self.stack.top();
return_if_locked_maybe_load!(self.pager, page);
let destroy_info = self
.state
.mut_destroy_info()
.expect("unable to get a mut reference to destroy state in cursor");
destroy_info.state = DestroyState::ProcessPage;
}
DestroyState::ProcessPage => {
let page = self.stack.top();
self.stack.advance();
assert!(page.get().is_loaded()); // page should be loaded at this time
let page = page.get();
let contents = page.get().contents.as_ref().unwrap();
let cell_idx = self.stack.current_cell_index();
// If we've processed all cells in this page, figure out what to do with this page
if cell_idx >= contents.cell_count() as i32 {
match (contents.is_leaf(), cell_idx) {
// Leaf pages with all cells processed
(true, n) if n >= contents.cell_count() as i32 => {
let destroy_info = self.state.mut_destroy_info().expect(
"unable to get a mut reference to destroy state in cursor",
);
destroy_info.state = DestroyState::FreePage;
continue;
}
// Non-leaf page which has processed all children but not its potential right child
(false, n) if n == contents.cell_count() as i32 => {
if let Some(rightmost) = contents.rightmost_pointer() {
let rightmost_page = self.read_page(rightmost as usize)?;
self.stack.push(rightmost_page);
let destroy_info = self.state.mut_destroy_info().expect(
"unable to get a mut reference to destroy state in cursor",
);
destroy_info.state = DestroyState::LoadPage;
} else {
let destroy_info = self.state.mut_destroy_info().expect(
"unable to get a mut reference to destroy state in cursor",
);
destroy_info.state = DestroyState::FreePage;
}
continue;
}
// Non-leaf page which has processed all children and its right child
(false, n) if n > contents.cell_count() as i32 => {
let destroy_info = self.state.mut_destroy_info().expect(
"unable to get a mut reference to destroy state in cursor",
);
destroy_info.state = DestroyState::FreePage;
continue;
}
_ => unreachable!("Invalid cell idx state"),
}
}
// We have not yet processed all cells in this page
// Get the current cell
let cell = contents.cell_get(cell_idx as usize, self.usable_space())?;
match contents.is_leaf() {
// For a leaf cell, clear the overflow pages associated with this cell
true => {
let destroy_info = self
.state
.mut_destroy_info()
.expect("unable to get a mut reference to destroy state in cursor");
destroy_info.state = DestroyState::ClearOverflowPages { cell };
continue;
}
// For interior cells, check the type of cell to determine what to do
false => match &cell {
// For index interior cells, remove the overflow pages
BTreeCell::IndexInteriorCell(_) => {
let destroy_info = self.state.mut_destroy_info().expect(
"unable to get a mut reference to destroy state in cursor",
);
destroy_info.state = DestroyState::ClearOverflowPages { cell };
continue;
}
// For all other interior cells, load the left child page
_ => {
let child_page_id = match &cell {
BTreeCell::TableInteriorCell(cell) => cell.left_child_page,
BTreeCell::IndexInteriorCell(cell) => cell.left_child_page,
_ => panic!("expected interior cell"),
};
let child_page = self.read_page(child_page_id as usize)?;
self.stack.push(child_page);
let destroy_info = self.state.mut_destroy_info().expect(
"unable to get a mut reference to destroy state in cursor",
);
destroy_info.state = DestroyState::LoadPage;
continue;
}
},
}
}
DestroyState::ClearOverflowPages { cell } => {
match self.clear_overflow_pages(&cell)? {
IOResult::Done(_) => match cell {
// For an index interior cell, clear the left child page now that overflow pages have been cleared
BTreeCell::IndexInteriorCell(index_int_cell) => {
let child_page =
self.read_page(index_int_cell.left_child_page as usize)?;
self.stack.push(child_page);
let destroy_info = self.state.mut_destroy_info().expect(
"unable to get a mut reference to destroy state in cursor",
);
destroy_info.state = DestroyState::LoadPage;
continue;
}
// For any leaf cell, advance the index now that overflow pages have been cleared
BTreeCell::TableLeafCell(_) | BTreeCell::IndexLeafCell(_) => {
let destroy_info = self.state.mut_destroy_info().expect(
"unable to get a mut reference to destroy state in cursor",
);
destroy_info.state = DestroyState::LoadPage;
}
_ => panic!("unexpected cell type"),
},
IOResult::IO => return Ok(IOResult::IO),
}
}
DestroyState::FreePage => {
let page = self.stack.top();
let page_id = page.get().get().id;
return_if_io!(self.pager.free_page(Some(page.get()), page_id));
if self.stack.has_parent() {
self.stack.pop();
let destroy_info = self
.state
.mut_destroy_info()
.expect("unable to get a mut reference to destroy state in cursor");
destroy_info.state = DestroyState::ProcessPage;
} else {
self.state = CursorState::None;
// TODO: For now, no-op the result and always return None. This will change once [AUTO_VACUUM](https://www.sqlite.org/lang_vacuum.html) is introduced
// At that point, the last root page(call this x) will be moved into the position of the root page of this table and the value returned will be x
return Ok(IOResult::Done(None));
}
}
}
}
}
pub fn table_id(&self) -> usize {
self.root_page
}
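/// Overwrite the record stored in an existing cell. If the new payload is exactly the same size
/// as the old cell's local region, it is overwritten in place; otherwise the old cell is dropped
/// and a new cell is inserted at the same index.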
pub fn overwrite_cell(
&mut self,
page_ref: BTreePage,
cell_idx: usize,
record: &ImmutableRecord,
) -> Result<IOResult<()>> {
// build the new payload
let page = page_ref.get();
let page_contents = page.get().contents.as_ref().unwrap();
let serial_types_len = self.record_cursor.borrow_mut().len(record);
let mut new_payload = Vec::with_capacity(serial_types_len);
let rowid = return_if_io!(self.rowid());
fill_cell_payload(
page_contents,
rowid,
&mut new_payload,
cell_idx,
record,
self.usable_space(),
self.pager.clone(),
);
// figure out old cell offset & size
let (old_offset, old_local_size) = {
let page_ref = page_ref.get();
let page = page_ref.get().contents.as_ref().unwrap();
page.cell_get_raw_region(cell_idx, self.usable_space())
};
// if the new payload is exactly the same size as the old local cell, do an in-place overwrite
if new_payload.len() == old_local_size {
self.overwrite_content(page_ref.clone(), old_offset, &new_payload)?;
Ok(IOResult::Done(()))
} else {
// doesn't fit, drop it and insert a new one
drop_cell(
page_ref.get().get_contents(),
cell_idx,
self.usable_space() as u16,
)?;
insert_into_cell(
page_ref.get().get_contents(),
&new_payload,
cell_idx,
self.usable_space() as u16,
)?;
Ok(IOResult::Done(()))
}
}
pub fn overwrite_content(
&mut self,
page_ref: BTreePage,
dest_offset: usize,
new_payload: &[u8],
) -> Result<IOResult<()>> {
return_if_locked!(page_ref.get());
let page_ref = page_ref.get();
let buf = page_ref.get().contents.as_mut().unwrap().as_ptr();
buf[dest_offset..dest_offset + new_payload.len()].copy_from_slice(new_payload);
Ok(IOResult::Done(()))
}
fn get_immutable_record_or_create(&self) -> std::cell::RefMut<'_, Option<ImmutableRecord>> {
if self.reusable_immutable_record.borrow().is_none() {
let record = ImmutableRecord::new(4096);
self.reusable_immutable_record.replace(Some(record));
}
self.reusable_immutable_record.borrow_mut()
}
fn get_immutable_record(&self) -> std::cell::RefMut<'_, Option<ImmutableRecord>> {
self.reusable_immutable_record.borrow_mut()
}
pub fn is_write_in_progress(&self) -> bool {
matches!(self.state, CursorState::Write(_))
}
/// Count the number of entries in the b-tree
///
/// Only supposed to be used in the context of a simple COUNT(*) SELECT statement
#[instrument(skip(self), level = Level::DEBUG)]
pub fn count(&mut self) -> Result<IOResult<usize>> {
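// A zero running count means no entries have been tallied yet, so (re)position at the root;
// a non-zero count means we are resuming a traversal after an IO yield.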
if self.count == 0 {
self.move_to_root()?;
}
if let Some(_mv_cursor) = &self.mv_cursor {
todo!("Implement count for mvcc");
}
let mut mem_page_rc;
let mut mem_page;
let mut contents;
loop {
mem_page_rc = self.stack.top();
return_if_locked_maybe_load!(self.pager, mem_page_rc);
mem_page = mem_page_rc.get();
contents = mem_page.get().contents.as_ref().unwrap();
/* If this is a leaf page or the tree is not an int-key tree, then
** this page contains countable entries. Increment the entry counter
** accordingly.
*/
if !matches!(contents.page_type(), PageType::TableInterior) {
self.count += contents.cell_count();
}
self.stack.advance();
let cell_idx = self.stack.current_cell_index() as usize;
// The second condition is necessary in case we returned early (page locked) in the loop below and are now resuming
if contents.is_leaf() || cell_idx > contents.cell_count() {
loop {
if !self.stack.has_parent() {
// All pages of the b-tree have been visited. Return successfully
self.move_to_root()?;
return Ok(IOResult::Done(self.count));
}
// Move to parent
self.stack.pop();
mem_page_rc = self.stack.top();
return_if_locked_maybe_load!(self.pager, mem_page_rc);
mem_page = mem_page_rc.get();
contents = mem_page.get().contents.as_ref().unwrap();
let cell_idx = self.stack.current_cell_index() as usize;
if cell_idx <= contents.cell_count() {
break;
}
}
}
let cell_idx = self.stack.current_cell_index() as usize;
assert!(cell_idx <= contents.cell_count());
assert!(!contents.is_leaf());
if cell_idx == contents.cell_count() {
// Move to right child
// should be safe as contents is not a leaf page
let right_most_pointer = contents.rightmost_pointer().unwrap();
self.stack.advance();
let mem_page = self.read_page(right_most_pointer as usize)?;
self.stack.push(mem_page);
} else {
// Move to child left page
let cell = contents.cell_get(cell_idx, self.usable_space())?;
match cell {
BTreeCell::TableInteriorCell(TableInteriorCell {
left_child_page, ..
})
| BTreeCell::IndexInteriorCell(IndexInteriorCell {
left_child_page, ..
}) => {
self.stack.advance();
let mem_page = self.read_page(left_child_page as usize)?;
self.stack.push(mem_page);
}
_ => unreachable!(),
}
}
}
}
// Save cursor context, to be restored later
pub fn save_context(&mut self, cursor_context: CursorContext) {
self.valid_state = CursorValidState::RequireSeek;
self.context = Some(cursor_context);
}
/// If context is defined, restore it and set it to None on success
#[instrument(skip_all, level = Level::DEBUG)]
fn restore_context(&mut self) -> Result<IOResult<()>> {
if self.context.is_none() || matches!(self.valid_state, CursorValidState::Valid) {
return Ok(IOResult::Done(()));
}
if let CursorValidState::RequireAdvance(direction) = self.valid_state {
let has_record = return_if_io!(match direction {
// Avoid calling next()/prev() directly because they immediately call restore_context()
IterationDirection::Forwards => self.get_next_record(),
IterationDirection::Backwards => self.get_prev_record(),
});
self.has_record.set(has_record);
self.invalidate_record();
self.context = None;
self.valid_state = CursorValidState::Valid;
return Ok(IOResult::Done(()));
}
let ctx = self.context.take().unwrap();
let seek_key = match ctx {
CursorContext::TableRowId(rowid) => SeekKey::TableRowId(rowid),
CursorContext::IndexKeyRowId(ref record) => SeekKey::IndexKey(record),
};
let res = self.seek(seek_key, SeekOp::GE { eq_only: true })?;
match res {
IOResult::Done(res) => {
if let SeekResult::TryAdvance = res {
self.valid_state =
CursorValidState::RequireAdvance(IterationDirection::Forwards);
self.context = Some(ctx);
return Ok(IOResult::IO);
}
self.valid_state = CursorValidState::Valid;
Ok(IOResult::Done(()))
}
IOResult::IO => {
self.context = Some(ctx);
Ok(IOResult::IO)
}
}
}
pub fn read_page(&self, page_idx: usize) -> Result<BTreePage> {
btree_read_page(&self.pager, page_idx)
}
pub fn allocate_page(&self, page_type: PageType, offset: usize) -> Result<BTreePage> {
self.pager
.do_allocate_page(page_type, offset, BtreePageAllocMode::Any)
}
}
#[derive(Debug, thiserror::Error)]
pub enum IntegrityCheckError {
#[error("Cell {cell_idx} in page {page_id} is out of range. cell_range={cell_start}..{cell_end}, content_area={content_area}, usable_space={usable_space}")]
CellOutOfRange {
cell_idx: usize,
page_id: usize,
cell_start: usize,
cell_end: usize,
content_area: usize,
usable_space: usize,
},
#[error("Cell {cell_idx} in page {page_id} extends out of page. cell_range={cell_start}..{cell_end}, content_area={content_area}, usable_space={usable_space}")]
CellOverflowsPage {
cell_idx: usize,
page_id: usize,
cell_start: usize,
cell_end: usize,
content_area: usize,
usable_space: usize,
},
#[error("Page {page_id} cell {cell_idx} has rowid={rowid} in wrong order. Parent cell has parent_rowid={max_intkey} and next_rowid={next_rowid}")]
CellRowidOutOfRange {
page_id: usize,
cell_idx: usize,
rowid: i64,
max_intkey: i64,
next_rowid: i64,
},
#[error("Page {page_id} is at different depth from another leaf page this_page_depth={this_page_depth}, other_page_depth={other_page_depth} ")]
LeafDepthMismatch {
page_id: usize,
this_page_depth: usize,
other_page_depth: usize,
},
#[error("Page {page_id} detected freeblock that extends page start={start} end={end}")]
FreeBlockOutOfRange {
page_id: usize,
start: usize,
end: usize,
},
#[error("Page {page_id} cell overlap detected at position={start} with previous_end={prev_end}. content_area={content_area}, is_free_block={is_free_block}")]
CellOverlap {
page_id: usize,
start: usize,
prev_end: usize,
content_area: usize,
is_free_block: bool,
},
#[error("Page {page_id} unexpected fragmentation got={got}, expected={expected}")]
UnexpectedFragmentation {
page_id: usize,
got: usize,
expected: usize,
},
}
#[derive(Clone)]
struct IntegrityCheckPageEntry {
page_idx: usize,
level: usize,
max_intkey: i64,
}
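/// Reentrant state for [integrity_check]: an explicit stack of pages left to verify replaces
/// recursion, so the check can be suspended on pending IO and resumed later.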
pub struct IntegrityCheckState {
pub current_page: usize,
page_stack: Vec<IntegrityCheckPageEntry>,
first_leaf_level: Option<usize>,
}
impl IntegrityCheckState {
pub fn new(page_idx: usize) -> Self {
Self {
current_page: page_idx,
page_stack: vec![IntegrityCheckPageEntry {
page_idx,
level: 0,
max_intkey: i64::MAX,
}],
first_leaf_level: None,
}
}
}
impl std::fmt::Debug for IntegrityCheckState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("IntegrityCheckState")
.field("current_page", &self.current_page)
.field("first_leaf_level", &self.first_leaf_level)
.finish()
}
}
/// Perform an integrity check on a whole table/index. We check for:
/// 1. Correct order of keys in the case of rowids.
/// 2. No overlap between cells.
/// 3. Cells do not escape their expected range.
/// 4. All leaf pages are at equal depth.
/// 5. Overflow pages are correct (TODO)
///
/// In order to keep this reentrant, we keep a stack of pages we need to check. Ideally, like
/// SQLite, we would have implemented a recursive solution, which would make it easier to check
/// the depth.
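///
/// A minimal sketch of driving the check to completion from within this module (`page_stack`
/// is private, so the emptiness test below assumes module-local access; `root_page_idx` and
/// `pager` are illustrative names):
///
/// ```ignore
/// let mut state = IntegrityCheckState::new(root_page_idx);
/// let mut errors = Vec::new();
/// while !state.page_stack.is_empty() {
///     // Each Done(()) verifies one page and pushes its children onto the stack.
///     if let IOResult::IO = integrity_check(&mut state, &mut errors, &pager)? {
///         // The page read is still in flight; run the event loop and retry.
///         pager.io.run_once()?;
///     }
/// }
/// assert!(errors.is_empty(), "integrity check failed: {errors:?}");
/// ```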
pub fn integrity_check(
state: &mut IntegrityCheckState,
errors: &mut Vec<IntegrityCheckError>,
pager: &Rc<Pager>,
) -> Result<IOResult<()>> {
let Some(IntegrityCheckPageEntry {
page_idx,
level,
max_intkey,
}) = state.page_stack.last().cloned()
else {
return Ok(IOResult::Done(()));
};
let page = btree_read_page(pager, page_idx)?;
return_if_locked_maybe_load!(pager, page);
state.page_stack.pop();
let page = page.get();
let contents = page.get_contents();
let usable_space = pager.usable_space() as u16;
let mut coverage_checker = CoverageChecker::new(page.get().id);
    // Now we check every cell for a few things:
    // 1. Check the cell is in the correct range: it must not extend past the end of the page and
    //    must not start before the cell content area.
    // 2. We add the cell to the coverage checker in order to check that cells do not overlap.
    // 3. We check the order of rowids in the case of table pages. We iterate backwards in order
    //    to check that the current cell's rowid is less than the next cell's. We also check that
    //    the rowid is less than the parent's divider cell. In case this page is the root page,
    //    the max rowid will be i64::MAX.
    // 4. We append child pages to the stack to check later.
    // 5. In the case of a leaf page, check that the current level (depth) is equal to the other
    //    leaf pages we have seen.
let mut next_rowid = max_intkey;
for cell_idx in (0..contents.cell_count()).rev() {
let (cell_start, cell_length) =
contents.cell_get_raw_region(cell_idx, usable_space as usize);
if cell_start < contents.cell_content_area() as usize
|| cell_start > usable_space as usize - 4
{
errors.push(IntegrityCheckError::CellOutOfRange {
cell_idx,
page_id: page.get().id,
cell_start,
cell_end: cell_start + cell_length,
content_area: contents.cell_content_area() as usize,
usable_space: usable_space as usize,
});
}
if cell_start + cell_length > usable_space as usize {
errors.push(IntegrityCheckError::CellOverflowsPage {
cell_idx,
page_id: page.get().id,
cell_start,
cell_end: cell_start + cell_length,
content_area: contents.cell_content_area() as usize,
usable_space: usable_space as usize,
});
}
coverage_checker.add_cell(cell_start, cell_start + cell_length);
let cell = contents.cell_get(cell_idx, usable_space as usize)?;
match cell {
BTreeCell::TableInteriorCell(table_interior_cell) => {
state.page_stack.push(IntegrityCheckPageEntry {
page_idx: table_interior_cell.left_child_page as usize,
level: level + 1,
max_intkey: table_interior_cell.rowid,
});
let rowid = table_interior_cell.rowid;
if rowid > max_intkey || rowid > next_rowid {
errors.push(IntegrityCheckError::CellRowidOutOfRange {
page_id: page.get().id,
cell_idx,
rowid,
max_intkey,
next_rowid,
});
}
next_rowid = rowid;
}
BTreeCell::TableLeafCell(table_leaf_cell) => {
                // check that the depth of this leaf page equals the other leaf pages
if let Some(expected_leaf_level) = state.first_leaf_level {
if expected_leaf_level != level {
errors.push(IntegrityCheckError::LeafDepthMismatch {
page_id: page.get().id,
this_page_depth: level,
other_page_depth: expected_leaf_level,
});
}
} else {
state.first_leaf_level = Some(level);
}
let rowid = table_leaf_cell.rowid;
if rowid > max_intkey || rowid > next_rowid {
errors.push(IntegrityCheckError::CellRowidOutOfRange {
page_id: page.get().id,
cell_idx,
rowid,
max_intkey,
next_rowid,
});
}
next_rowid = rowid;
}
BTreeCell::IndexInteriorCell(index_interior_cell) => {
state.page_stack.push(IntegrityCheckPageEntry {
page_idx: index_interior_cell.left_child_page as usize,
level: level + 1,
max_intkey, // we don't care about intkey in non-table pages
});
}
BTreeCell::IndexLeafCell(_) => {
                // check that the depth of this leaf page equals the other leaf pages
if let Some(expected_leaf_level) = state.first_leaf_level {
if expected_leaf_level != level {
errors.push(IntegrityCheckError::LeafDepthMismatch {
page_id: page.get().id,
this_page_depth: level,
other_page_depth: expected_leaf_level,
});
}
} else {
state.first_leaf_level = Some(level);
}
}
}
}
// Now we add free blocks to the coverage checker
let first_freeblock = contents.first_freeblock();
if first_freeblock > 0 {
let mut pc = first_freeblock;
while pc > 0 {
let next = contents.read_u16_no_offset(pc as usize);
let size = contents.read_u16_no_offset(pc as usize + 2) as usize;
// check it doesn't go out of range
if pc > usable_space - 4 {
errors.push(IntegrityCheckError::FreeBlockOutOfRange {
page_id: page.get().id,
start: pc as usize,
end: pc as usize + size,
});
break;
}
coverage_checker.add_free_block(pc as usize, pc as usize + size);
pc = next;
}
}
// Let's check the overlap of freeblocks and cells now that we have collected them all.
coverage_checker.analyze(
usable_space,
contents.cell_content_area() as usize,
errors,
contents.num_frag_free_bytes() as usize,
);
Ok(IOResult::Done(()))
}
pub fn btree_read_page(pager: &Rc<Pager>, page_idx: usize) -> Result<BTreePage> {
pager.read_page(page_idx).map(|page| {
Arc::new(BTreePageInner {
page: RefCell::new(page),
})
})
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct IntegrityCheckCellRange {
start: usize,
end: usize,
is_free_block: bool,
}
// Implement ordering for min-heap (smallest start address first)
impl Ord for IntegrityCheckCellRange {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.start.cmp(&other.start)
}
}
impl PartialOrd for IntegrityCheckCellRange {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
#[cfg(debug_assertions)]
fn validate_cells_after_insertion(cell_array: &CellArray, leaf_data: bool) {
for cell in &cell_array.cell_payloads {
assert!(cell.len() >= 4);
if leaf_data {
assert!(cell[0] != 0, "payload is {cell:?}");
}
}
}
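/// Checks that cells and freeblocks tile the cell content area without overlapping.
/// Ranges are collected into a min-heap and popped in ascending start order by
/// [CoverageChecker::analyze], which compares each range's start against the previous range's
/// end and sums the gaps in between; the total is then checked against the page's
/// fragmented-bytes counter.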
pub struct CoverageChecker {
/// Min-heap ordered by cell start
heap: BinaryHeap<Reverse<IntegrityCheckCellRange>>,
page_idx: usize,
}
impl CoverageChecker {
pub fn new(page_idx: usize) -> Self {
Self {
heap: BinaryHeap::new(),
page_idx,
}
}
fn add_range(&mut self, cell_start: usize, cell_end: usize, is_free_block: bool) {
self.heap.push(Reverse(IntegrityCheckCellRange {
start: cell_start,
end: cell_end,
is_free_block,
}));
}
pub fn add_cell(&mut self, cell_start: usize, cell_end: usize) {
self.add_range(cell_start, cell_end, false);
}
pub fn add_free_block(&mut self, cell_start: usize, cell_end: usize) {
self.add_range(cell_start, cell_end, true);
}
pub fn analyze(
&mut self,
usable_space: u16,
content_area: usize,
errors: &mut Vec<IntegrityCheckError>,
expected_fragmentation: usize,
) {
let mut fragmentation = 0;
let mut prev_end = content_area;
while let Some(cell) = self.heap.pop() {
let start = cell.0.start;
if prev_end > start {
errors.push(IntegrityCheckError::CellOverlap {
page_id: self.page_idx,
start,
prev_end,
content_area,
is_free_block: cell.0.is_free_block,
});
break;
} else {
fragmentation += start - prev_end;
prev_end = cell.0.end;
}
}
fragmentation += usable_space as usize - prev_end;
if fragmentation != expected_fragmentation {
errors.push(IntegrityCheckError::UnexpectedFragmentation {
page_id: self.page_idx,
got: fragmentation,
expected: expected_fragmentation,
});
}
}
}
/// Stack of pages representing the tree traversal order.
/// current_page represents the current page being used in the tree and current_page - 1 would be
/// the parent. Using current_page + 1 or higher is undefined behaviour.
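///
/// For example, after descending root -> interior -> leaf, the stack holds
/// [root, interior, leaf] with current_page == 2; pop() unpins the leaf and moves back up,
/// leaving current_page == 1 pointing at the interior page.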
struct PageStack {
/// Pointer to the current page being consumed
current_page: Cell<i32>,
    /// List of pages in the stack. The root page will be at index 0.
pub stack: RefCell<[Option<BTreePage>; BTCURSOR_MAX_DEPTH + 1]>,
/// List of cell indices in the stack.
/// node_states[current_page] is the current cell index being consumed. Similarly
/// node_states[current_page-1] is the cell index of the parent of the current page
/// that we save in case of going back up.
/// There are two points that need special attention:
/// If node_states[current_page] = -1, it indicates that the current iteration has reached the start of the current_page
/// If node_states[current_page] = `cell_count`, it means that the current iteration has reached the end of the current_page
node_states: RefCell<[BTreeNodeState; BTCURSOR_MAX_DEPTH + 1]>,
}
impl PageStack {
fn increment_current(&self) {
self.current_page.set(self.current_page.get() + 1);
}
fn decrement_current(&self) {
assert!(self.current_page.get() > 0);
self.current_page.set(self.current_page.get() - 1);
}
/// Push a new page onto the stack.
/// This effectively means traversing to a child page.
#[instrument(skip_all, level = Level::DEBUG, name = "pagestack::push")]
fn _push(&self, page: BTreePage, starting_cell_idx: i32) {
tracing::trace!(
current = self.current_page.get(),
new_page_id = page.get().get().id,
);
'validate: {
let current = self.current_page.get();
if current == -1 {
break 'validate;
}
let stack = self.stack.borrow();
let current_top = stack[current as usize].as_ref();
if let Some(current_top) = current_top {
turso_assert!(
current_top.get().get().id != page.get().get().id,
"about to push page {} twice",
page.get().get().id
);
}
}
self.populate_parent_cell_count();
self.increment_current();
let current = self.current_page.get();
assert!(
current < BTCURSOR_MAX_DEPTH as i32,
"corrupted database, stack is bigger than expected"
);
assert!(current >= 0);
// Pin the page to prevent it from being evicted while on the stack
page.get().pin();
self.stack.borrow_mut()[current as usize] = Some(page);
self.node_states.borrow_mut()[current as usize] = BTreeNodeState {
cell_idx: starting_cell_idx,
            cell_count: None, // we don't know the cell count yet, so we set it to None. Any code pushing a child page onto the stack MUST set the parent page's cell_count.
};
}
/// Populate the parent page's cell count.
    /// This is needed so that we can, from a child page, check an ancestor page's position relative to its cell index
/// without having to perform IO to get the ancestor page contents.
///
/// This rests on the assumption that the parent page is already in memory whenever a child is pushed onto the stack.
/// We currently ensure this by pinning all the pages on [PageStack] to the page cache so that they cannot be evicted.
fn populate_parent_cell_count(&self) {
let stack_empty = self.current_page.get() == -1;
if stack_empty {
return;
}
let current = self.current();
let stack = self.stack.borrow();
let page = stack[current].as_ref().unwrap();
let page = page.get();
turso_assert!(
page.is_pinned(),
"parent page {} is not pinned",
page.get().id
);
turso_assert!(
page.is_loaded(),
"parent page {} is not loaded",
page.get().id
);
let contents = page.get_contents();
let cell_count = contents.cell_count() as i32;
self.node_states.borrow_mut()[current].cell_count = Some(cell_count);
}
fn push(&self, page: BTreePage) {
self._push(page, -1);
}
fn push_backwards(&self, page: BTreePage) {
self._push(page, i32::MAX);
}
/// Pop a page off the stack.
/// This effectively means traversing back up to a parent page.
#[instrument(skip_all, level = Level::DEBUG, name = "pagestack::pop")]
fn pop(&self) {
let current = self.current_page.get();
assert!(current >= 0);
tracing::trace!(current);
// Unpin the page before removing it from the stack
if let Some(page) = &self.stack.borrow()[current as usize] {
page.get().unpin();
}
self.node_states.borrow_mut()[current as usize] = BTreeNodeState::default();
self.stack.borrow_mut()[current as usize] = None;
self.decrement_current();
}
/// Get the top page on the stack.
/// This is the page that is currently being traversed.
#[instrument(skip(self), level = Level::DEBUG, name = "pagestack::top", )]
fn top(&self) -> BTreePage {
let page = self.stack.borrow()[self.current()]
.as_ref()
.unwrap()
.clone();
tracing::trace!(current = self.current(), page_id = page.get().get().id);
page
}
/// Current page pointer being used
fn current(&self) -> usize {
let current = self.current_page.get() as usize;
assert!(self.current_page.get() >= 0);
current
}
/// Cell index of the current page
fn current_cell_index(&self) -> i32 {
let current = self.current();
self.node_states.borrow()[current].cell_idx
}
/// Check if the current cell index is less than 0.
/// This means we have been iterating backwards and have reached the start of the page.
fn current_cell_index_less_than_min(&self) -> bool {
let cell_idx = self.current_cell_index();
cell_idx < 0
}
/// Advance the current cell index of the current page to the next cell.
    /// We usually advance after traversing to a new page.
#[instrument(skip(self), level = Level::DEBUG, name = "pagestack::advance",)]
fn advance(&self) {
let current = self.current();
tracing::trace!(
curr_cell_index = self.node_states.borrow()[current].cell_idx,
node_states = ?self.node_states.borrow().iter().map(|state| state.cell_idx).collect::<Vec<_>>(),
);
self.node_states.borrow_mut()[current].cell_idx += 1;
}
#[instrument(skip(self), level = Level::DEBUG, name = "pagestack::retreat")]
fn retreat(&self) {
let current = self.current();
tracing::trace!(
curr_cell_index = self.node_states.borrow()[current].cell_idx,
node_states = ?self.node_states.borrow().iter().map(|state| state.cell_idx).collect::<Vec<_>>(),
);
self.node_states.borrow_mut()[current].cell_idx -= 1;
}
fn set_cell_index(&self, idx: i32) {
let current = self.current();
self.node_states.borrow_mut()[current].cell_idx = idx;
}
fn has_parent(&self) -> bool {
self.current_page.get() > 0
}
/// Get a page at a specific level in the stack (0 = root, 1 = first child, etc.)
fn get_page_at_level(&self, level: usize) -> Option<BTreePage> {
let stack = self.stack.borrow();
if level < stack.len() {
stack[level].clone()
} else {
None
}
}
fn unpin_all_if_pinned(&self) {
self.stack
.borrow_mut()
.iter_mut()
.flatten()
.for_each(|page| {
let _ = page.get().try_unpin();
});
}
fn clear(&self) {
self.unpin_all_if_pinned();
self.current_page.set(-1);
}
}
impl Drop for PageStack {
fn drop(&mut self) {
self.unpin_all_if_pinned();
}
}
/// Used for redistributing cells during a balance operation.
struct CellArray {
/// The actual cell data.
/// For all other page types except table leaves, this will also contain the associated divider cell from the parent page.
cell_payloads: Vec<&'static mut [u8]>,
/// Prefix sum of cells in each page.
/// For example, if three pages have 1, 2, and 3 cells, respectively,
/// then cell_count_per_page_cumulative will be [1, 3, 6].
cell_count_per_page_cumulative: [u16; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE],
}
impl CellArray {
pub fn cell_size_bytes(&self, cell_idx: usize) -> u16 {
self.cell_payloads[cell_idx].len() as u16
}
/// Returns the number of cells up to and including the given page.
pub fn cell_count_up_to_page(&self, page_idx: usize) -> usize {
self.cell_count_per_page_cumulative[page_idx] as usize
}
}
impl BTreePageInner {
pub fn get(&self) -> PageRef {
self.page.borrow().clone()
}
}
/// Try to find a free block big enough for the requested amount, and allocate from it if found.
fn find_free_cell(page_ref: &PageContent, usable_space: u16, amount: usize) -> Result<usize> {
    // NOTE: the freelist is kept in ascending order of offset (pc)
    // The reserved bytes at the end of the page are unusable, so maxpc is computed by
    // subtracting the requested amount from usable_space.
let mut prev_pc = page_ref.offset + offset::BTREE_FIRST_FREEBLOCK;
let mut pc = page_ref.first_freeblock() as usize;
let maxpc = usable_space as usize - amount;
while pc <= maxpc {
if pc + 4 > usable_space as usize {
return_corrupt!("Free block header extends beyond page");
}
let next = page_ref.read_u16_no_offset(pc);
let size = page_ref.read_u16_no_offset(pc + 2);
if amount <= size as usize {
let new_size = size as usize - amount;
if new_size < 4 {
                // Using a free slot that would leave behind a very small fragment (< 4 bytes)
                // must not cause the total fragmentation to exceed the limit of 60 bytes;
                // see the SQLite docs: https://www.sqlite.org/fileformat.html#:~:text=A%20freeblock%20requires,not%20exceed%2060
if page_ref.num_frag_free_bytes() > 57 {
return Ok(0);
}
// Delete the slot from freelist and update the page's fragment count.
page_ref.write_u16_no_offset(prev_pc, next);
let frag = page_ref.num_frag_free_bytes() + new_size as u8;
page_ref.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, frag);
return Ok(pc);
} else if new_size + pc > maxpc {
return_corrupt!("Free block extends beyond page end");
} else {
// Requested amount fits inside the current free slot so we reduce its size
// to account for newly allocated space.
page_ref.write_u16_no_offset(pc + 2, new_size as u16);
return Ok(pc + new_size);
}
}
prev_pc = pc;
pc = next as usize;
if pc <= prev_pc {
if pc != 0 {
return_corrupt!("Free list not in ascending order");
}
return Ok(0);
}
}
if pc > maxpc + amount - 4 {
return_corrupt!("Free block chain extends beyond page end");
}
Ok(0)
}
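/// Initialize a page as an empty b-tree page of the given type. The header is written at
/// `offset` (non-zero only for page 1, which begins with the database header), and the cell
/// content area pointer is set to `usable_space`, i.e. the end of the page, since an empty
/// page has no cells.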
pub fn btree_init_page(page: &BTreePage, page_type: PageType, offset: usize, usable_space: u16) {
// setup btree page
let contents = page.get();
tracing::debug!(
"btree_init_page(id={}, offset={})",
contents.get().id,
offset
);
let contents = contents.get().contents.as_mut().unwrap();
contents.offset = offset;
let id = page_type as u8;
contents.write_u8(offset::BTREE_PAGE_TYPE, id);
contents.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0);
contents.write_u16(offset::BTREE_CELL_COUNT, 0);
contents.write_u16(offset::BTREE_CELL_CONTENT_AREA, usable_space);
contents.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0);
contents.write_u32(offset::BTREE_RIGHTMOST_PTR, 0);
}
fn to_static_buf(buf: &mut [u8]) -> &'static mut [u8] {
unsafe { std::mem::transmute::<&mut [u8], &'static mut [u8]>(buf) }
}
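/// Rewrite the cell set of `page` during balancing. On entry the page holds the cells of
/// `cell_array` starting at index `start_old_cells` (plus any pending overflow cells); on exit
/// it holds exactly `cell_array[start_new_cells..start_new_cells + number_new_cells]`. Cells
/// outside the new window are freed, the page is defragmented, missing cells (including
/// overflow cells that fall inside the window) are inserted, and the header cell count is
/// updated.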
fn edit_page(
page: &mut PageContent,
start_old_cells: usize,
start_new_cells: usize,
number_new_cells: usize,
cell_array: &CellArray,
usable_space: u16,
) -> Result<()> {
tracing::debug!(
"edit_page start_old_cells={} start_new_cells={} number_new_cells={} cell_array={}",
start_old_cells,
start_new_cells,
number_new_cells,
cell_array.cell_payloads.len()
);
let end_old_cells = start_old_cells + page.cell_count() + page.overflow_cells.len();
let end_new_cells = start_new_cells + number_new_cells;
let mut count_cells = page.cell_count();
if start_old_cells < start_new_cells {
debug_validate_cells!(page, usable_space);
let number_to_shift = page_free_array(
page,
start_old_cells,
start_new_cells - start_old_cells,
cell_array,
usable_space,
)?;
// shift pointers left
shift_cells_left(page, count_cells, number_to_shift);
count_cells -= number_to_shift;
debug_validate_cells!(page, usable_space);
}
if end_new_cells < end_old_cells {
debug_validate_cells!(page, usable_space);
let number_tail_removed = page_free_array(
page,
end_new_cells,
end_old_cells - end_new_cells,
cell_array,
usable_space,
)?;
assert!(count_cells >= number_tail_removed);
count_cells -= number_tail_removed;
debug_validate_cells!(page, usable_space);
}
    // TODO: make page_free_array defragment; I'm lazy, so this will work for now.
defragment_page(page, usable_space);
// TODO: add to start
if start_new_cells < start_old_cells {
let count = number_new_cells.min(start_old_cells - start_new_cells);
page_insert_array(page, start_new_cells, count, cell_array, 0, usable_space)?;
count_cells += count;
}
// TODO: overflow cells
debug_validate_cells!(page, usable_space);
for i in 0..page.overflow_cells.len() {
let overflow_cell = &page.overflow_cells[i];
// cell index in context of new list of cells that should be in the page
if start_old_cells + overflow_cell.index >= start_new_cells {
let cell_idx = start_old_cells + overflow_cell.index - start_new_cells;
if cell_idx < number_new_cells {
count_cells += 1;
page_insert_array(
page,
start_new_cells + cell_idx,
1,
cell_array,
cell_idx,
usable_space,
)?;
}
}
}
debug_validate_cells!(page, usable_space);
// TODO: append cells to end
page_insert_array(
page,
start_new_cells + count_cells,
number_new_cells - count_cells,
cell_array,
count_cells,
usable_space,
)?;
debug_validate_cells!(page, usable_space);
// TODO: noverflow
page.write_u16(offset::BTREE_CELL_COUNT, number_new_cells as u16);
Ok(())
}
/// Shifts the cell pointers in the B-tree page to the left by a specified number of positions.
///
/// # Parameters
/// - `page`: A mutable reference to the `PageContent` representing the B-tree page.
/// - `count_cells`: The total number of cells currently in the page.
/// - `number_to_shift`: The number of cell pointers to shift to the left.
///
/// # Behavior
/// This function modifies the cell pointer array within the page by copying memory regions.
/// It shifts the pointers starting from `number_to_shift` to the beginning of the array,
/// effectively removing the first `number_to_shift` pointers.
fn shift_cells_left(page: &mut PageContent, count_cells: usize, number_to_shift: usize) {
let buf = page.as_ptr();
let (start, _) = page.cell_pointer_array_offset_and_size();
buf.copy_within(
start + (number_to_shift * 2)..start + (count_cells * 2),
start,
);
}
fn page_free_array(
page: &mut PageContent,
first: usize,
count: usize,
cell_array: &CellArray,
usable_space: u16,
) -> Result<usize> {
tracing::debug!("page_free_array {}..{}", first, first + count);
let buf = &mut page.as_ptr()[page.offset..usable_space as usize];
let buf_range = buf.as_ptr_range();
let mut number_of_cells_removed = 0;
let mut number_of_cells_buffered = 0;
let mut buffered_cells_offsets: [u16; 10] = [0; 10];
let mut buffered_cells_ends: [u16; 10] = [0; 10];
for i in first..first + count {
let cell = &cell_array.cell_payloads[i];
let cell_pointer = cell.as_ptr_range();
// check if not overflow cell
if cell_pointer.start >= buf_range.start && cell_pointer.start < buf_range.end {
assert!(
cell_pointer.end >= buf_range.start && cell_pointer.end <= buf_range.end,
"whole cell should be inside the page"
);
// TODO: remove pointer too
let offset = (cell_pointer.start as usize - buf_range.start as usize) as u16;
let len = (cell_pointer.end as usize - cell_pointer.start as usize) as u16;
assert!(len > 0, "cell size should be greater than 0");
let end = offset + len;
/* Try to merge the current cell with a contiguous buffered cell to reduce the number of
* `free_cell_range()` operations. Break on the first merge to avoid consuming too much time,
* `free_cell_range()` will try to merge contiguous cells anyway. */
let mut j = 0;
while j < number_of_cells_buffered {
// If the buffered cell is immediately after the current cell
if buffered_cells_offsets[j] == end {
// Merge them by updating the buffered cell's offset to the current cell's offset
buffered_cells_offsets[j] = offset;
break;
// If the buffered cell is immediately before the current cell
} else if buffered_cells_ends[j] == offset {
// Merge them by updating the buffered cell's end offset to the current cell's end offset
buffered_cells_ends[j] = end;
break;
}
j += 1;
}
// If no cells were merged
if j >= number_of_cells_buffered {
// If the buffered cells array is full, flush the buffered cells using `free_cell_range()` to empty the array
if number_of_cells_buffered >= buffered_cells_offsets.len() {
for j in 0..number_of_cells_buffered {
free_cell_range(
page,
buffered_cells_offsets[j],
buffered_cells_ends[j] - buffered_cells_offsets[j],
usable_space,
)?;
}
number_of_cells_buffered = 0; // Reset array counter
}
// Buffer the current cell
buffered_cells_offsets[number_of_cells_buffered] = offset;
buffered_cells_ends[number_of_cells_buffered] = end;
number_of_cells_buffered += 1;
}
number_of_cells_removed += 1;
}
}
for j in 0..number_of_cells_buffered {
free_cell_range(
page,
buffered_cells_offsets[j],
buffered_cells_ends[j] - buffered_cells_offsets[j],
usable_space,
)?;
}
page.write_u16(
offset::BTREE_CELL_COUNT,
page.cell_count() as u16 - number_of_cells_removed as u16,
);
Ok(number_of_cells_removed)
}
fn page_insert_array(
page: &mut PageContent,
first: usize,
count: usize,
cell_array: &CellArray,
mut start_insert: usize,
usable_space: u16,
) -> Result<()> {
// TODO: implement faster algorithm, this is doing extra work that's not needed.
    // See pageInsertArray to understand the faster way.
tracing::debug!(
"page_insert_array(cell_array.cells={}..{}, cell_count={}, page_type={:?})",
first,
first + count,
page.cell_count(),
page.page_type()
);
for i in first..first + count {
insert_into_cell_during_balance(
page,
cell_array.cell_payloads[i],
start_insert,
usable_space,
)?;
start_insert += 1;
}
debug_validate_cells!(page, usable_space);
Ok(())
}
/// Free the range of bytes that a cell occupies.
/// This function also updates the freeblock list in the page.
/// Freeblocks are used to keep track of free space in the page,
/// and are organized as a linked list.
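///
/// For example, freeing the range 1000..1100 when a freeblock already starts at byte 1100
/// coalesces the two into a single freeblock starting at 1000; a gap of up to 3 bytes between
/// the two ranges would be absorbed as well and subtracted from the fragmented-bytes counter.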
fn free_cell_range(
page: &mut PageContent,
mut offset: u16,
len: u16,
usable_space: u16,
) -> Result<()> {
if len < 4 {
return_corrupt!("Minimum cell size is 4");
}
if offset > usable_space.saturating_sub(4) {
return_corrupt!("Start offset beyond usable space");
}
let mut size = len;
let mut end = offset + len;
let mut pointer_to_pc = page.offset as u16 + 1;
// if the freeblock list is empty, we set this block as the first freeblock in the page header.
let pc = if page.first_freeblock() == 0 {
0
} else {
// if the freeblock list is not empty, and the offset is greater than the first freeblock,
// then we need to do some more calculation to figure out where to insert the freeblock
// in the freeblock linked list.
let first_block = page.first_freeblock();
let mut pc = first_block;
while pc < offset {
if pc <= pointer_to_pc {
if pc == 0 {
break;
}
return_corrupt!("free cell range free block not in ascending order");
}
let next = page.read_u16_no_offset(pc as usize);
pointer_to_pc = pc;
pc = next;
}
if pc > usable_space - 4 {
return_corrupt!("Free block beyond usable space");
}
let mut removed_fragmentation = 0;
if pc > 0 && offset + len + 3 >= pc {
removed_fragmentation = (pc - end) as u8;
if end > pc {
return_corrupt!("Invalid block overlap");
}
end = pc + page.read_u16_no_offset(pc as usize + 2);
if end > usable_space {
return_corrupt!("Coalesced block extends beyond page");
}
size = end - offset;
pc = page.read_u16_no_offset(pc as usize);
}
if pointer_to_pc > page.offset as u16 + 1 {
let prev_end = pointer_to_pc + page.read_u16_no_offset(pointer_to_pc as usize + 2);
if prev_end + 3 >= offset {
if prev_end > offset {
return_corrupt!("Invalid previous block overlap");
}
removed_fragmentation += (offset - prev_end) as u8;
size = end - pointer_to_pc;
offset = pointer_to_pc;
}
}
if removed_fragmentation > page.num_frag_free_bytes() {
return_corrupt!(format!(
"Invalid fragmentation count. Had {} and removed {}",
page.num_frag_free_bytes(),
removed_fragmentation
));
}
let frag = page.num_frag_free_bytes() - removed_fragmentation;
page.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, frag);
pc
};
if (offset as u32) <= page.cell_content_area() {
if (offset as u32) < page.cell_content_area() {
return_corrupt!("Free block before content area");
}
if pointer_to_pc != page.offset as u16 + offset::BTREE_FIRST_FREEBLOCK as u16 {
return_corrupt!("Invalid content area merge");
}
page.write_u16(offset::BTREE_FIRST_FREEBLOCK, pc);
page.write_u16(offset::BTREE_CELL_CONTENT_AREA, end);
} else {
page.write_u16_no_offset(pointer_to_pc as usize, offset);
page.write_u16_no_offset(offset as usize, pc);
page.write_u16_no_offset(offset as usize + 2, size);
}
Ok(())
}
/// Defragment a page. This means packing all the cells to the end of the page.
fn defragment_page(page: &PageContent, usable_space: u16) {
debug_validate_cells!(page, usable_space);
tracing::debug!("defragment_page");
let cloned_page = page.clone();
// TODO(pere): usable space should include offset probably
let mut cbrk = usable_space;
// TODO: implement fast algorithm
let last_cell = usable_space - 4;
let first_cell = cloned_page.unallocated_region_start() as u16;
if cloned_page.cell_count() > 0 {
let read_buf = cloned_page.as_ptr();
let write_buf = page.as_ptr();
for i in 0..cloned_page.cell_count() {
let (cell_offset, _) = page.cell_pointer_array_offset_and_size();
let cell_idx = cell_offset + (i * 2);
let pc = cloned_page.read_u16_no_offset(cell_idx);
if pc > last_cell {
unimplemented!("corrupted page");
}
assert!(pc <= last_cell);
let (_, size) = cloned_page.cell_get_raw_region(i, usable_space as usize);
let size = size as u16;
cbrk -= size;
if cbrk < first_cell || pc + size > usable_space {
todo!("corrupt");
}
assert!(cbrk + size <= usable_space && cbrk >= first_cell);
// set new pointer
page.write_u16_no_offset(cell_idx, cbrk);
// copy payload
write_buf[cbrk as usize..cbrk as usize + size as usize]
.copy_from_slice(&read_buf[pc as usize..pc as usize + size as usize]);
}
}
// assert!( nfree >= 0 );
// if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){
// return SQLITE_CORRUPT_PAGE(pPage);
// }
assert!(cbrk >= first_cell);
// set new first byte of cell content
page.write_u16(offset::BTREE_CELL_CONTENT_AREA, cbrk);
    // set the first freeblock to 0; unused space can be retrieved from the gap between the end of the cell pointer array and the content area start
page.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0);
page.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0);
debug_validate_cells!(page, usable_space);
}
#[cfg(debug_assertions)]
/// Only enabled in debug mode, where we ensure that all cells are valid.
fn debug_validate_cells_core(page: &PageContent, usable_space: u16) {
for i in 0..page.cell_count() {
let (offset, size) = page.cell_get_raw_region(i, usable_space as usize);
let buf = &page.as_ptr()[offset..offset + size];
// E.g. the following table btree cell may just have two bytes:
// Payload size 0 (stored as SerialTypeKind::ConstInt0)
// Rowid 1 (stored as SerialTypeKind::ConstInt1)
assert!(
size >= 2,
"cell size should be at least 2 bytes idx={i}, cell={buf:?}, offset={offset}"
);
if page.is_leaf() {
assert!(page.as_ptr()[offset] != 0);
}
assert!(
offset + size <= usable_space as usize,
"cell spans out of usable space"
);
}
}
/// Insert a record into a cell.
/// If the page has no room for the cell, an overflow cell is created instead.
/// insert_into_cell() is called from insert_into_page(),
/// and the overflow cell count is used to determine if the page overflows,
/// i.e. whether we need to balance the btree after the insert.
fn _insert_into_cell(
page: &mut PageContent,
payload: &[u8],
cell_idx: usize,
usable_space: u16,
allow_regular_insert_despite_overflow: bool, // see [insert_into_cell_during_balance()]
) -> Result<()> {
assert!(
cell_idx <= page.cell_count() + page.overflow_cells.len(),
"attempting to add cell to an incorrect place cell_idx={} cell_count={} page_type={:?}",
cell_idx,
page.cell_count(),
page.page_type()
);
let already_has_overflow = !page.overflow_cells.is_empty();
let enough_space = if already_has_overflow && !allow_regular_insert_despite_overflow {
false
} else {
// otherwise, we need to check if we have enough space
let free = compute_free_space(page, usable_space);
payload.len() + CELL_PTR_SIZE_BYTES <= free as usize
};
if !enough_space {
// add to overflow cell
page.overflow_cells.push(OverflowCell {
index: cell_idx,
payload: Pin::new(Vec::from(payload)),
});
return Ok(());
}
assert!(
cell_idx <= page.cell_count(),
"cell_idx > page.cell_count() without overflow cells"
);
let new_cell_data_pointer = allocate_cell_space(page, payload.len() as u16, usable_space)?;
tracing::debug!(
"insert_into_cell(idx={}, pc={}, size={})",
cell_idx,
new_cell_data_pointer,
payload.len()
);
assert!(new_cell_data_pointer + payload.len() as u16 <= usable_space);
let buf = page.as_ptr();
// copy data
buf[new_cell_data_pointer as usize..new_cell_data_pointer as usize + payload.len()]
.copy_from_slice(payload);
// memmove(pIns+2, pIns, 2*(pPage->nCell - i));
let (cell_pointer_array_start, _) = page.cell_pointer_array_offset_and_size();
let cell_pointer_cur_idx = cell_pointer_array_start + (CELL_PTR_SIZE_BYTES * cell_idx);
// move existing pointers forward by CELL_PTR_SIZE_BYTES...
let n_cells_forward = page.cell_count() - cell_idx;
let n_bytes_forward = CELL_PTR_SIZE_BYTES * n_cells_forward;
if n_bytes_forward > 0 {
buf.copy_within(
cell_pointer_cur_idx..cell_pointer_cur_idx + n_bytes_forward,
cell_pointer_cur_idx + CELL_PTR_SIZE_BYTES,
);
}
// ...and insert new cell pointer at the current index
page.write_u16_no_offset(cell_pointer_cur_idx, new_cell_data_pointer);
// update cell count
let new_n_cells = (page.cell_count() + 1) as u16;
page.write_u16(offset::BTREE_CELL_COUNT, new_n_cells);
debug_validate_cells!(page, usable_space);
Ok(())
}
fn insert_into_cell(
page: &mut PageContent,
payload: &[u8],
cell_idx: usize,
usable_space: u16,
) -> Result<()> {
_insert_into_cell(page, payload, cell_idx, usable_space, false)
}
/// Normally in [insert_into_cell()], if a page already has overflow cells, all
/// new insertions are also added to the overflow cells vector.
/// SQLite doesn't use regular [insert_into_cell()] during balancing,
/// so we have a specialized function for use during balancing that allows regular cell insertion
/// despite the presence of existing overflow cells (overflow cells are one of the reasons we are balancing in the first place).
/// During balancing cells are first repositioned with [edit_page()]
/// and then inserted via [page_insert_array()] which calls [insert_into_cell_during_balance()],
/// and finally the existing overflow cells are cleared.
/// If we did not allow the cell insert to proceed normally despite overflow cells being present,
/// the new insertions would also be added as overflow cells, which would defeat the point of balancing.
fn insert_into_cell_during_balance(
page: &mut PageContent,
payload: &[u8],
cell_idx: usize,
usable_space: u16,
) -> Result<()> {
_insert_into_cell(page, payload, cell_idx, usable_space, true)
}
/// The amount of free space is the sum of:
/// #1. The size of the unallocated region
/// #2. Fragments (isolated 1-3 byte chunks of free space within the cell content area)
/// #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that
/// are not in use due to e.g. deletions)
/// The freeblock list can be empty, in which case the "real free space" available for allocation
/// is simply the unallocated region between the end of the cell pointer array and the start of
/// the cell content area.
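///
/// For example, on a leaf page (8-byte header at offset 0) with 10 cells, a cell content area
/// starting at byte 3000, 2 fragmented bytes and a single 100-byte freeblock, the free space
/// is (3000 + 2 + 100) - (8 + 2 * 10) = 3074 bytes.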
#[allow(unused_assignments)]
fn compute_free_space(page: &PageContent, usable_space: u16) -> u16 {
// TODO(pere): maybe free space is not calculated correctly with offset
    // Usable space is not the same as free space; it simply means the space that is not
    // reserved for extensions by SQLite. Usually reserved_space is 0.
let usable_space = usable_space as usize;
let first_cell = page.offset + page.header_size() + (2 * page.cell_count());
let cell_content_area_start = page.cell_content_area() as usize;
let mut free_space_bytes = cell_content_area_start + page.num_frag_free_bytes() as usize;
// #3 is computed by iterating over the freeblocks linked list
let mut cur_freeblock_ptr = page.first_freeblock() as usize;
if cur_freeblock_ptr > 0 {
if cur_freeblock_ptr < cell_content_area_start {
// Freeblocks exist in the cell content area e.g. after deletions
// They should never exist in the unused area of the page.
todo!("corrupted page");
}
let mut next = 0;
let mut size = 0;
loop {
// TODO: check corruption icellast
next = page.read_u16_no_offset(cur_freeblock_ptr) as usize; // first 2 bytes in freeblock = next freeblock pointer
size = page.read_u16_no_offset(cur_freeblock_ptr + 2) as usize; // next 2 bytes in freeblock = size of current freeblock
free_space_bytes += size;
// Freeblocks are in order from left to right on the page,
            // so the next pointer should be > current pointer + its size, or 0 if no next block exists.
if next <= cur_freeblock_ptr + size + 3 {
break;
}
cur_freeblock_ptr = next;
}
// Next should always be 0 (NULL) at this point since we have reached the end of the freeblocks linked list
assert_eq!(
next, 0,
"corrupted page: freeblocks list not in ascending order"
);
assert!(
cur_freeblock_ptr + size <= usable_space,
"corrupted page: last freeblock extends last page end"
);
}
assert!(
free_space_bytes <= usable_space,
"corrupted page: free space is greater than usable space"
);
free_space_bytes as u16 - first_cell as u16
}
/// Allocate space for a cell on a page, returning the offset of the allocated region.
fn allocate_cell_space(page_ref: &PageContent, amount: u16, usable_space: u16) -> Result<u16> {
let mut amount = amount as usize;
if amount < MINIMUM_CELL_SIZE {
amount = MINIMUM_CELL_SIZE;
}
let (cell_offset, _) = page_ref.cell_pointer_array_offset_and_size();
let gap = cell_offset + 2 * page_ref.cell_count();
let mut top = page_ref.cell_content_area() as usize;
// there are free blocks and enough space
if page_ref.first_freeblock() != 0 && gap + 2 <= top {
// find slot
let pc = find_free_cell(page_ref, usable_space, amount)?;
if pc != 0 {
return Ok(pc as u16);
}
/* fall through, we might need to defragment */
}
if gap + 2 + amount > top {
// defragment
defragment_page(page_ref, usable_space);
top = page_ref.read_u16(offset::BTREE_CELL_CONTENT_AREA) as usize;
}
top -= amount;
page_ref.write_u16(offset::BTREE_CELL_CONTENT_AREA, top as u16);
assert!(top + amount <= usable_space as usize);
Ok(top as u16)
}
/// Fill in the cell payload with the record.
/// If the record is too large to fit in the cell, it will spill onto overflow pages.
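///
/// The local portion of a spilling cell is laid out as
/// [header varints][`space_left` payload bytes][4-byte pointer to the first overflow page],
/// and each overflow page holds a 4-byte next-page pointer followed by payload bytes.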
fn fill_cell_payload(
page_contents: &PageContent,
int_key: Option<i64>,
cell_payload: &mut Vec<u8>,
cell_idx: usize,
record: &ImmutableRecord,
usable_space: usize,
pager: Rc<Pager>,
) {
// TODO: make record raw from start, having to serialize is not good
let record_buf = record.get_payload().to_vec();
let page_type = page_contents.page_type();
// fill in header
if matches!(page_type, PageType::IndexInterior) {
// if a write happened on an index interior page, it is always an overwrite.
// we must copy the left child pointer of the replaced cell to the new cell.
let left_child_page = page_contents.cell_interior_read_left_child_page(cell_idx);
cell_payload.extend_from_slice(&left_child_page.to_be_bytes());
}
if matches!(page_type, PageType::TableLeaf) {
let int_key = int_key.unwrap();
write_varint_to_vec(record_buf.len() as u64, cell_payload);
write_varint_to_vec(int_key as u64, cell_payload);
} else {
write_varint_to_vec(record_buf.len() as u64, cell_payload);
}
let payload_overflow_threshold_max = payload_overflow_threshold_max(page_type, usable_space);
tracing::debug!(
"fill_cell_payload(record_size={}, payload_overflow_threshold_max={})",
record_buf.len(),
payload_overflow_threshold_max
);
if record_buf.len() <= payload_overflow_threshold_max {
// enough allowed space to fit inside a btree page
cell_payload.extend_from_slice(record_buf.as_slice());
return;
}
let payload_overflow_threshold_min = payload_overflow_threshold_min(page_type, usable_space);
// see e.g. https://github.com/sqlite/sqlite/blob/9591d3fe93936533c8c3b0dc4d025ac999539e11/src/dbstat.c#L371
let mut space_left = payload_overflow_threshold_min
+ (record_buf.len() - payload_overflow_threshold_min) % (usable_space - 4);
if space_left > payload_overflow_threshold_max {
space_left = payload_overflow_threshold_min;
}
    // cell_size must equal the initial value of space_left, as those are the bytes copied to the non-overflow page.
let cell_size = space_left + cell_payload.len() + 4; // 4 is the number of bytes of pointer to first overflow page
let mut to_copy_buffer = record_buf.as_slice();
let prev_size = cell_payload.len();
cell_payload.resize(prev_size + space_left + 4, 0);
let mut pointer = unsafe { cell_payload.as_mut_ptr().add(prev_size) };
let mut pointer_to_next = unsafe { cell_payload.as_mut_ptr().add(prev_size + space_left) };
loop {
let to_copy = space_left.min(to_copy_buffer.len());
unsafe { std::ptr::copy(to_copy_buffer.as_ptr(), pointer, to_copy) };
let left = to_copy_buffer.len() - to_copy;
if left == 0 {
break;
}
// we still have bytes to add, we will need to allocate new overflow page
        // FIXME: handle the case where the page cache is full
let overflow_page = pager.allocate_overflow_page();
{
let id = overflow_page.get().id as u32;
let contents = overflow_page.get().contents.as_mut().unwrap();
// TODO: take into account offset here?
let buf = contents.as_ptr();
let as_bytes = id.to_be_bytes();
// update pointer to new overflow page
unsafe { std::ptr::copy(as_bytes.as_ptr(), pointer_to_next, 4) };
pointer = unsafe { buf.as_mut_ptr().add(4) };
pointer_to_next = buf.as_mut_ptr();
space_left = usable_space - 4;
}
to_copy_buffer = &to_copy_buffer[to_copy..];
}
assert_eq!(cell_size, cell_payload.len());
}
/// Returns the maximum payload size (X) that can be stored directly on a b-tree page without spilling to overflow pages.
///
/// For table leaf pages: X = usable_size - 35
/// For index pages: X = ((usable_size - 12) * 64/255) - 23
///
/// The usable size is the total page size less the reserved space at the end of each page.
/// These thresholds are designed to:
/// - Give a minimum fanout of 4 for index b-trees
/// - Ensure enough payload is on the b-tree page that the record header can usually be accessed
/// without consulting an overflow page
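///
/// For example, with a usable size of 4096 bytes, a table leaf keeps up to 4096 - 35 = 4061
/// payload bytes locally, while an index page keeps up to ((4096 - 12) * 64 / 255) - 23 = 1002
/// bytes (integer arithmetic).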
pub fn payload_overflow_threshold_max(page_type: PageType, usable_space: usize) -> usize {
match page_type {
PageType::IndexInterior | PageType::IndexLeaf => {
((usable_space - 12) * 64 / 255) - 23 // Index page formula
}
PageType::TableInterior | PageType::TableLeaf => {
usable_space - 35 // Table leaf page formula
}
}
}
/// Returns the minimum payload size (M) that must be stored on the b-tree page before spilling to overflow pages is allowed.
///
/// For all page types: M = ((usable_size - 12) * 32/255) - 23
///
/// When payload size P exceeds max_local():
/// - If K = M + ((P-M) % (usable_size-4)) <= max_local(): store K bytes on page
/// - Otherwise: store M bytes on page
///
/// The remaining bytes are stored on overflow pages in both cases.
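///
/// For example, with a usable size of 4096 bytes, M = ((4096 - 12) * 32 / 255) - 23 = 489.
/// A 5000-byte payload on a table leaf (max_local = 4061) then keeps
/// K = 489 + ((5000 - 489) % (4096 - 4)) = 908 bytes on the page and spills the remaining
/// 4092 bytes to overflow pages.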
pub fn payload_overflow_threshold_min(_page_type: PageType, usable_space: usize) -> usize {
// Same formula for all page types
((usable_space - 12) * 32 / 255) - 23
}
/// Drop a cell from a page.
/// This is done by freeing the range of bytes that the cell occupies.
fn drop_cell(page: &mut PageContent, cell_idx: usize, usable_space: u16) -> Result<()> {
let (cell_start, cell_len) = page.cell_get_raw_region(cell_idx, usable_space as usize);
free_cell_range(page, cell_start as u16, cell_len as u16, usable_space)?;
if page.cell_count() > 1 {
shift_pointers_left(page, cell_idx);
} else {
page.write_u16(offset::BTREE_CELL_CONTENT_AREA, usable_space);
page.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0);
page.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0);
}
page.write_u16(offset::BTREE_CELL_COUNT, page.cell_count() as u16 - 1);
debug_validate_cells!(page, usable_space);
Ok(())
}
/// Shift pointers to the left once starting from a cell position
/// This is useful when we remove a cell and we want to move left the cells from the right to fill
/// the empty space that's not needed
fn shift_pointers_left(page: &mut PageContent, cell_idx: usize) {
assert!(page.cell_count() > 0);
let buf = page.as_ptr();
let (start, _) = page.cell_pointer_array_offset_and_size();
let start = start + (cell_idx * 2) + 2;
let right_cells = page.cell_count() - cell_idx - 1;
let amount_to_shift = right_cells * 2;
buf.copy_within(start..start + amount_to_shift, start - 2);
}
#[cfg(test)]
mod tests {
use rand::{thread_rng, Rng};
use rand_chacha::{
rand_core::{RngCore, SeedableRng},
ChaCha8Rng,
};
use sorted_vec::SortedVec;
use test_log::test;
use turso_sqlite3_parser::ast::SortOrder;
use super::*;
use crate::{
io::{Buffer, MemoryIO, OpenFlags, IO},
schema::IndexColumn,
storage::{
database::DatabaseFile,
page_cache::DumbLruPageCache,
pager::{AtomicDbState, DbState},
},
types::Text,
util::IOExt as _,
vdbe::Register,
BufferPool, Completion, Connection, StepResult, WalFile, WalFileShared,
};
use std::{
cell::RefCell,
collections::HashSet,
mem::transmute,
ops::Deref,
rc::Rc,
sync::{Arc, Mutex},
};
use tempfile::TempDir;
use crate::{
io::BufferData,
storage::{
btree::{compute_free_space, fill_cell_payload, payload_overflow_threshold_max},
sqlite3_ondisk::{BTreeCell, PageContent, PageType},
},
types::Value,
Database, Page, Pager, PlatformIO,
};
use super::{btree_init_page, defragment_page, drop_cell, insert_into_cell};
#[allow(clippy::arc_with_non_send_sync)]
fn get_page(id: usize) -> BTreePage {
let page = Arc::new(Page::new(id));
let drop_fn = Rc::new(|_| {});
let inner = PageContent::new(
0,
Arc::new(RefCell::new(Buffer::new(
BufferData::new(vec![0; 4096]),
drop_fn,
))),
);
page.get().contents.replace(inner);
let page = Arc::new(BTreePageInner {
page: RefCell::new(page),
});
btree_init_page(&page, PageType::TableLeaf, 0, 4096);
page
}
#[allow(clippy::arc_with_non_send_sync)]
fn get_database() -> Arc<Database> {
let mut path = TempDir::new().unwrap().keep();
path.push("test.db");
{
let connection = rusqlite::Connection::open(&path).unwrap();
connection
.pragma_update(None, "journal_mode", "wal")
.unwrap();
}
let io: Arc<dyn IO> = Arc::new(PlatformIO::new().unwrap());
let db = Database::open_file(io.clone(), path.to_str().unwrap(), false, false).unwrap();
db
}
fn ensure_cell(page: &mut PageContent, cell_idx: usize, payload: &Vec<u8>) {
let cell = page.cell_get_raw_region(cell_idx, 4096);
tracing::trace!("cell idx={} start={} len={}", cell_idx, cell.0, cell.1);
let buf = &page.as_ptr()[cell.0..cell.0 + cell.1];
assert_eq!(buf.len(), payload.len());
assert_eq!(buf, payload);
}
fn add_record(
id: usize,
pos: usize,
page: &mut PageContent,
record: ImmutableRecord,
conn: &Arc<Connection>,
) -> Vec<u8> {
let mut payload: Vec<u8> = Vec::new();
fill_cell_payload(
page,
Some(id as i64),
&mut payload,
pos,
&record,
4096,
conn.pager.borrow().clone(),
);
insert_into_cell(page, &payload, pos, 4096).unwrap();
payload
}
#[test]
fn test_insert_cell() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let header_size = 8;
let regs = &[Register::Value(Value::Integer(1))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(1, 0, page, record, &conn);
assert_eq!(page.cell_count(), 1);
let free = compute_free_space(page, 4096);
assert_eq!(free, 4096 - payload.len() as u16 - 2 - header_size);
let cell_idx = 0;
ensure_cell(page, cell_idx, &payload);
}
struct Cell {
pos: usize,
payload: Vec<u8>,
}
#[test]
fn test_drop_1() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let header_size = 8;
let mut total_size = 0;
let mut cells = Vec::new();
let usable_space = 4096;
for i in 0..3 {
let regs = &[Register::Value(Value::Integer(i as i64))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(i, i, page, record, &conn);
assert_eq!(page.cell_count(), i + 1);
let free = compute_free_space(page, usable_space);
total_size += payload.len() as u16 + 2;
assert_eq!(free, 4096 - total_size - header_size);
cells.push(Cell { pos: i, payload });
}
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
cells.remove(1);
drop_cell(page, 1, usable_space).unwrap();
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
}
fn validate_btree(pager: Rc<Pager>, page_idx: usize) -> (usize, bool) {
let num_columns = 5;
let cursor = BTreeCursor::new_table(None, pager.clone(), page_idx, num_columns);
let page = cursor.read_page(page_idx).unwrap();
while page.get().is_locked() {
pager.io.run_once().unwrap();
}
let page = page.get();
        // Pin the page so it is not dropped while loading child pages.
page.set_dirty();
let contents = page.get().contents.as_ref().unwrap();
let mut previous_key = None;
let mut valid = true;
let mut depth = None;
debug_validate_cells!(contents, pager.usable_space() as u16);
let mut child_pages = Vec::new();
for cell_idx in 0..contents.cell_count() {
let cell = contents.cell_get(cell_idx, cursor.usable_space()).unwrap();
let current_depth = match cell {
BTreeCell::TableLeafCell(..) => 1,
BTreeCell::TableInteriorCell(TableInteriorCell {
left_child_page, ..
}) => {
let child_page = cursor.read_page(left_child_page as usize).unwrap();
while child_page.get().is_locked() {
pager.io.run_once().unwrap();
}
child_pages.push(child_page);
if left_child_page == page.get().id as u32 {
valid = false;
tracing::error!(
"left child page is the same as parent {}",
left_child_page
);
continue;
}
let (child_depth, child_valid) =
validate_btree(pager.clone(), left_child_page as usize);
valid &= child_valid;
child_depth
}
_ => panic!("unsupported btree cell: {cell:?}"),
};
if current_depth >= 100 {
tracing::error!("depth is too big");
page.clear_dirty();
return (100, false);
}
depth = Some(depth.unwrap_or(current_depth + 1));
if depth != Some(current_depth + 1) {
tracing::error!("depth is different for child of page {}", page_idx);
valid = false;
}
match cell {
BTreeCell::TableInteriorCell(TableInteriorCell { rowid, .. })
| BTreeCell::TableLeafCell(TableLeafCell { rowid, .. }) => {
if previous_key.is_some() && previous_key.unwrap() >= rowid {
tracing::error!(
"keys are in bad order: prev={:?}, current={}",
previous_key,
rowid
);
valid = false;
}
previous_key = Some(rowid);
}
_ => panic!("unsupported btree cell: {cell:?}"),
}
}
if let Some(right) = contents.rightmost_pointer() {
let (right_depth, right_valid) = validate_btree(pager.clone(), right as usize);
valid &= right_valid;
depth = Some(depth.unwrap_or(right_depth + 1));
if depth != Some(right_depth + 1) {
tracing::error!("depth is different for child of page {}", page_idx);
valid = false;
}
}
let first_page_type = child_pages.first().map(|p| {
if !p.get().is_loaded() {
let new_page = pager.read_page(p.get().get().id).unwrap();
p.page.replace(new_page);
}
while p.get().is_locked() {
pager.io.run_once().unwrap();
}
p.get().get_contents().page_type()
});
if let Some(child_type) = first_page_type {
for page in child_pages.iter().skip(1) {
if !page.get().is_loaded() {
let new_page = pager.read_page(page.get().get().id).unwrap();
page.page.replace(new_page);
}
while page.get().is_locked() {
pager.io.run_once().unwrap();
}
if page.get().get_contents().page_type() != child_type {
tracing::error!("child pages have different types");
valid = false;
}
}
}
if contents.rightmost_pointer().is_none() && contents.cell_count() == 0 {
valid = false;
}
page.clear_dirty();
(depth.unwrap(), valid)
}
fn format_btree(pager: Rc<Pager>, page_idx: usize, depth: usize) -> String {
let num_columns = 5;
let cursor = BTreeCursor::new_table(None, pager.clone(), page_idx, num_columns);
let page = cursor.read_page(page_idx).unwrap();
while page.get().is_locked() {
pager.io.run_once().unwrap();
}
let page = page.get();
        // Pin the page in order to not drop it in between loading of different pages. Otherwise, contents would be a dangling reference.
page.set_dirty();
let contents = page.get().contents.as_ref().unwrap();
let mut current = Vec::new();
let mut child = Vec::new();
for cell_idx in 0..contents.cell_count() {
let cell = contents.cell_get(cell_idx, cursor.usable_space()).unwrap();
match cell {
BTreeCell::TableInteriorCell(cell) => {
current.push(format!(
"node[rowid:{}, ptr(<=):{}]",
cell.rowid, cell.left_child_page
));
child.push(format_btree(
pager.clone(),
cell.left_child_page as usize,
depth + 2,
));
}
BTreeCell::TableLeafCell(cell) => {
current.push(format!(
"leaf[rowid:{}, len(payload):{}, overflow:{}]",
cell.rowid,
cell.payload.len(),
cell.first_overflow_page.is_some()
));
}
_ => panic!("unsupported btree cell: {cell:?}"),
}
}
if let Some(rightmost) = contents.rightmost_pointer() {
child.push(format_btree(pager.clone(), rightmost as usize, depth + 2));
}
let current = format!(
"{}-page:{}, ptr(right):{}\n{}+cells:{}",
" ".repeat(depth),
page_idx,
contents.rightmost_pointer().unwrap_or(0),
" ".repeat(depth),
current.join(", ")
);
page.clear_dirty();
if child.is_empty() {
current
} else {
current + "\n" + &child.join("\n")
}
}
fn empty_btree() -> (Rc<Pager>, usize, Arc<Database>, Arc<Connection>) {
#[allow(clippy::arc_with_non_send_sync)]
let io: Arc<dyn IO> = Arc::new(MemoryIO::new());
let db = Database::open_file(io.clone(), "test.db", false, false).unwrap();
let conn = db.connect().unwrap();
let pager = conn.pager.borrow().clone();
        // FIXME: handle the case where the page cache is full
let _ = run_until_done(|| pager.allocate_page1(), &pager);
let page2 = pager.allocate_page().unwrap();
let page2 = Arc::new(BTreePageInner {
page: RefCell::new(page2),
});
btree_init_page(&page2, PageType::TableLeaf, 0, 4096);
(pager, page2.get().get().id, db, conn)
}
#[test]
#[ignore]
pub fn btree_insert_fuzz_ex() {
for sequence in [
&[
(777548915, 3364),
(639157228, 3796),
(709175417, 1214),
(390824637, 210),
(906124785, 1481),
(197677875, 1305),
(457946262, 3734),
(956825466, 592),
(835875722, 1334),
(649214013, 1250),
(531143011, 1788),
(765057993, 2351),
(510007766, 1349),
(884516059, 822),
(81604840, 2545),
]
.as_slice(),
&[
(293471650, 2452),
(163608869, 627),
(544576229, 464),
(705823748, 3441),
]
.as_slice(),
&[
(987283511, 2924),
(261851260, 1766),
(343847101, 1657),
(315844794, 572),
]
.as_slice(),
&[
(987283511, 2924),
(261851260, 1766),
(343847101, 1657),
(315844794, 572),
(649272840, 1632),
(723398505, 3140),
(334416967, 3874),
]
.as_slice(),
] {
let (pager, root_page, _, _) = empty_btree();
let num_columns = 5;
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
for (key, size) in sequence.iter() {
run_until_done(
|| {
let key = SeekKey::TableRowId(*key);
cursor.seek(key, SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
let regs = &[Register::Value(Value::Blob(vec![0; *size]))];
let value = ImmutableRecord::from_registers(regs, regs.len());
tracing::info!("insert key:{}", key);
run_until_done(
|| cursor.insert(&BTreeKey::new_table_rowid(*key, Some(&value)), true),
pager.deref(),
)
.unwrap();
tracing::info!(
"=========== btree ===========\n{}\n\n",
format_btree(pager.clone(), root_page, 0)
);
}
for (key, _) in sequence.iter() {
let seek_key = SeekKey::TableRowId(*key);
assert!(
matches!(
cursor.seek(seek_key, SeekOp::GE { eq_only: true }).unwrap(),
IOResult::Done(SeekResult::Found)
),
"key {key} is not found"
);
}
}
}
fn rng_from_time_or_env() -> (ChaCha8Rng, u64) {
let seed = std::env::var("SEED").map_or(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis(),
|v| {
v.parse()
.expect("Failed to parse SEED environment variable as u64")
},
);
let rng = ChaCha8Rng::seed_from_u64(seed as u64);
(rng, seed as u64)
}
fn btree_insert_fuzz_run(
attempts: usize,
inserts: usize,
size: impl Fn(&mut ChaCha8Rng) -> usize,
) {
const VALIDATE_INTERVAL: usize = 1000;
let do_validate_btree = std::env::var("VALIDATE_BTREE")
.is_ok_and(|v| v.parse().expect("validate should be bool"));
let (mut rng, seed) = rng_from_time_or_env();
let mut seen = HashSet::new();
tracing::info!("super seed: {}", seed);
let num_columns = 5;
for _ in 0..attempts {
let (pager, root_page, _db, conn) = empty_btree();
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
let mut keys = SortedVec::new();
tracing::info!("seed: {seed}");
for insert_id in 0..inserts {
let do_validate = do_validate_btree || (insert_id % VALIDATE_INTERVAL == 0);
run_until_done(|| pager.begin_read_tx(), &pager).unwrap();
run_until_done(|| pager.begin_write_tx(), &pager).unwrap();
let size = size(&mut rng);
let key = {
let result;
loop {
let key = (rng.next_u64() % (1 << 30)) as i64;
if seen.contains(&key) {
continue;
} else {
seen.insert(key);
}
result = key;
break;
}
result
};
keys.push(key);
tracing::info!(
"INSERT INTO t VALUES ({}, randomblob({})); -- {}",
key,
size,
insert_id
);
run_until_done(
|| {
let key = SeekKey::TableRowId(key);
cursor.seek(key, SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
let regs = &[Register::Value(Value::Blob(vec![0; size]))];
let value = ImmutableRecord::from_registers(regs, regs.len());
let btree_before = if do_validate {
format_btree(pager.clone(), root_page, 0)
} else {
"".to_string()
};
run_until_done(
|| cursor.insert(&BTreeKey::new_table_rowid(key, Some(&value)), true),
pager.deref(),
)
.unwrap();
loop {
match pager.end_tx(false, false, &conn, false).unwrap() {
IOResult::Done(_) => break,
IOResult::IO => {
pager.io.run_once().unwrap();
}
}
}
run_until_done(|| pager.begin_read_tx(), &pager).unwrap();
// `keys` is a SortedVec, so an in-order scan of the btree should visit them in exactly this order.
cursor.move_to_root().unwrap();
let mut valid = true;
if do_validate {
cursor.move_to_root().unwrap();
for key in keys.iter() {
tracing::trace!("seeking key: {}", key);
run_until_done(|| cursor.next(), pager.deref()).unwrap();
let cursor_rowid = run_until_done(|| cursor.rowid(), pager.deref())
.unwrap()
.unwrap();
if *key != cursor_rowid {
valid = false;
println!("key {key} is not found, got {cursor_rowid}");
break;
}
}
}
// Also validate the btree structure so we can tell where it went wrong.
if do_validate
&& (!valid || matches!(validate_btree(pager.clone(), root_page), (_, false)))
{
let btree_after = format_btree(pager.clone(), root_page, 0);
println!("btree before:\n{btree_before}");
println!("btree after:\n{btree_after}");
panic!("invalid btree");
}
pager.end_read_tx().unwrap();
}
run_until_done(|| pager.begin_read_tx(), &pager).unwrap();
tracing::info!(
"=========== btree ===========\n{}\n\n",
format_btree(pager.clone(), root_page, 0)
);
if matches!(validate_btree(pager.clone(), root_page), (_, false)) {
panic!("invalid btree");
}
cursor.move_to_root().unwrap();
for key in keys.iter() {
tracing::trace!("seeking key: {}", key);
run_until_done(|| cursor.next(), pager.deref()).unwrap();
let cursor_rowid = run_until_done(|| cursor.rowid(), pager.deref())
.unwrap()
.unwrap();
assert_eq!(
*key, cursor_rowid,
"key {key} is not found, got {cursor_rowid}"
);
}
pager.end_read_tx().unwrap();
}
}
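/// Index-btree variant of the insert fuzzer: each attempt inserts `inserts`
/// unique 10-column integer keys, then verifies that every key is reachable
/// via an exact-match seek, that the entry count matches, and that iterating
/// the btree yields the keys in ascending order.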
fn btree_index_insert_fuzz_run(attempts: usize, inserts: usize) {
use crate::storage::pager::CreateBTreeFlags;
// rng_from_time_or_env already honors the SEED environment variable.
let (mut rng, seed) = rng_from_time_or_env();
let mut seen = HashSet::new();
tracing::info!("super seed: {}", seed);
for _ in 0..attempts {
let (pager, _, _db, conn) = empty_btree();
let index_root_page_result =
pager.btree_create(&CreateBTreeFlags::new_index()).unwrap();
let index_root_page = match index_root_page_result {
crate::types::IOResult::Done(id) => id as usize,
crate::types::IOResult::IO => {
panic!("btree_create returned IO in test, unexpected")
}
};
let index_def = Index {
name: "testindex".to_string(),
columns: (0..10)
.map(|i| IndexColumn {
name: format!("test{i}"),
order: SortOrder::Asc,
collation: None,
pos_in_table: i,
default: None,
})
.collect(),
table_name: "test".to_string(),
root_page: index_root_page,
unique: false,
ephemeral: false,
has_rowid: false,
};
let num_columns = index_def.columns.len();
let mut cursor = BTreeCursor::new_index(
None,
pager.clone(),
index_root_page,
&index_def,
num_columns,
);
let mut keys = SortedVec::new();
tracing::info!("seed: {seed}");
for i in 0..inserts {
pager.begin_read_tx().unwrap();
pager.begin_write_tx().unwrap();
let key = loop {
let cols = (0..num_columns)
.map(|_| (rng.next_u64() % (1 << 30)) as i64)
.collect::<Vec<_>>();
// HashSet::insert returns false when this column tuple was already generated.
if seen.insert(cols.clone()) {
break cols;
}
};
tracing::info!("insert {}/{}: {:?}", i + 1, inserts, key);
keys.push(key.clone());
let regs = key
.iter()
.map(|col| Register::Value(Value::Integer(*col)))
.collect::<Vec<_>>();
let value = ImmutableRecord::from_registers(&regs, regs.len());
run_until_done(
|| {
let record = ImmutableRecord::from_registers(&regs, regs.len());
let key = SeekKey::IndexKey(&record);
cursor.seek(key, SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
run_until_done(
|| {
cursor.insert(
&BTreeKey::new_index_key(&value),
cursor.is_write_in_progress(),
)
},
pager.deref(),
)
.unwrap();
cursor.move_to_root().unwrap();
loop {
match pager.end_tx(false, false, &conn, false).unwrap() {
IOResult::Done(_) => break,
IOResult::IO => {
pager.io.run_once().unwrap();
}
}
}
}
// Check that all keys can be found by seeking
pager.begin_read_tx().unwrap();
cursor.move_to_root().unwrap();
for (i, key) in keys.iter().enumerate() {
tracing::info!("seeking key {}/{}: {:?}", i + 1, keys.len(), key);
let exists = run_until_done(
|| {
let regs = key
.iter()
.map(|col| Register::Value(Value::Integer(*col)))
.collect::<Vec<_>>();
cursor.seek(
SeekKey::IndexKey(&ImmutableRecord::from_registers(&regs, regs.len())),
SeekOp::GE { eq_only: true },
)
},
pager.deref(),
)
.unwrap();
let mut found = matches!(exists, SeekResult::Found);
if matches!(exists, SeekResult::TryAdvance) {
found = run_until_done(|| cursor.next(), pager.deref()).unwrap();
}
assert!(found, "key {key:?} is not found");
}
// Check that key count is right
cursor.move_to_root().unwrap();
let mut count = 0;
while run_until_done(|| cursor.next(), pager.deref()).unwrap() {
count += 1;
}
assert_eq!(
count,
keys.len(),
"key count is not right, got {}, expected {}",
count,
keys.len()
);
// Check that all keys can be found in-order, by iterating the btree
cursor.move_to_root().unwrap();
let mut prev = None;
for (i, key) in keys.iter().enumerate() {
tracing::info!("iterating key {}/{}: {:?}", i + 1, keys.len(), key);
run_until_done(|| cursor.next(), pager.deref()).unwrap();
let record = run_until_done(|| cursor.record(), &pager).unwrap();
let record = record.as_ref().unwrap();
let cur = record.get_values().clone();
if let Some(prev) = prev {
if prev >= cur {
println!("Seed: {seed}");
}
assert!(
prev < cur,
"keys are not in ascending order: expected {prev:?} < {cur:?}",
);
}
prev = Some(cur);
}
pager.end_read_tx().unwrap();
}
}
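/// Fuzzes a single-column blob index with a random mix of inserts and
/// deletes; `insert_chance` is the probability (0.0..=1.0) that any given
/// operation is an insert. The keys expected to survive are validated at the
/// end via `validate_expected_keys`.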
fn btree_index_insert_delete_fuzz_run(
attempts: usize,
operations: usize,
size: impl Fn(&mut ChaCha8Rng) -> usize,
insert_chance: f64,
) {
use crate::storage::pager::CreateBTreeFlags;
// rng_from_time_or_env already honors the SEED environment variable.
let (mut rng, seed) = rng_from_time_or_env();
let mut seen = HashSet::new();
tracing::info!("super seed: {}", seed);
for _ in 0..attempts {
let (pager, _, _db, conn) = empty_btree();
let index_root_page_result =
pager.btree_create(&CreateBTreeFlags::new_index()).unwrap();
let index_root_page = match index_root_page_result {
crate::types::IOResult::Done(id) => id as usize,
crate::types::IOResult::IO => {
panic!("btree_create returned IO in test, unexpected")
}
};
let index_def = Index {
name: "testindex".to_string(),
columns: vec![IndexColumn {
name: "testcol".to_string(),
order: SortOrder::Asc,
collation: None,
pos_in_table: 0,
default: None,
}],
table_name: "test".to_string(),
root_page: index_root_page,
unique: false,
ephemeral: false,
has_rowid: false,
};
let mut cursor =
BTreeCursor::new_index(None, pager.clone(), index_root_page, &index_def, 1);
// Track expected keys that should be present in the tree
let mut expected_keys = Vec::new();
tracing::info!("seed: {seed}");
for i in 0..operations {
let print_progress = i % 100 == 0;
pager.begin_read_tx().unwrap();
pager.begin_write_tx().unwrap();
// Decide whether to insert or delete, based on insert_chance.
let is_insert = rng.next_u64() % 100 < (insert_chance * 100.0) as u64;
if is_insert {
// Generate a unique key for insertion
let key = loop {
let sizeof_blob = size(&mut rng);
let blob = (0..sizeof_blob)
.map(|_| (rng.next_u64() % 256) as u8)
.collect::<Vec<_>>();
// HashSet::insert returns false when this blob was already generated.
if seen.insert(blob.clone()) {
break blob;
}
};
if print_progress {
tracing::info!("insert {}/{}, seed: {seed}", i + 1, operations);
}
expected_keys.push(key.clone());
let regs = vec![Register::Value(Value::Blob(key))];
let value = ImmutableRecord::from_registers(&regs, regs.len());
let seek_result = run_until_done(
|| {
let record = ImmutableRecord::from_registers(&regs, regs.len());
let key = SeekKey::IndexKey(&record);
cursor.seek(key, SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
if let SeekResult::TryAdvance = seek_result {
run_until_done(|| cursor.next(), pager.deref()).unwrap();
}
run_until_done(
|| {
cursor.insert(
&BTreeKey::new_index_key(&value),
cursor.is_write_in_progress(),
)
},
pager.deref(),
)
.unwrap();
} else {
// Delete a random existing key
if !expected_keys.is_empty() {
let delete_idx = rng.next_u64() as usize % expected_keys.len();
let key_to_delete = expected_keys[delete_idx].clone();
if print_progress {
tracing::info!("delete {}/{}, seed: {seed}", i + 1, operations);
}
let regs = vec![Register::Value(Value::Blob(key_to_delete.clone()))];
let record = ImmutableRecord::from_registers(&regs, regs.len());
// Seek to the key to delete
let seek_result = run_until_done(
|| {
cursor
.seek(SeekKey::IndexKey(&record), SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
let mut found = matches!(seek_result, SeekResult::Found);
if matches!(seek_result, SeekResult::TryAdvance) {
found = run_until_done(|| cursor.next(), pager.deref()).unwrap();
}
assert!(found, "expected key {key_to_delete:?} is not found");
// Delete the key
run_until_done(|| cursor.delete(), pager.deref()).unwrap();
// Remove from expected keys
expected_keys.remove(delete_idx);
}
}
cursor.move_to_root().unwrap();
loop {
match pager.end_tx(false, false, &conn, false).unwrap() {
IOResult::Done(_) => break,
IOResult::IO => {
pager.io.run_once().unwrap();
}
}
}
}
// Final validation
let mut sorted_keys = expected_keys.clone();
sorted_keys.sort();
validate_expected_keys(&pager, &mut cursor, &sorted_keys, seed);
pager.end_read_tx().unwrap();
}
}
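/// Validates the index btree against `expected_keys`, which must be sorted
/// ascending: every key must be reachable via an exact-match seek, the entry
/// count must equal `expected_keys.len()`, and an in-order scan must return
/// the keys in exactly this order.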
fn validate_expected_keys(
pager: &Rc<Pager>,
cursor: &mut BTreeCursor,
expected_keys: &[Vec<u8>],
seed: u64,
) {
// Check that all expected keys can be found by seeking
pager.begin_read_tx().unwrap();
cursor.move_to_root().unwrap();
for (i, key) in expected_keys.iter().enumerate() {
tracing::info!(
"validating key {}/{}, seed: {seed}",
i + 1,
expected_keys.len()
);
let exists = run_until_done(
|| {
let regs = vec![Register::Value(Value::Blob(key.clone()))];
cursor.seek(
SeekKey::IndexKey(&ImmutableRecord::from_registers(&regs, regs.len())),
SeekOp::GE { eq_only: true },
)
},
pager.deref(),
)
.unwrap();
let mut found = matches!(exists, SeekResult::Found);
if matches!(exists, SeekResult::TryAdvance) {
found = run_until_done(|| cursor.next(), pager.deref()).unwrap();
}
assert!(found, "expected key {key:?} is not found");
}
// Check key count
cursor.move_to_root().unwrap();
run_until_done(|| cursor.rewind(), pager.deref()).unwrap();
if !cursor.has_record.get() {
panic!("no keys in tree");
}
let mut count = 1;
loop {
run_until_done(|| cursor.next(), pager.deref()).unwrap();
if !cursor.has_record.get() {
break;
}
count += 1;
}
assert_eq!(
count,
expected_keys.len(),
"key count is not right, got {}, expected {}, seed: {seed}",
count,
expected_keys.len()
);
// Check that all keys can be found in-order, by iterating the btree
cursor.move_to_root().unwrap();
for (i, key) in expected_keys.iter().enumerate() {
run_until_done(|| cursor.next(), pager.deref()).unwrap();
tracing::info!(
"iterating key {}/{}, cursor stack cur idx: {:?}, cursor stack depth: {:?}, seed: {seed}",
i + 1,
expected_keys.len(),
cursor.stack.current_cell_index(),
cursor.stack.current()
);
let record = run_until_done(|| cursor.record(), pager).unwrap();
let record = record.as_ref().unwrap();
let cur = record.get_values().clone();
let cur = cur.first().unwrap();
let RefValue::Blob(ref cur) = cur else {
panic!("expected blob, got {cur:?}");
};
assert_eq!(
cur.to_slice(),
key,
"key {key:?} is not found, seed: {seed}"
);
}
pager.end_read_tx().unwrap();
}
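// Free-space invariant checked by the page-level tests below:
//
//   free == usable_space - page_header(8) - sum(len(cell payload) + 2-byte cell pointer)
//
// Dropping a cell returns both its payload bytes and its pointer-array slot
// to the free pool, so the invariant must keep holding after every drop.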
#[test]
pub fn test_drop_odd() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let header_size = 8;
let mut total_size = 0;
let mut cells = Vec::new();
let usable_space = 4096;
let total_cells = 10;
for i in 0..total_cells {
let regs = &[Register::Value(Value::Integer(i as i64))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(i, i, page, record, &conn);
assert_eq!(page.cell_count(), i + 1);
let free = compute_free_space(page, usable_space);
total_size += payload.len() as u16 + 2;
assert_eq!(free, 4096 - total_size - header_size);
cells.push(Cell { pos: i, payload });
}
let mut removed = 0;
let mut new_cells = Vec::new();
for cell in cells {
if cell.pos % 2 == 1 {
drop_cell(page, cell.pos - removed, usable_space).unwrap();
removed += 1;
} else {
new_cells.push(cell);
}
}
let cells = new_cells;
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
}
#[test]
pub fn btree_insert_fuzz_run_equal_size() {
for size in 1..8 {
tracing::info!("======= size:{} =======", size);
btree_insert_fuzz_run(2, 1024, |_| size);
}
}
#[test]
pub fn btree_index_insert_fuzz_run_equal_size() {
btree_index_insert_fuzz_run(2, 1024);
}
#[test]
pub fn btree_index_insert_delete_fuzz_run_test() {
btree_index_insert_delete_fuzz_run(
2,
2000,
|rng| {
let min: u32 = 4;
let size = min + rng.next_u32() % (1024 - min);
size as usize
},
0.65,
);
}
#[test]
pub fn btree_insert_fuzz_run_random() {
btree_insert_fuzz_run(128, 16, |rng| (rng.next_u32() % 4096) as usize);
}
#[test]
pub fn btree_insert_fuzz_run_small() {
btree_insert_fuzz_run(1, 100, |rng| (rng.next_u32() % 128) as usize);
}
#[test]
pub fn btree_insert_fuzz_run_big() {
btree_insert_fuzz_run(64, 32, |rng| 3 * 1024 + (rng.next_u32() % 1024) as usize);
}
#[test]
pub fn btree_insert_fuzz_run_overflow() {
btree_insert_fuzz_run(64, 32, |rng| ((rng.next_u32() % 32) * 1024) as usize);
}
#[test]
#[ignore]
pub fn fuzz_long_btree_insert_fuzz_run_equal_size() {
for size in 1..8 {
tracing::info!("======= size:{} =======", size);
btree_insert_fuzz_run(2, 10_000, |_| size);
}
}
#[test]
#[ignore]
pub fn fuzz_long_btree_index_insert_fuzz_run_equal_size() {
btree_index_insert_fuzz_run(2, 10_000);
}
#[test]
#[ignore]
pub fn fuzz_long_btree_index_insert_delete_fuzz_run() {
btree_index_insert_delete_fuzz_run(
2,
10000,
|rng| {
let min: u32 = 4;
let size = min + rng.next_u32() % (1024 - min);
size as usize
},
0.65,
);
}
#[test]
#[ignore]
pub fn fuzz_long_btree_insert_fuzz_run_random() {
btree_insert_fuzz_run(2, 10_000, |rng| (rng.next_u32() % 4096) as usize);
}
#[test]
#[ignore]
pub fn fuzz_long_btree_insert_fuzz_run_small() {
btree_insert_fuzz_run(2, 10_000, |rng| (rng.next_u32() % 128) as usize);
}
#[test]
#[ignore]
pub fn fuzz_long_btree_insert_fuzz_run_big() {
btree_insert_fuzz_run(2, 10_000, |rng| 3 * 1024 + (rng.next_u32() % 1024) as usize);
}
#[test]
#[ignore]
pub fn fuzz_long_btree_insert_fuzz_run_overflow() {
btree_insert_fuzz_run(2, 5_000, |rng| ((rng.next_u32() % 32) * 1024) as usize);
}
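/// Builds an in-memory test pager: 512-byte pages, a 10-buffer pool, a small
/// LRU page cache, and MemoryIO-backed database and WAL files, with page 1
/// allocated plus `database_size - 1` additional pages.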
#[allow(clippy::arc_with_non_send_sync)]
fn setup_test_env(database_size: u32) -> Rc<Pager> {
let page_size = 512;
let buffer_pool = Arc::new(BufferPool::new(Some(page_size as usize)));
// Initialize buffer pool with correctly sized buffers
for _ in 0..10 {
let vec = vec![0; page_size as usize]; // Initialize with correct length, not just capacity
buffer_pool.put(Pin::new(vec));
}
let io: Arc<dyn IO> = Arc::new(MemoryIO::new());
let db_file = Arc::new(DatabaseFile::new(
io.open_file("test.db", OpenFlags::Create, false).unwrap(),
));
let wal_file = io.open_file("test.wal", OpenFlags::Create, false).unwrap();
let wal_shared = WalFileShared::new_shared(page_size, &io, wal_file).unwrap();
let wal = Rc::new(RefCell::new(WalFile::new(
io.clone(),
wal_shared,
buffer_pool.clone(),
)));
let pager = Rc::new(
Pager::new(
db_file,
wal,
io,
Arc::new(parking_lot::RwLock::new(DumbLruPageCache::new(10))),
buffer_pool,
Arc::new(AtomicDbState::new(DbState::Uninitialized)),
Arc::new(Mutex::new(())),
)
.unwrap(),
);
pager.io.run_once().unwrap();
let _ = run_until_done(|| pager.allocate_page1(), &pager);
for _ in 0..(database_size - 1) {
pager.allocate_page().unwrap();
}
header_accessor::set_page_size(&pager, page_size).unwrap();
pager
}
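// Overflow chain layout exercised below (pages 2 -> 3 -> 4):
//
//   +-----------------+------------------+
//   | next page (u32) | payload bytes... |
//   +-----------------+------------------+
//
// A next-page pointer of 0 terminates the chain.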
#[test]
pub fn test_clear_overflow_pages() -> Result<()> {
let pager = setup_test_env(5);
let num_columns = 5;
let mut cursor = BTreeCursor::new_table(None, pager.clone(), 1, num_columns);
let max_local = payload_overflow_threshold_max(PageType::TableLeaf, 4096);
let usable_size = cursor.usable_space();
// Create a large payload that will definitely trigger overflow
let large_payload = vec![b'A'; max_local + usable_size];
// Setup overflow pages (2, 3, 4) with linking
let mut current_page = 2u32;
while current_page <= 4 {
let drop_fn = Rc::new(|_buf| {});
#[allow(clippy::arc_with_non_send_sync)]
let buf = Arc::new(RefCell::new(Buffer::allocate(
header_accessor::get_page_size(&pager)? as usize,
drop_fn,
)));
let c = Completion::new_write(|_| {});
#[allow(clippy::arc_with_non_send_sync)]
pager
.db_file
.write_page(current_page as usize, buf.clone(), c)?;
pager.io.run_once()?;
let page = cursor.read_page(current_page as usize)?;
while page.get().is_locked() {
cursor.pager.io.run_once()?;
}
{
let page = page.get();
let contents = page.get_contents();
let next_page = if current_page < 4 {
current_page + 1
} else {
0
};
contents.write_u32(0, next_page); // Write pointer to next overflow page
let buf = contents.as_ptr();
buf[4..].fill(b'A');
}
current_page += 1;
}
pager.io.run_once()?;
// Create leaf cell pointing to start of overflow chain
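// The transmute below only extends the payload's lifetime to 'static so it
// can be stored in a BTreeCell for this test; `large_payload` outlives every
// use of `leaf_cell`.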
let leaf_cell = BTreeCell::TableLeafCell(TableLeafCell {
rowid: 1,
payload: unsafe { transmute::<&[u8], &'static [u8]>(large_payload.as_slice()) },
first_overflow_page: Some(2), // Point to first overflow page
payload_size: large_payload.len() as u64,
});
let initial_freelist_pages = header_accessor::get_freelist_pages(&pager)?;
// Clear overflow pages
let clear_result = cursor.clear_overflow_pages(&leaf_cell)?;
match clear_result {
IOResult::Done(_) => {
// Verify proper number of pages were added to freelist
assert_eq!(
header_accessor::get_freelist_pages(&pager)?,
initial_freelist_pages + 3,
"Expected 3 pages to be added to freelist"
);
// If this is first trunk page
let trunk_page_id = header_accessor::get_freelist_trunk_page(&pager)?;
if trunk_page_id > 0 {
// Verify trunk page structure
let trunk_page = cursor.read_page(trunk_page_id as usize)?;
if let Some(contents) = trunk_page.get().get().contents.as_ref() {
// Read number of leaf pages in trunk
let n_leaf = contents.read_u32(4);
assert!(n_leaf > 0, "Trunk page should have leaf entries");
for i in 0..n_leaf {
let leaf_page_id = contents.read_u32(8 + (i as usize * 4));
assert!(
(2..=4).contains(&leaf_page_id),
"Leaf page ID {leaf_page_id} should be in range 2-4"
);
}
}
}
}
IOResult::IO => {
cursor.pager.io.run_once()?;
}
}
Ok(())
}
#[test]
pub fn test_clear_overflow_pages_no_overflow() -> Result<()> {
let pager = setup_test_env(5);
let num_columns = 5;
let mut cursor = BTreeCursor::new_table(None, pager.clone(), 1, num_columns);
let small_payload = vec![b'A'; 10];
// Create leaf cell with no overflow pages
let leaf_cell = BTreeCell::TableLeafCell(TableLeafCell {
rowid: 1,
payload: unsafe { transmute::<&[u8], &'static [u8]>(small_payload.as_slice()) },
first_overflow_page: None,
payload_size: small_payload.len() as u64,
});
let initial_freelist_pages = header_accessor::get_freelist_pages(&pager)?;
// Try to clear non-existent overflow pages
let clear_result = cursor.clear_overflow_pages(&leaf_cell)?;
match clear_result {
IOResult::Done(_) => {
// Verify freelist was not modified
assert_eq!(
header_accessor::get_freelist_pages(&pager)?,
initial_freelist_pages,
"Freelist should not change when no overflow pages exist"
);
// Verify trunk page wasn't created
assert_eq!(
header_accessor::get_freelist_trunk_page(&pager)?,
0,
"No trunk page should be created when no overflow pages exist"
);
}
IOResult::IO => {
cursor.pager.io.run_once()?;
}
}
Ok(())
}
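// btree_destroy should walk the tree and move every page it owns onto the
// freelist. The test below hand-builds a three-page tree (an interior root
// pointing at two leaves) and asserts that exactly those three pages are freed.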
#[test]
fn test_btree_destroy() -> Result<()> {
let initial_size = 1;
let pager = setup_test_env(initial_size);
let num_columns = 5;
let mut cursor = BTreeCursor::new_table(None, pager.clone(), 2, num_columns);
// Initialize page 2 as a root page (interior)
let root_page = cursor.allocate_page(PageType::TableInterior, 0)?;
// Allocate two leaf pages
let page3 = cursor.allocate_page(PageType::TableLeaf, 0)?;
let page4 = cursor.allocate_page(PageType::TableLeaf, 0)?;
// Configure the root page to point to the two leaf pages
{
let root_page = root_page.get();
let contents = root_page.get().contents.as_mut().unwrap();
// Set rightmost pointer to page4
contents.write_u32(offset::BTREE_RIGHTMOST_PTR, page4.get().get().id as u32);
// Create a cell with pointer to page3
let cell_content = vec![
// First 4 bytes: left child pointer (page3)
(page3.get().get().id >> 24) as u8,
(page3.get().get().id >> 16) as u8,
(page3.get().get().id >> 8) as u8,
page3.get().get().id as u8,
// Next byte: rowid as varint (simple value 100)
100,
];
// Insert the cell
insert_into_cell(contents, &cell_content, 0, 512)?;
}
// Add a simple record to each leaf page
for page in [&page3, &page4] {
let page = page.get();
let contents = page.get().contents.as_mut().unwrap();
// Simple record with just a rowid and payload
let record_bytes = vec![
5, // Payload length (varint)
page.get().id as u8, // Rowid (varint)
b'h',
b'e',
b'l',
b'l',
b'o', // Payload
];
insert_into_cell(contents, &record_bytes, 0, 512)?;
}
// Verify structure before destruction
assert_eq!(
header_accessor::get_database_size(&pager)?,
4, // We should have pages 1-4
"Database should have 4 pages total"
);
// Track freelist state before destruction
let initial_free_pages = header_accessor::get_freelist_pages(&pager)?;
assert_eq!(initial_free_pages, 0, "should start with no free pages");
run_until_done(|| cursor.btree_destroy(), pager.deref())?;
let pages_freed = header_accessor::get_freelist_pages(&pager)? - initial_free_pages;
assert_eq!(pages_freed, 3, "should free 3 pages (root + 2 leaves)");
Ok(())
}
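// defragment_page compacts the cell content area: live cells are rewritten
// contiguously and freeblocks disappear, while cell order and payload bytes
// are preserved. The defragment tests assert the payloads are byte-identical
// before and after compaction.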
#[test]
pub fn test_defragment() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let header_size = 8;
let mut total_size = 0;
let mut cells = Vec::new();
let usable_space = 4096;
for i in 0..3 {
let regs = &[Register::Value(Value::Integer(i as i64))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(i, i, page, record, &conn);
assert_eq!(page.cell_count(), i + 1);
let free = compute_free_space(page, usable_space);
total_size += payload.len() as u16 + 2;
assert_eq!(free, 4096 - total_size - header_size);
cells.push(Cell { pos: i, payload });
}
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
cells.remove(1);
drop_cell(page, 1, usable_space).unwrap();
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
defragment_page(page, usable_space);
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
}
#[test]
pub fn test_drop_odd_with_defragment() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let header_size = 8;
let mut total_size = 0;
let mut cells = Vec::new();
let usable_space = 4096;
let total_cells = 10;
for i in 0..total_cells {
let regs = &[Register::Value(Value::Integer(i as i64))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(i, i, page, record, &conn);
assert_eq!(page.cell_count(), i + 1);
let free = compute_free_space(page, usable_space);
total_size += payload.len() as u16 + 2;
assert_eq!(free, 4096 - total_size - header_size);
cells.push(Cell { pos: i, payload });
}
let mut removed = 0;
let mut new_cells = Vec::new();
for cell in cells {
if cell.pos % 2 == 1 {
drop_cell(page, cell.pos - removed, usable_space).unwrap();
removed += 1;
} else {
new_cells.push(cell);
}
}
let cells = new_cells;
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
defragment_page(page, usable_space);
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
}
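// Randomized mix of page-level operations: 0 = insert at a random cell index,
// 1 = drop a random cell, 2 = defragment, 3 = verify all tracked cells.
// The free-space invariant is re-checked after every operation.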
#[test]
pub fn test_fuzz_drop_defragment_insert() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let header_size = 8;
let mut total_size = 0;
let mut cells = Vec::new();
let usable_space = 4096;
let mut i = 100000;
let seed = thread_rng().gen();
tracing::info!("seed {}", seed);
let mut rng = ChaCha8Rng::seed_from_u64(seed);
while i > 0 {
i -= 1;
match rng.next_u64() % 4 {
0 => {
// allow appends with extra place to insert
let cell_idx = rng.next_u64() as usize % (page.cell_count() + 1);
let free = compute_free_space(page, usable_space);
let regs = &[Register::Value(Value::Integer(i as i64))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let mut payload: Vec<u8> = Vec::new();
fill_cell_payload(
page,
Some(i as i64),
&mut payload,
cell_idx,
&record,
4096,
conn.pager.borrow().clone(),
);
if (free as usize) < payload.len() + 2 {
// do not insert cells that would overflow, since overflow cells require balancing
continue;
}
insert_into_cell(page, &payload, cell_idx, 4096).unwrap();
assert!(page.overflow_cells.is_empty());
total_size += payload.len() as u16 + 2;
cells.insert(cell_idx, Cell { pos: i, payload });
}
1 => {
if page.cell_count() == 0 {
continue;
}
let cell_idx = rng.next_u64() as usize % page.cell_count();
let (_, len) = page.cell_get_raw_region(cell_idx, usable_space as usize);
drop_cell(page, cell_idx, usable_space).unwrap();
total_size -= len as u16 + 2;
cells.remove(cell_idx);
}
2 => {
defragment_page(page, usable_space);
}
3 => {
// check cells
for (i, cell) in cells.iter().enumerate() {
ensure_cell(page, i, &cell.payload);
}
assert_eq!(page.cell_count(), cells.len());
}
_ => unreachable!(),
}
let free = compute_free_space(page, usable_space);
assert_eq!(free, 4096 - total_size - header_size);
}
}
#[test]
pub fn test_fuzz_drop_defragment_insert_issue_1085() {
// This test demonstrates that the issue at https://github.com/tursodatabase/turso/issues/1085
// is fixed.
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let header_size = 8;
let mut total_size = 0;
let mut cells = Vec::new();
let usable_space = 4096;
for seed in [15292777653676891381, 9261043168681395159] {
let mut i = 1000;
tracing::info!("seed {}", seed);
let mut rng = ChaCha8Rng::seed_from_u64(seed);
while i > 0 {
i -= 1;
match rng.next_u64() % 3 {
0 => {
// allow appends with extra place to insert
let cell_idx = rng.next_u64() as usize % (page.cell_count() + 1);
let free = compute_free_space(page, usable_space);
let regs = &[Register::Value(Value::Integer(i))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let mut payload: Vec<u8> = Vec::new();
fill_cell_payload(
page,
Some(i),
&mut payload,
cell_idx,
&record,
4096,
conn.pager.borrow().clone(),
);
if (free as usize) < payload.len() + 2 {
// do not insert cells that would overflow, since overflow cells require balancing
continue;
}
insert_into_cell(page, &payload, cell_idx, 4096).unwrap();
assert!(page.overflow_cells.is_empty());
total_size += payload.len() as u16 + 2;
cells.push(Cell {
pos: i as usize,
payload,
});
}
1 => {
if page.cell_count() == 0 {
continue;
}
let cell_idx = rng.next_u64() as usize % page.cell_count();
let (_, len) = page.cell_get_raw_region(cell_idx, usable_space as usize);
drop_cell(page, cell_idx, usable_space).unwrap();
total_size -= len as u16 + 2;
cells.remove(cell_idx);
}
2 => {
defragment_page(page, usable_space);
}
_ => unreachable!(),
}
let free = compute_free_space(page, usable_space);
assert_eq!(free, 4096 - total_size - header_size);
}
}
}
// this test will create a tree like this:
// -page:2, ptr(right):4
// +cells:node[rowid:14, ptr(<=):3]
// -page:3, ptr(right):0
// +cells:leaf[rowid:11, len(payload):137, overflow:false]
// -page:4, ptr(right):0
// +cells:
#[test]
pub fn test_drop_page_in_balancing_issue_1203() {
let db = get_database();
let conn = db.connect().unwrap();
let queries = vec![
"CREATE TABLE lustrous_petit (awesome_nomous TEXT,ambitious_amargi TEXT,fantastic_daniels BLOB,stupendous_highleyman TEXT,relaxed_crane TEXT,elegant_bromma INTEGER,proficient_castro BLOB,ambitious_liman TEXT,responsible_lusbert BLOB);",
"INSERT INTO lustrous_petit VALUES ('funny_sarambi', 'hardworking_naoumov', X'666561726C6573735F68696C6C', 'elegant_iafd', 'rousing_flag', 681399778772406122, X'706572736F6E61626C655F676F6477696E6772696D6D', 'insightful_anonymous', X'706F77657266756C5F726F636861'), ('personable_holmes', 'diligent_pera', X'686F6E6573745F64696D656E73696F6E', 'energetic_raskin', 'gleaming_federasyon', -2778469859573362611, X'656666696369656E745F6769617A', 'sensible_skirda', X'66616E7461737469635F6B656174696E67'), ('inquisitive_baedan', 'brave_sphinx', X'67656E65726F75735F6D6F6E7473656E79', 'inquisitive_syndicate', 'amiable_room', 6954857961525890638, X'7374756E6E696E675F6E6965747A73636865', 'glowing_coordinator', X'64617A7A6C696E675F7365766572696E65'), ('upbeat_foxtale', 'engaging_aktimon', X'63726561746976655F6875746368696E6773', 'ample_locura', 'creative_barrett', 6413352509911171593, X'6772697070696E675F6D696E7969', 'competitive_parissi', X'72656D61726B61626C655F77696E7374616E6C6579');",
"INSERT INTO lustrous_petit VALUES ('ambitious_berry', 'devoted_marshall', X'696E7175697369746976655F6C6172657661', 'flexible_pramen', 'outstanding_stauch', 6936508362673228293, X'6C6F76696E675F6261756572', 'charming_anonymous', X'68617264776F726B696E675F616E6E6973'), ('enchanting_cohen', 'engaging_rubel', X'686F6E6573745F70726F766F63617A696F6E65', 'humorous_robin', 'imaginative_shuzo', 4762266264295288131, X'726F7573696E675F6261796572', 'vivid_bolling', X'6F7267616E697A65645F7275696E73'), ('affectionate_resistance', 'gripping_rustamova', X'6B696E645F6C61726B696E', 'bright_boulanger', 'upbeat_ashirov', -1726815435854320541, X'61646570745F66646361', 'dazzling_tashjian', X'68617264776F726B696E675F6D6F72656C'), ('zestful_ewald', 'favorable_lewis', X'73747570656E646F75735F7368616C6966', 'bright_combustion', 'blithesome_harding', 8408539013935554176, X'62726176655F737079726F706F756C6F75', 'hilarious_finnegan', X'676976696E675F6F7267616E697A696E67'), ('blithesome_picqueray', 'sincere_william', X'636F75726167656F75735F6D69746368656C6C', 'rousing_atan', 'mirthful_katie', -429232313453215091, X'6C6F76656C795F776174616E616265', 'stupendous_mcmillan', X'666F63757365645F6B61666568'), ('incredible_kid', 'friendly_yvetot', X'706572666563745F617A697A', 'helpful_manhattan', 'shining_horrox', -4318061095860308846, X'616D626974696F75735F726F7765', 'twinkling_anarkiya', X'696D6167696E61746976655F73756D6E6572');",
"INSERT INTO lustrous_petit VALUES ('sleek_graeber', 'approachable_ghazzawi', X'62726176655F6865776974747768697465', 'adaptable_zimmer', 'polite_cohn', -5464225138957223865, X'68756D6F726F75735F736E72', 'adaptable_igualada', X'6C6F76656C795F7A686F75'), ('imaginative_rautiainen', 'magnificent_ellul', X'73706C656E6469645F726F6361', 'responsible_brown', 'upbeat_uruguaya', -1185340834321792223, X'616D706C655F6D6470', 'philosophical_kelly', X'676976696E675F6461676865726D6172676F7369616E'), ('blithesome_darkness', 'creative_newell', X'6C757374726F75735F61706174726973', 'engaging_kids', 'charming_wark', -1752453819873942466, X'76697669645F6162657273', 'independent_barricadas', X'676C697374656E696E675F64686F6E6474'), ('productive_chardronnet', 'optimistic_karnage', X'64696C6967656E745F666F72657374', 'engaging_beggar', 'sensible_wolke', 784341549042407442, X'656E676167696E675F6265726B6F7769637A', 'blithesome_zuzenko', X'6E6963655F70726F766F63617A696F6E65');",
"INSERT INTO lustrous_petit VALUES ('shining_sagris', 'considerate_mother', X'6F70656E5F6D696E6465645F72696F74', 'polite_laufer', 'patient_mink', 2240393952789100851, X'636F75726167656F75735F6D636D696C6C616E', 'glowing_robertson', X'68656C7066756C5F73796D6F6E6473'), ('dazzling_glug', 'stupendous_poznan', X'706572736F6E61626C655F6672616E6B73', 'open_minded_ruins', 'qualified_manes', 2937238916206423261, X'696E736967687466756C5F68616B69656C', 'passionate_borl', X'616D6961626C655F6B7570656E647561'), ('wondrous_parry', 'knowledgeable_giovanni', X'6D6F76696E675F77696E6E', 'shimmering_aberlin', 'affectionate_calhoun', 702116954493913499, X'7265736F7572636566756C5F62726F6D6D61', 'propitious_mezzagarcia', X'746563686E6F6C6F676963616C5F6E6973686974616E69');",
"INSERT INTO lustrous_petit VALUES ('kind_room', 'hilarious_crow', X'6F70656E5F6D696E6465645F6B6F74616E7969', 'hardworking_petit', 'adaptable_zarrow', 2491343172109894986, X'70726F647563746976655F646563616C6F677565', 'willing_sindikalis', X'62726561746874616B696E675F6A6F7264616E');",
"INSERT INTO lustrous_petit VALUES ('confident_etrebilal', 'agreeable_shifu', X'726F6D616E7469635F7363687765697A6572', 'loving_debs', 'gripping_spooner', -3136910055229112693, X'677265676172696F75735F736B726F7A6974736B79', 'ample_ontiveros', X'7175616C69666965645F726F6D616E69656E6B6F'), ('competitive_call', 'technological_egoumenides', X'6469706C6F6D617469635F6D6F6E616768616E', 'willing_stew', 'frank_neal', -5973720171570031332, X'6C6F76696E675F6465737461', 'dazzling_gambone', X'70726F647563746976655F6D656E64656C676C6565736F6E'), ('favorable_delesalle', 'sensible_atterbury', X'666169746866756C5F64617861', 'bountiful_aldred', 'marvelous_malgraith', 5330463874397264493, X'706572666563745F7765726265', 'lustrous_anti', X'6C6F79616C5F626F6F6B6368696E'), ('stellar_corlu', 'loyal_espana', X'6D6F76696E675F7A6167', 'efficient_nelson', 'qualified_shepard', 1015518116803600464, X'737061726B6C696E675F76616E6469766572', 'loving_scoffer', X'686F6E6573745F756C72696368'), ('adaptable_taylor', 'shining_yasushi', X'696D6167696E61746976655F776974746967', 'alluring_blackmore', 'zestful_coeurderoy', -7094136731216188999, X'696D6167696E61746976655F757A63617465677569', 'gleaming_hernandez', X'6672616E6B5F646F6D696E69636B'), ('competitive_luis', 'stellar_fredericks', X'616772656561626C655F6D696368656C', 'optimistic_navarro', 'funny_hamilton', 4003895682491323194, X'6F70656E5F6D696E6465645F62656C6D6173', 'incredible_thorndycraft', X'656C6567616E745F746F6C6B69656E'), ('remarkable_parsons', 'sparkling_ulrich', X'737061726B6C696E675F6D6172696E636561', 'technological_leighlais', 'warmhearted_konok', -5789111414354869563, X'676976696E675F68657272696E67', 'adept_dabtara', X'667269656E646C795F72617070');",
"INSERT INTO lustrous_petit VALUES ('hardworking_norberg', 'approachable_winter', X'62726176655F68617474696E6768', 'imaginative_james', 'open_minded_capital', -5950508516718821688, X'6C757374726F75735F72616E7473', 'warmhearted_limanov', X'696E736967687466756C5F646F637472696E65'), ('generous_shatz', 'generous_finley', X'726176697368696E675F6B757A6E6574736F76', 'stunning_arrigoni', 'favorable_volcano', -8442328990977069526, X'6D6972746866756C5F616C7467656C64', 'thoughtful_zurbrugg', X'6D6972746866756C5F6D6F6E726F65'), ('frank_kerr', 'splendid_swain', X'70617373696F6E6174655F6D6470', 'flexible_dubey', 'sensible_tj', 6352949260574274181, X'656666696369656E745F6B656D736B79', 'vibrant_ege', X'736C65656B5F6272696768746F6E'), ('organized_neal', 'glistening_sugar', X'656E676167696E675F6A6F72616D', 'romantic_krieger', 'qualified_corr', -4774868512022958085, X'706572666563745F6B6F7A6172656B', 'bountiful_zaikowska', X'74686F7567687466756C5F6C6F6767616E73'), ('excellent_lydiettcarrion', 'diligent_denslow', X'666162756C6F75735F6D616E68617474616E', 'confident_tomar', 'glistening_ligt', -1134906665439009896, X'7175616C69666965645F6F6E6B656E', 'remarkable_anarkiya', X'6C6F79616C5F696E64616261'), ('passionate_melis', 'loyal_xsilent', X'68617264776F726B696E675F73637564', 'lustrous_barnes', 'nice_sugako', -4097897163377829983, X'726F6D616E7469635F6461686572', 'bright_imrie', X'73656E7369626C655F6D61726B'), ('giving_mlb', 'breathtaking_fourier', X'736C65656B5F616E61726368697374', 'glittering_malet', 'brilliant_crew', 8791228049111405793, X'626F756E746966756C5F626576656E736565', 'lovely_swords', X'70726F706974696F75735F696E656469746173'), ('honest_wright', 'qualified_rabble', X'736C65656B5F6D6172656368616C', 'shimmering_marius', 'blithesome_mckelvie', -1330737263592370654, X'6F70656E5F6D696E6465645F736D616C6C', 'energetic_gorman', X'70726F706974696F75735F6B6F74616E7969');",
"DELETE FROM lustrous_petit WHERE (ambitious_liman > 'adept_dabtaqu');",
"INSERT INTO lustrous_petit VALUES ('technological_dewey', 'fabulous_st', X'6F7074696D69737469635F73687562', 'considerate_levy', 'adaptable_kernis', 4195134012457716562, X'61646570745F736F6C6964617269646164', 'vibrant_crump', X'6C6F79616C5F72796E6572'), ('super_marjan', 'awesome_gethin', X'736C65656B5F6F737465727765696C', 'diplomatic_loidl', 'qualified_bokani', -2822676417968234733, X'6272696768745F64756E6C6170', 'creative_en', X'6D6972746866756C5F656C6F6666'), ('philosophical_malet', 'unique_garcia', X'76697669645F6E6F7262657267', 'spellbinding_fire', 'faithful_barringtonbush', -7293711848773657758, X'6272696C6C69616E745F6F6B65656665', 'gripping_guillon', X'706572736F6E61626C655F6D61726C696E7370696B65'), ('thoughtful_morefus', 'lustrous_rodriguez', X'636F6E666964656E745F67726F73736D616E726F73686368696E', 'devoted_jackson', 'propitious_karnage', -7802999054396485709, X'63617061626C655F64', 'enchanting_orwell', X'7477696E6B6C696E675F64616C616B6F676C6F75'), ('alluring_guillon', 'brilliant_pinotnoir', X'706572736F6E61626C655F6A6165636B6C65', 'open_minded_azeez', 'courageous_romania', 2126962403055072268, X'746563686E6F6C6F676963616C5F6962616E657A', 'open_minded_rosa', X'6C757374726F75735F6575726F7065'), ('courageous_kolokotronis', 'inquisitive_gahman', X'677265676172696F75735F626172726574', 'ambitious_shakur', 'fantastic_apatris', -1232732971861520864, X'737061726B6C696E675F7761746368', 'captivating_clover', X'636F6E666964656E745F736574686E65737363617374726F'), ('charming_sullivan', 'focused_congress', X'7368696D6D6572696E675F636C7562', 'wondrous_skrbina', 'giving_mendanlioglu', -6837337053772308333, X'636861726D696E675F73616C696E6173', 'rousing_hedva', X'6469706C6F6D617469635F7061796E');",
];
for query in queries {
let mut stmt = conn.query(query).unwrap().unwrap();
loop {
let row = stmt.step().expect("step");
match row {
StepResult::Done => {
break;
}
_ => {
tracing::debug!("row {:?}", row);
}
}
}
}
}
// this test will create a tree like this:
// -page:2, ptr(right):3
// +cells:
// -page:3, ptr(right):0
// +cells:
#[test]
pub fn test_drop_page_in_balancing_issue_1203_2() {
let db = get_database();
let conn = db.connect().unwrap();
let queries = vec![
"CREATE TABLE super_becky (engrossing_berger BLOB,plucky_chai BLOB,mirthful_asbo REAL,bountiful_jon REAL,competitive_petit REAL,engrossing_rexroth REAL);",
"INSERT INTO super_becky VALUES (X'636861726D696E675F6261796572', X'70726F647563746976655F70617269737369', 6847793643.408741, 7330361375.924953, -6586051582.891455, -6921021872.711397), (X'657863656C6C656E745F6F7267616E697A696E67', X'6C757374726F75735F73696E64696B616C6973', 9905774996.48619, 570325205.2246342, 5852346465.53047, 728566012.1968269), (X'7570626561745F73656174746C65', X'62726176655F6661756E', -2202725836.424899, 5424554426.388281, 2625872085.917082, -6657362503.808359), (X'676C6F77696E675F6D617877656C6C', X'7761726D686561727465645F726F77616E', -9610936969.793116, 4886606277.093559, -3414536174.7928505, 6898267795.317778), (X'64796E616D69635F616D616E', X'7374656C6C61725F7374657073', 3918935692.153696, 151068445.947237, 4582065669.356403, -3312668220.4789667), (X'64696C6967656E745F64757272757469', X'7175616C69666965645F6D726163686E696B', 5527271629.262201, 6068855126.044355, 289904657.13490677, 2975774820.0877323), (X'6469706C6F6D617469635F726F76657363696F', X'616C6C7572696E675F626F7474696369', 9844748192.66119, -6180276383.305578, -4137330511.025565, -478754566.79494476), (X'776F6E64726F75735F6173686572', X'6465766F7465645F6176657273696F6E', 2310211470.114773, -6129166761.628184, -2865371645.3145514, 7542428654.8645935), (X'617070726F61636861626C655F6B686F6C61', X'6C757374726F75735F6C696E6E656C6C', -4993113161.458349, 7356727284.362968, -3228937035.568404, -1779334005.5067253);",
"INSERT INTO super_becky VALUES (X'74686F7567687466756C5F726576696577', X'617765736F6D655F63726F73736579', 9401977997.012783, 8428201961.643898, 2822821303.052643, 4555601220.718847), (X'73706563746163756C61725F6B686179617469', X'616772656561626C655F61646F6E696465', 7414547022.041355, 365016845.73330307, 50682963.055828094, -9258802584.962656), (X'6C6F79616C5F656D6572736F6E', X'676C6F77696E675F626174616C6F', -5522070106.765736, 2712536599.6384163, 6631385631.869345, 1242757880.7583427), (X'68617264776F726B696E675F6F6B656C6C79', X'666162756C6F75735F66696C697373', 6682622809.9778805, 4233900041.917185, 9017477903.795563, -756846353.6034946), (X'68617264776F726B696E675F626C61756D616368656E', X'616666656374696F6E6174655F6B6F736D616E', -1146438175.3174362, -7545123696.438596, -6799494012.403366, 5646913977.971333), (X'66616E7461737469635F726F77616E', X'74686F7567687466756C5F7465727269746F72696573', -4414529784.916277, -6209371635.279242, 4491104121.288605, 2590223842.117277);",
"INSERT INTO super_becky VALUES (X'676C697374656E696E675F706F72746572', X'696E7175697369746976655F656D', 2986144164.3676434, 3495899172.5935287, -849280584.9386635, 6869709150.2699375), (X'696D6167696E61746976655F6D65726C696E6F', X'676C6F77696E675F616B74696D6F6E', 8733490615.829357, 6782649864.719433, 6926744218.74107, 1532081022.4379768), (X'6E6963655F726F73736574', X'626C69746865736F6D655F66696C697373', -839304300.0706863, 6155504968.705227, -2951592321.950267, -6254186334.572437), (X'636F6E666964656E745F6C69626574', X'676C696D6D6572696E675F6B6F74616E7969', -5344675223.37533, -8703794729.211002, 3987472096.020382, -7678989974.961197), (X'696D6167696E61746976655F6B61726162756C7574', X'64796E616D69635F6D6367697272', 2028227065.6995697, -7435689525.030833, 7011220815.569796, 5526665697.213846), (X'696E7175697369746976655F636C61726B', X'616666656374696F6E6174655F636C6561766572', 3016598350.546356, -3686782925.383732, 9671422351.958004, 9099319829.078941), (X'63617061626C655F746174616E6B61', X'696E6372656469626C655F6F746F6E6F6D61', 6339989259.432795, -8888997534.102034, 6855868409.475763, -2565348887.290493), (X'676F7267656F75735F6265726E657269', X'65647563617465645F6F6D6F77616C69', 6992467657.527826, -3538089391.748543, -7103111660.146708, 4019283237.3740463), (X'616772656561626C655F63756C74757265', X'73706563746163756C61725F657370616E61', 189387871.06959534, 6211851191.361202, 1786455196.9768047, 7966404387.318119);",
"INSERT INTO super_becky VALUES (X'7068696C6F736F70686963616C5F6C656967686C616973', X'666162756C6F75735F73656D696E61746F7265', 8688321500.141502, -7855144036.024546, -5234949709.573349, -9937638367.366447), (X'617070726F61636861626C655F726F677565', X'676C65616D696E675F6D7574696E79', -5351540099.744092, -3614025150.9013805, -2327775310.276925, 2223379997.077526), (X'676C696D6D6572696E675F63617263686961', X'696D6167696E61746976655F61737379616E6E', 4104832554.8371887, -5531434716.627781, 1652773397.4099865, 3884980522.1830273);",
"DELETE FROM super_becky WHERE (plucky_chai != X'7761726D686561727465645F6877616E67' AND mirthful_asbo != 9537234687.183533 AND bountiful_jon = -3538089391.748543);",
"INSERT INTO super_becky VALUES (X'706C75636B795F6D617263616E74656C', X'696D6167696E61746976655F73696D73', 9535651632.375484, 92270815.0720501, 1299048084.6248207, 6460855331.572151), (X'726F6D616E7469635F706F746C61746368', X'68756D6F726F75735F63686165686F', 9345375719.265533, 7825332230.247925, -7133157299.39028, -6939677879.6597), (X'656666696369656E745F6261676E696E69', X'63726561746976655F67726168616D', -2615470560.1954746, 6790849074.977201, -8081732985.448849, -8133707792.312794), (X'677265676172696F75735F73637564', X'7368696E696E675F67726F7570', -7996394978.2610035, -9734939565.228964, 1108439333.8481388, -5420483517.169478), (X'6C696B61626C655F6B616E6176616C6368796B', X'636F75726167656F75735F7761726669656C64', -1959869609.656724, 4176668769.239971, -8423220404.063669, 9987687878.685959), (X'657863656C6C656E745F68696C6473646F74746572', X'676C6974746572696E675F7472616D7564616E61', -5220160777.908238, 3892402687.8826714, 9803857762.617172, -1065043714.0265541), (X'6D61676E69666963656E745F717565657273', X'73757065725F717565657273', -700932053.2006226, -4706306995.253335, -5286045811.046467, 1954345265.5250092), (X'676976696E675F6275636B65726D616E6E', X'667269656E646C795F70697A7A6F6C61746F', -2186859620.9089565, -6098492099.446075, -7456845586.405931, 8796967674.444252);",
"DELETE FROM super_becky WHERE TRUE;",
"INSERT INTO super_becky VALUES (X'6F7074696D69737469635F6368616E69616C', X'656E657267657469635F6E65677261', 1683345860.4208698, 4163199322.9289455, -4192968616.7868404, -7253371206.571701), (X'616C6C7572696E675F686176656C', X'7477696E6B6C696E675F626965627579636B', -9947019174.287437, 5975899640.893995, 3844707723.8570194, -9699970750.513876), (X'6F7074696D69737469635F7A686F75', X'616D626974696F75735F636F6E6772657373', 4143738484.1081524, -2138255286.170598, 9960750454.03466, 5840575852.80299), (X'73706563746163756C61725F6A6F6E67', X'73656E7369626C655F616269646F72', -1767611042.9716015, -7684260477.580351, 4570634429.188147, -9222640121.140202), (X'706F6C6974655F6B657272', X'696E736967687466756C5F63686F646F726B6F6666', -635016769.5123329, -4359901288.494518, -7531565119.905825, -1180410948.6572971), (X'666C657869626C655F636F6D756E69656C6C6F', X'6E6963655F6172636F73', 8708423014.802425, -6276712625.559328, -771680766.2485523, 8639486874.113342);",
"DELETE FROM super_becky WHERE (mirthful_asbo < 9730384310.536528 AND plucky_chai < X'6E6963655F61726370B2');",
"DELETE FROM super_becky WHERE (mirthful_asbo > 6248699554.426553 AND bountiful_jon > 4124481472.333034);",
"INSERT INTO super_becky VALUES (X'676C696D6D6572696E675F77656C7368', X'64696C6967656E745F636F7262696E', 8217054003.369003, 8745594518.77864, 1928172803.2261295, -8375115534.050233), (X'616772656561626C655F6463', X'6C6F76696E675F666F72656D616E', -5483889804.871533, -8264576639.127487, 4770567289.404846, -3409172927.2573576), (X'6D617276656C6F75735F6173696D616B6F706F756C6F73', X'746563686E6F6C6F676963616C5F6A61637175696572', 2694858779.206814, -1703227425.3442516, -4504989231.263319, -3097265869.5230227), (X'73747570656E646F75735F64757075697364657269', X'68696C6172696F75735F6D75697268656164', 568174708.66469, -4878260547.265669, -9579691520.956625, 73507727.8100338), (X'626C69746865736F6D655F626C6F6B', X'61646570745F6C65696572', 7772117077.916897, 4590608571.321514, -881713470.657032, -9158405774.647465);",
"INSERT INTO super_becky VALUES (X'6772697070696E675F6573736578', X'67656E65726F75735F636875726368696C6C', -4180431825.598956, 7277443000.677654, 2499796052.7878246, -2858339306.235305), (X'756E697175655F6D6172656368616C', X'62726561746874616B696E675F636875726368696C6C', 1401354536.7625294, -611427440.2796707, -4621650430.463729, 1531473111.7482872), (X'657863656C6C656E745F66696E6C6579', X'666169746866756C5F62726F636B', -4020697828.0073624, -2833530733.19637, -7766170050.654022, 8661820959.434689);",
"INSERT INTO super_becky VALUES (X'756E697175655F6C617061797265', X'6C6F76696E675F7374617465', 7063237787.258968, -5425712581.365798, -7750509440.0141945, -7570954710.892544), (X'62726561746874616B696E675F6E65616C', X'636F75726167656F75735F61727269676F6E69', 289862394.2028198, 9690362375.014446, -4712463267.033899, 2474917855.0973473), (X'7477696E6B6C696E675F7368616B7572', X'636F75726167656F75735F636F6D6D6974746565', 5449035403.229155, -2159678989.597906, 3625606019.1150894, -3752010405.4475393);",
"INSERT INTO super_becky VALUES (X'70617373696F6E6174655F73686970776179', X'686F6E6573745F7363687765697A6572', 4193384746.165228, -2232151704.896323, 8615245520.962444, -9789090953.995636);",
"INSERT INTO super_becky VALUES (X'6C696B61626C655F69', X'6661766F7261626C655F6D626168', 6581403690.769894, 3260059398.9544716, -407118859.046051, -3155853965.2700634), (X'73696E636572655F6F72', X'616772656561626C655F617070656C6261756D', 9402938544.308651, -7595112171.758331, -7005316716.211025, -8368210960.419411);",
"INSERT INTO super_becky VALUES (X'6D617276656C6F75735F6B61736864616E', X'6E6963655F636F7272', -5976459640.85817, -3177550476.2092276, 2073318650.736992, -1363247319.9978447);",
"INSERT INTO super_becky VALUES (X'73706C656E6469645F6C616D656E646F6C61', X'677265676172696F75735F766F6E6E65677574', 6898259773.050102, 8973519699.707073, -25070632.280548096, -1845922497.9676847), (X'617765736F6D655F7365766572', X'656E657267657469635F706F746C61746368', -8750678407.717808, 5130907533.668898, -6778425327.111566, 3718982135.202587);",
"INSERT INTO super_becky VALUES (X'70726F706974696F75735F6D616C617465737461', X'657863656C6C656E745F65766572657474', -8846855772.62094, -6168969732.697067, -8796372709.125793, 9983557891.544613), (X'73696E636572655F6C6177', X'696E7175697369746976655F73616E647374726F6D', -6366985697.975358, 3838628702.6652164, 3680621713.3371124, -786796486.8049564), (X'706F6C6974655F676C6561736F6E', X'706C75636B795F677579616E61', -3987946379.104308, -2119148244.413993, -1448660343.6888638, -1264195510.1611118), (X'676C6974746572696E675F6C6975', X'70657273697374656E745F6F6C6976696572', 6741779968.943846, -3239809989.227495, -1026074003.5506897, 4654600514.871752);",
"DELETE FROM super_becky WHERE (engrossing_berger < X'6566651A3C70278D4E200657551D8071A1' AND competitive_petit > 1236742147.9451914);",
"INSERT INTO super_becky VALUES (X'6661766F7261626C655F726569746D616E', X'64657465726D696E65645F726974746572', -7412553243.829927, -7572665195.290464, 7879603411.222157, 3706943306.5691853), (X'70657273697374656E745F6E6F6C616E', X'676C6974746572696E675F73686570617264', 7028261282.277422, -2064164782.3494844, -5244048504.507779, -2399526243.005843), (X'6B6E6F776C6564676561626C655F70617474656E', X'70726F66696369656E745F726F7365627261756768', 3713056763.583538, 3919834206.566164, -6306779387.430006, -9939464323.995546), (X'616461707461626C655F7172757A', X'696E7175697369746976655F68617261776179', 6519349690.299835, -9977624623.820414, 7500579325.440605, -8118341251.362242);",
"INSERT INTO super_becky VALUES (X'636F6E73696465726174655F756E696F6E', X'6E6963655F6573736578', -1497385534.8720198, 9957688503.242973, 9191804202.566128, -179015615.7117195), (X'666169746866756C5F626F776C656773', X'6361707469766174696E675F6D6367697272', 893707300.1576138, 3381656294.246702, 6884723724.381908, 6248331214.701559), (X'6B6E6F776C6564676561626C655F70656E6E61', X'6B696E645F616A697468', -3335162603.6574974, 1812878172.8505402, 5115606679.658335, -5690100280.808182), (X'617765736F6D655F77696E7374616E6C6579', X'70726F706974696F75735F6361726173736F', -7395576292.503981, 4956546102.029215, -1468521769.7486448, -2968223925.60355), (X'636F75726167656F75735F77617266617265', X'74686F7567687466756C5F7361707068697265', 7052982930.566017, -9806098174.104418, -6910398936.377775, -4041963031.766964), (X'657863656C6C656E745F6B62', X'626C69746865736F6D655F666F75747A6F706F756C6F73', 6142173202.994768, 5193126957.544125, -7522202722.983735, -1659088056.594862), (X'7374756E6E696E675F6E6576616461', X'626F756E746966756C5F627572746F6E', -3822097036.7628613, -3458840259.240303, 2544472236.86788, 6928890176.466003);",
"INSERT INTO super_becky VALUES (X'706572736F6E61626C655F646D69747269', X'776F6E64726F75735F6133796F', 2651932559.0077076, 811299402.3174248, -8271909238.671928, 6761098864.189909);",
"INSERT INTO super_becky VALUES (X'726F7573696E675F6B6C6166657461', X'64617A7A6C696E675F6B6E617070', 9370628891.439335, -5923332007.253168, -2763161830.5880013, -9156194881.875952), (X'656666696369656E745F6C6576656C6C6572', X'616C6C7572696E675F706561636F7474', 3102641409.8314342, 2838360181.628153, 2466271662.169607, 1015942181.844162), (X'6469706C6F6D617469635F7065726B696E73', X'726F7573696E675F6172616269', -1551071129.022499, -8079487600.186886, 7832984580.070087, -6785993247.895652), (X'626F756E746966756C5F6D656D62657273', X'706F77657266756C5F70617269737369', 9226031830.72445, 7012021503.536997, -2297349030.108919, -2738320055.4710903), (X'676F7267656F75735F616E6172636F7469636F', X'68656C7066756C5F7765696C616E64', -8394163480.676959, -2978605095.699134, -6439355448.021704, 9137308022.281273), (X'616666656374696F6E6174655F70726F6C65696E666F', X'706C75636B795F73616E7A', 3546758708.3524914, -1870964264.9353771, 338752565.3643894, -3908023657.299715), (X'66756E6E795F706F70756C61697265', X'6F75747374616E64696E675F626576696E67746F6E', -1533858145.408224, 6164225076.710373, 8419445987.622173, 584555253.6852646), (X'76697669645F6D7474', X'7368696D6D6572696E675F70616F6E65737361', 5512251366.193035, -8680583180.123213, -4445968638.153208, -3274009935.4229546);",
"INSERT INTO super_becky VALUES (X'7068696C6F736F70686963616C5F686F7264', X'657863656C6C656E745F67757373656C7370726F757473', -816909447.0240917, -3614686681.8786583, 7701617524.26067, -4541962047.183721), (X'616D6961626C655F69676E6174696576', X'6D61676E69666963656E745F70726F76696E6369616C69', -1318532883.847702, -4918966075.976474, -7601723171.33518, -3515747704.3847466), (X'70726F66696369656E745F32303137', X'66756E6E795F6E77', -1264540201.518032, 8227396547.578808, 6245093925.183641, -8368355328.110817);",
"INSERT INTO super_becky VALUES (X'77696C6C696E675F6E6F6B6B65', X'726F6D616E7469635F677579616E61', 6618610796.3707695, -3814565359.1524105, 1663106272.4565296, -4175107840.768817), (X'72656C617865645F7061766C6F76', X'64657465726D696E65645F63686F646F726B6F6666', -3350029338.034504, -3520837855.4619064, 3375167499.631817, -8866806483.714607), (X'616D706C655F67696464696E6773', X'667269656E646C795F6A6F686E', 1458864959.9942684, 1344208968.0486107, 9335156635.91314, -6180643697.918882), (X'72656C617865645F6C65726F79', X'636F75726167656F75735F6E6F72646772656E', -5164986537.499656, 8820065797.720875, 6146530425.891005, 6949241471.958189), (X'666F63757365645F656D6D61', X'696D6167696E61746976655F6C6F6E67', -9587619060.80035, 6128068142.184402, 6765196076.956905, 800226302.7983418);",
"INSERT INTO super_becky VALUES (X'616D626974696F75735F736F6E67', X'706572666563745F6761686D616E', 4989979180.706432, -9374266591.537058, 314459621.2820797, -3200029490.9553604), (X'666561726C6573735F626C6174', X'676C697374656E696E675F616374696F6E', -8512203612.903147, -7625581186.013805, -9711122307.234787, -301590929.32751083), (X'617765736F6D655F6669646573', X'666169746866756C5F63756E6E696E6768616D', -1428228887.9205084, 7669883854.400173, 5604446195.905277, -1509311057.9653416), (X'68756D6F726F75735F77697468647261776E', X'62726561746874616B696E675F7472617562656C', -7292778713.676636, -6728132503.529593, 2805341768.7252483, 330416975.2300949);",
"INSERT INTO super_becky VALUES (X'677265676172696F75735F696873616E', X'7374656C6C61725F686172746D616E', 8819210651.1988, 5298459883.813452, 7293544377.958424, 460475869.72971725), (X'696E736967687466756C5F62657765726E69747A', X'676C65616D696E675F64656E736C6F77', -6911957282.193239, 1754196756.2193146, -6316860403.693853, -3094020672.236368), (X'6D6972746866756C5F616D6265727261656B656C6C79', X'68756D6F726F75735F6772617665', 1785574023.0269203, -372056983.82761574, 4133719439.9538956, 9374053482.066044), (X'76697669645F736169747461', X'7761726D686561727465645F696E656469746173', 2787071361.6099434, 9663839418.553448, -5934098589.901047, -9774745509.608858), (X'61646570745F6F6375727279', X'6C696B61626C655F726569746D616E', -3098540915.1310825, 5460848322.672174, -6012867197.519758, 6769770087.661135), (X'696E646570656E64656E745F6F', X'656C6567616E745F726F6F726461', 1462542860.3143978, 3360904654.2464733, 5458876201.665213, -5522844849.529962), (X'72656D61726B61626C655F626F6B616E69', X'6F70656E5F6D696E6465645F686F72726F78', 7589481760.867031, 7970075121.546291, 7513467575.5213585, 9663061478.289227), (X'636F6E666964656E745F6C616479', X'70617373696F6E6174655F736B726F7A6974736B79', 8266917234.53915, -7172933478.625412, 309854059.94031143, -8309837814.497616);",
"DELETE FROM super_becky WHERE (competitive_petit != 8725256604.165474 OR engrossing_rexroth > -3607424615.7839313 OR plucky_chai < X'726F7573696E675F6216E20375');",
"INSERT INTO super_becky VALUES (X'7368696E696E675F736F6C69646169726573', X'666561726C6573735F63617264616E', -170727879.20838165, 2744601113.384678, 5676912434.941502, 6757573601.657997), (X'636F75726167656F75735F706C616E636865', X'696E646570656E64656E745F636172736F6E', -6271723086.761938, -180566679.7470188, -1285774632.134449, 1359665735.7842407), (X'677265676172696F75735F7374616D61746F76', X'7374756E6E696E675F77696C64726F6F7473', -6210238866.953484, 2492683045.8287067, -9688894361.68205, 5420275482.048567), (X'696E646570656E64656E745F6F7267616E697A6572', X'676C6974746572696E675F736F72656C', 9291163783.3073, -6843003475.769236, -1320245894.772686, -5023483808.044955), (X'676C6F77696E675F6E65736963', X'676C65616D696E675F746F726D6579', 829526382.8027191, 9365690945.1316, 4761505764.826195, -4149154965.0024815), (X'616C6C7572696E675F646F637472696E65', X'6E6963655F636C6561766572', 3896644979.981762, -288600448.8016701, 9462856570.130062, -909633752.5993862);",
];
for query in queries {
let mut stmt = conn.query(query).unwrap().unwrap();
loop {
let row = stmt.step().expect("step");
match row {
StepResult::Done => {
break;
}
_ => {
tracing::debug!("row {:?}", row);
}
}
}
}
}
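    // Free-space accounting on a fresh leaf page: after inserting one record,
    // compute_free_space must return the usable space (4096) minus the 8-byte
    // leaf page header, the cell payload, and the 2-byte cell pointer slot.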
#[test]
pub fn test_free_space() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let header_size = 8;
let usable_space = 4096;
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(0, 0, page, record, &conn);
let free = compute_free_space(page, usable_space);
assert_eq!(free, 4096 - payload.len() as u16 - 2 - header_size);
}
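    // Defragmenting a page with a single live cell must preserve the cell
    // count and the exact payload bytes; only the free space is compacted.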
#[test]
pub fn test_defragment_1() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let usable_space = 4096;
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(0, 0, page, record, &conn);
assert_eq!(page.cell_count(), 1);
defragment_page(page, usable_space);
assert_eq!(page.cell_count(), 1);
let (start, len) = page.cell_get_raw_region(0, usable_space as usize);
let buf = page.as_ptr();
assert_eq!(&payload, &buf[start..start + len]);
}
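    // Drop the only cell on a page, then insert a smaller record in its place;
    // the new payload must read back byte-for-byte from its cell region.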
#[test]
pub fn test_insert_drop_insert() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let usable_space = 4096;
let regs = &[
Register::Value(Value::Integer(0)),
Register::Value(Value::Text(Text::new("aaaaaaaa"))),
];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(0, 0, page, record, &conn);
assert_eq!(page.cell_count(), 1);
drop_cell(page, 0, usable_space).unwrap();
assert_eq!(page.cell_count(), 0);
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(0, 0, page, record, &conn);
assert_eq!(page.cell_count(), 1);
let (start, len) = page.cell_get_raw_region(0, usable_space as usize);
let buf = page.as_ptr();
assert_eq!(&payload, &buf[start..start + len]);
}
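    // Stress the drop/insert path: 100 cycles of dropping the sole cell and
    // reinserting it, verifying the payload each time (exercises reuse of
    // freed space within the cell content area).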
#[test]
pub fn test_insert_drop_insert_multiple() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let usable_space = 4096;
let regs = &[
Register::Value(Value::Integer(0)),
Register::Value(Value::Text(Text::new("aaaaaaaa"))),
];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(0, 0, page, record, &conn);
for _ in 0..100 {
assert_eq!(page.cell_count(), 1);
drop_cell(page, 0, usable_space).unwrap();
assert_eq!(page.cell_count(), 0);
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(0, 0, page, record, &conn);
assert_eq!(page.cell_count(), 1);
let (start, len) = page.cell_get_raw_region(0, usable_space as usize);
let buf = page.as_ptr();
assert_eq!(&payload, &buf[start..start + len]);
}
}
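    // Insert three cells, then drop index 1 twice (removing the second and
    // third cells); the first cell's payload must remain intact.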
#[test]
pub fn test_drop_a_few_insert() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let usable_space = 4096;
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let payload = add_record(0, 0, page, record, &conn);
let regs = &[Register::Value(Value::Integer(1))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(1, 1, page, record, &conn);
let regs = &[Register::Value(Value::Integer(2))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(2, 2, page, record, &conn);
drop_cell(page, 1, usable_space).unwrap();
drop_cell(page, 1, usable_space).unwrap();
ensure_cell(page, 0, &payload);
}
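    // Regression test: a minimized insert/drop/defragment sequence found by
    // the page fuzzer; passes as long as no step panics or corrupts the page.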
#[test]
pub fn test_fuzz_victim_1() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let page = page.get();
let page = page.get_contents();
let usable_space = 4096;
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(0, 0, page, record, &conn);
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(0, 0, page, record, &conn);
drop_cell(page, 0, usable_space).unwrap();
defragment_page(page, usable_space);
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(0, 1, page, record, &conn);
drop_cell(page, 0, usable_space).unwrap();
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(0, 1, page, record, &conn);
}
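    // Regression test: another fuzzer-minimized sequence of inserts, drops and
    // defragmentations, finishing with a free-space computation that would
    // trip on a corrupted cell layout.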
#[test]
pub fn test_fuzz_victim_2() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let usable_space = 4096;
let insert = |pos, page| {
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(0, pos, page, record, &conn);
};
let drop = |pos, page| {
drop_cell(page, pos, usable_space).unwrap();
};
let defragment = |page| {
defragment_page(page, usable_space);
};
let page = page.get();
defragment(page.get_contents());
defragment(page.get_contents());
insert(0, page.get_contents());
drop(0, page.get_contents());
insert(0, page.get_contents());
drop(0, page.get_contents());
insert(0, page.get_contents());
defragment(page.get_contents());
defragment(page.get_contents());
drop(0, page.get_contents());
defragment(page.get_contents());
insert(0, page.get_contents());
drop(0, page.get_contents());
insert(0, page.get_contents());
insert(1, page.get_contents());
insert(1, page.get_contents());
insert(0, page.get_contents());
drop(3, page.get_contents());
drop(2, page.get_contents());
compute_free_space(page.get_contents(), usable_space);
}
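    // Regression test: after this fuzzer-minimized sequence exactly one cell
    // remains, so the free space must equal the usable space minus the page
    // header and that one cell (payload plus its 2-byte pointer).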
#[test]
pub fn test_fuzz_victim_3() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let usable_space = 4096;
let insert = |pos, page| {
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let _ = add_record(0, pos, page, record, &conn);
};
let drop = |pos, page| {
drop_cell(page, pos, usable_space).unwrap();
};
let defragment = |page| {
defragment_page(page, usable_space);
};
let regs = &[Register::Value(Value::Integer(0))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let mut payload: Vec<u8> = Vec::new();
fill_cell_payload(
page.get().get_contents(),
Some(0),
&mut payload,
0,
&record,
4096,
conn.pager.borrow().clone(),
);
let page = page.get();
insert(0, page.get_contents());
defragment(page.get_contents());
insert(0, page.get_contents());
defragment(page.get_contents());
insert(0, page.get_contents());
drop(2, page.get_contents());
drop(0, page.get_contents());
let free = compute_free_space(page.get_contents(), usable_space);
let total_size = payload.len() + 2;
assert_eq!(
free,
usable_space - page.get_contents().header_size() as u16 - total_size as u16
);
dbg!(free);
}
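    // Insert 10,000 sequentially increasing rowids, which forces repeated
    // page splits, then validate the whole tree and look up every key.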
#[test]
pub fn btree_insert_sequential() {
let (pager, root_page, _, _) = empty_btree();
let mut keys = Vec::new();
let num_columns = 5;
for i in 0..10000 {
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
tracing::info!("INSERT INTO t VALUES ({});", i,);
let regs = &[Register::Value(Value::Integer(i))];
let value = ImmutableRecord::from_registers(regs, regs.len());
tracing::trace!("before insert {}", i);
run_until_done(
|| {
let key = SeekKey::TableRowId(i);
cursor.seek(key, SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
run_until_done(
|| cursor.insert(&BTreeKey::new_table_rowid(i, Some(&value)), true),
pager.deref(),
)
.unwrap();
keys.push(i);
}
if matches!(validate_btree(pager.clone(), root_page), (_, false)) {
panic!("invalid btree");
}
tracing::trace!(
"=========== btree ===========\n{}\n\n",
format_btree(pager.clone(), root_page, 0)
);
for key in keys.iter() {
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
let key = Value::Integer(*key);
let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap();
assert!(exists, "key not found {key}");
}
}
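    // Free-space accounting with a large cell: a 3600-byte blob nearly fills
    // the 4096-byte page, and the accounting must still balance to the byte.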
#[test]
pub fn test_big_payload_compute_free() {
let db = get_database();
let conn = db.connect().unwrap();
let page = get_page(2);
let usable_space = 4096;
let regs = &[Register::Value(Value::Blob(vec![0; 3600]))];
let record = ImmutableRecord::from_registers(regs, regs.len());
let mut payload: Vec<u8> = Vec::new();
fill_cell_payload(
page.get().get_contents(),
Some(0),
&mut payload,
0,
&record,
4096,
conn.pager.borrow().clone(),
);
insert_into_cell(page.get().get_contents(), &payload, 0, 4096).unwrap();
let free = compute_free_space(page.get().get_contents(), usable_space);
let total_size = payload.len() + 2;
assert_eq!(
free,
usable_space - page.get().get_contents().header_size() as u16 - total_size as u16
);
dbg!(free);
}
#[test]
pub fn test_delete_balancing() {
        // What this test does:
        // 1. Insert 10,000 rows of ~15 bytes of payload each. That is roughly
        //    40 pages (10,000 * 15 / 4096 ≈ 37) at about 240 rows per page.
        // 2. Delete enough rows to leave empty or nearly empty pages, which
        //    triggers balancing (behavior verified against SQLite).
        // 3. Verify the validity/integrity of the btree after deleting, and
        //    also verify that the deleted values are actually gone.
let (pager, root_page, _, _) = empty_btree();
let num_columns = 5;
        // Insert 10,000 records into the BTree.
for i in 1..=10000 {
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
let regs = &[Register::Value(Value::Text(Text::new("hello world")))];
let value = ImmutableRecord::from_registers(regs, regs.len());
run_until_done(
|| {
let key = SeekKey::TableRowId(i);
cursor.seek(key, SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
run_until_done(
|| cursor.insert(&BTreeKey::new_table_rowid(i, Some(&value)), true),
pager.deref(),
)
.unwrap();
}
if let (_, false) = validate_btree(pager.clone(), root_page) {
panic!("Invalid B-tree after insertion");
}
let num_columns = 5;
// Delete records with 500 <= key <= 3500
for i in 500..=3500 {
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
let seek_key = SeekKey::TableRowId(i);
let seek_result = run_until_done(
|| cursor.seek(seek_key.clone(), SeekOp::GE { eq_only: true }),
pager.deref(),
)
.unwrap();
if matches!(seek_result, SeekResult::Found) {
run_until_done(|| cursor.delete(), pager.deref()).unwrap();
}
}
// Verify that records with key < 500 and key > 3500 still exist in the BTree.
for i in 1..=10000 {
if (500..=3500).contains(&i) {
continue;
}
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
let key = Value::Integer(i);
let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap();
assert!(exists, "Key {i} should exist but doesn't");
}
// Verify the deleted records don't exist.
for i in 500..=3500 {
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
let key = Value::Integer(i);
let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap();
assert!(!exists, "Deleted key {i} still exists");
}
}
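    // Each 8192-character text exceeds the 4096-byte page, so every insert
    // must spill into an overflow page chain; a full forward scan then checks
    // that all rowids come back in order.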
#[test]
pub fn test_overflow_cells() {
let iterations = 10_usize;
let mut huge_texts = Vec::new();
for i in 0..iterations {
let mut huge_text = String::new();
for _j in 0..8192 {
huge_text.push((b'A' + i as u8) as char);
}
huge_texts.push(huge_text);
}
let (pager, root_page, _, _) = empty_btree();
let num_columns = 5;
for (i, huge_text) in huge_texts.iter().enumerate().take(iterations) {
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
tracing::info!("INSERT INTO t VALUES ({});", i,);
let regs = &[Register::Value(Value::Text(Text {
value: huge_text.as_bytes().to_vec(),
subtype: crate::types::TextSubtype::Text,
}))];
let value = ImmutableRecord::from_registers(regs, regs.len());
tracing::trace!("before insert {}", i);
tracing::debug!(
"=========== btree before ===========\n{}\n\n",
format_btree(pager.clone(), root_page, 0)
);
run_until_done(
|| {
let key = SeekKey::TableRowId(i as i64);
cursor.seek(key, SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
run_until_done(
|| cursor.insert(&BTreeKey::new_table_rowid(i as i64, Some(&value)), true),
pager.deref(),
)
.unwrap();
tracing::debug!(
"=========== btree after ===========\n{}\n\n",
format_btree(pager.clone(), root_page, 0)
);
}
let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page, num_columns);
cursor.move_to_root().unwrap();
for i in 0..iterations {
let has_next = run_until_done(|| cursor.next(), pager.deref()).unwrap();
if !has_next {
panic!("expected Some(rowid) but got {:?}", cursor.has_record.get());
};
let rowid = run_until_done(|| cursor.rowid(), pager.deref())
.unwrap()
.unwrap();
assert_eq!(rowid, i as i64, "got!=expected");
}
}
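    // Read and overwrite payload bytes through the cursor. The offset of 2
    // below assumes the record layout for a single small blob: a 1-byte header
    // size varint followed by a 1-byte serial type, so the blob body starts at
    // byte 2 of the record.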
#[test]
pub fn test_read_write_payload_with_offset() {
let (pager, root_page, _, _) = empty_btree();
let num_columns = 5;
let mut cursor = BTreeCursor::new(None, pager.clone(), root_page, num_columns);
        let offset = 2; // blob data starts at payload offset 2, right after the record header
let initial_text = "hello world";
let initial_blob = initial_text.as_bytes().to_vec();
let regs = &[Register::Value(Value::Blob(initial_blob.clone()))];
let value = ImmutableRecord::from_registers(regs, regs.len());
run_until_done(
|| {
let key = SeekKey::TableRowId(1);
cursor.seek(key, SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
run_until_done(
|| cursor.insert(&BTreeKey::new_table_rowid(1, Some(&value)), true),
pager.deref(),
)
.unwrap();
cursor
.stack
.set_cell_index(cursor.stack.current_cell_index() + 1);
let mut read_buffer = Vec::new();
run_until_done(
|| {
cursor.read_write_payload_with_offset(
offset,
&mut read_buffer,
initial_blob.len() as u32,
false,
)
},
pager.deref(),
)
.unwrap();
assert_eq!(
std::str::from_utf8(&read_buffer).unwrap(),
initial_text,
"Read data doesn't match expected data"
);
let mut modified_hello = "olleh".as_bytes().to_vec();
run_until_done(
|| cursor.read_write_payload_with_offset(offset, &mut modified_hello, 5, true),
pager.deref(),
)
.unwrap();
let mut verification_buffer = Vec::new();
run_until_done(
|| {
cursor.read_write_payload_with_offset(
offset,
&mut verification_buffer,
initial_blob.len() as u32,
false,
)
},
pager.deref(),
)
.unwrap();
assert_eq!(
std::str::from_utf8(&verification_buffer).unwrap(),
"olleh world",
"Modified data doesn't match expected result"
);
}
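    // Same as above, but the payload spans ~10 overflow pages, so reads and
    // writes must walk the overflow chain. The offset arithmetic below assumes
    // a 4-byte record header: a 1-byte header size varint plus a 3-byte serial
    // type varint for a blob this large.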
#[test]
pub fn test_read_write_payload_with_overflow_page() {
let (pager, root_page, _, _) = empty_btree();
let num_columns = 5;
let mut cursor = BTreeCursor::new(None, pager.clone(), root_page, num_columns);
        let mut large_blob = vec![b'A'; 40960 - 11]; // large blob: 40960 bytes = 10 pages' worth of data
let hello_world = b"hello world";
large_blob.extend_from_slice(hello_world);
let regs = &[Register::Value(Value::Blob(large_blob.clone()))];
let value = ImmutableRecord::from_registers(regs, regs.len());
run_until_done(
|| {
let key = SeekKey::TableRowId(1);
cursor.seek(key, SeekOp::GE { eq_only: true })
},
pager.deref(),
)
.unwrap();
run_until_done(
|| cursor.insert(&BTreeKey::new_table_rowid(1, Some(&value)), true),
pager.deref(),
)
.unwrap();
cursor
.stack
.set_cell_index(cursor.stack.current_cell_index() + 1);
        let offset_to_hello_world = 4 + (large_blob.len() - 11) as u32; // skip the 4-byte record header, then all but the trailing 11 bytes
let mut read_buffer = Vec::new();
run_until_done(
|| {
cursor.read_write_payload_with_offset(
offset_to_hello_world,
&mut read_buffer,
11,
false,
)
},
pager.deref(),
)
.unwrap();
assert_eq!(
std::str::from_utf8(&read_buffer).unwrap(),
"hello world",
"Failed to read 'hello world' from overflow page"
);
let mut modified_hello = "olleh".as_bytes().to_vec();
run_until_done(
|| {
cursor.read_write_payload_with_offset(
offset_to_hello_world,
&mut modified_hello,
5,
true,
)
},
pager.deref(),
)
.unwrap();
let mut verification_buffer = Vec::new();
run_until_done(
|| {
cursor.read_write_payload_with_offset(
offset_to_hello_world,
&mut verification_buffer,
hello_world.len() as u32,
false,
)
},
pager.deref(),
)
.unwrap();
assert_eq!(
std::str::from_utf8(&verification_buffer).unwrap(),
"olleh world",
"Modified data doesn't match expected result"
);
}
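    /// Run `action` to completion via the pager's I/O loop, blocking whenever
    /// it yields for I/O instead of returning a final value.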
fn run_until_done<T>(action: impl FnMut() -> Result<IOResult<T>>, pager: &Pager) -> Result<T> {
pager.io.block(action)
}
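    // Fill a fresh leaf page with random-size cells, free a random prefix or
    // suffix of them with page_free_array, and verify that the surviving
    // cells still match the copies taken before the free.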
#[test]
fn test_free_array() {
let (mut rng, seed) = rng_from_time_or_env();
tracing::info!("seed={}", seed);
const ITERATIONS: usize = 10000;
for _ in 0..ITERATIONS {
let mut cell_array = CellArray {
cell_payloads: Vec::new(),
cell_count_per_page_cumulative: [0; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE],
};
let mut cells_cloned = Vec::new();
let (pager, _, _, _) = empty_btree();
let page_type = PageType::TableLeaf;
let page = pager.allocate_page().unwrap();
let page = Arc::new(BTreePageInner {
page: RefCell::new(page),
});
btree_init_page(&page, page_type, 0, pager.usable_space() as u16);
let page = page.get();
let mut size = (rng.next_u64() % 100) as u16;
let mut i = 0;
            // add random-size cells until the next one no longer fits
while compute_free_space(page.get_contents(), pager.usable_space() as u16) >= size + 10
{
insert_cell(i, size, page.get_contents(), pager.clone());
i += 1;
size = (rng.next_u64() % 1024) as u16;
}
            // Build a cell array referencing the inserted cells, keeping owned
            // copies for later comparison
let contents = page.get_contents();
for cell_idx in 0..contents.cell_count() {
let buf = contents.as_ptr();
let (start, len) = contents.cell_get_raw_region(cell_idx, pager.usable_space());
cell_array
.cell_payloads
.push(to_static_buf(&mut buf[start..start + len]));
cells_cloned.push(buf[start..start + len].to_vec());
}
debug_validate_cells!(contents, pager.usable_space() as u16);
            // now free a prefix or suffix of the cells we added
let cells_before_free = contents.cell_count();
let size = rng.next_u64() as usize % cells_before_free;
let prefix = rng.next_u64() % 2 == 0;
let start = if prefix {
0
} else {
contents.cell_count() - size
};
let removed = page_free_array(
contents,
start,
size,
&cell_array,
pager.usable_space() as u16,
)
.unwrap();
            // if a prefix was freed, shift the remaining cell pointers left to close the gap
if prefix {
shift_cells_left(contents, cells_before_free, removed);
}
assert_eq!(removed, size);
assert_eq!(contents.cell_count(), cells_before_free - size);
#[cfg(debug_assertions)]
debug_validate_cells_core(contents, pager.usable_space() as u16);
            // check the remaining cells match the pre-free copies
let mut cell_idx_cloned = if prefix { size } else { 0 };
for cell_idx in 0..contents.cell_count() {
let buf = contents.as_ptr();
let (start, len) = contents.cell_get_raw_region(cell_idx, pager.usable_space());
let cell_in_page = &buf[start..start + len];
let cell_in_array = &cells_cloned[cell_idx_cloned];
assert_eq!(cell_in_page, cell_in_array);
cell_idx_cloned += 1;
}
}
}
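    // Test helper: build a cell holding a zero-filled blob of `size` bytes and
    // insert it at `cell_idx` on the given page.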
fn insert_cell(cell_idx: u64, size: u16, contents: &mut PageContent, pager: Rc<Pager>) {
let mut payload = Vec::new();
let regs = &[Register::Value(Value::Blob(vec![0; size as usize]))];
let record = ImmutableRecord::from_registers(regs, regs.len());
fill_cell_payload(
contents,
Some(cell_idx as i64),
&mut payload,
cell_idx as usize,
&record,
pager.usable_space(),
pager.clone(),
);
insert_into_cell(
contents,
&payload,
cell_idx as usize,
pager.usable_space() as u16,
)
.unwrap();
}
}