diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 15e43d25c..855641b08 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -6081,59 +6081,104 @@ impl BTreePageInner { } } -/// Try to find a free block available and allocate it if found -fn find_free_cell(page_ref: &PageContent, usable_space: usize, amount: usize) -> Result { +/// Try to find a freeblock inside the cell content area that is large enough to fit the given amount of bytes. +/// Used to check if a cell can be inserted into a freeblock to reduce fragmentation. +/// Returns the absolute byte offset of the freeblock if found. +fn find_free_slot( + page_ref: &PageContent, + usable_space: usize, + amount: usize, +) -> Result> { + const CELL_SIZE_MIN: usize = 4; // NOTE: freelist is in ascending order of keys and pc // unuse_space is reserved bytes at the end of page, therefore we must substract from maxpc - let mut prev_pc = page_ref.offset + offset::BTREE_FIRST_FREEBLOCK; - let mut pc = page_ref.first_freeblock() as usize; - let maxpc = usable_space - amount; + let mut prev_block = None; + let mut cur_block = match page_ref.first_freeblock() { + 0 => None, + first_block => Some(first_block as usize), + }; - while pc <= maxpc { - if pc + 4 > usable_space { + let max_start_offset = usable_space - amount; + + while let Some(cur) = cur_block { + if cur + CELL_SIZE_MIN > usable_space { return_corrupt!("Free block header extends beyond page"); } - let next = page_ref.read_u16_no_offset(pc); - let size = page_ref.read_u16_no_offset(pc + 2); + let (next, size) = { + let cur_u16: u16 = cur + .try_into() + .unwrap_or_else(|_| panic!("cur={cur} is too large to fit in a u16")); + let (next, size) = page_ref.read_freeblock(cur_u16); + (next as usize, size as usize) + }; - if amount <= size as usize { - let new_size = size as usize - amount; - if new_size < 4 { - // The code is checking if using a free slot that would leave behind a very small fragment (x < 4 bytes) - // would cause the total fragmentation to exceed the limit of 60 bytes - // check sqlite docs https://www.sqlite.org/fileformat.html#:~:text=A%20freeblock%20requires,not%20exceed%2060 - if page_ref.num_frag_free_bytes() > 57 { - return Ok(0); - } - // Delete the slot from freelist and update the page's fragment count. - page_ref.write_u16_no_offset(prev_pc, next); - let frag = page_ref.num_frag_free_bytes() + new_size as u8; - page_ref.write_fragmented_bytes_count(frag); - return Ok(pc); - } else if new_size + pc > maxpc { - return_corrupt!("Free block extends beyond page end"); - } else { - // Requested amount fits inside the current free slot so we reduce its size - // to account for newly allocated space. - page_ref.write_u16_no_offset(pc + 2, new_size as u16); - return Ok(pc + new_size); + // Doesn't fit in this freeblock, try the next one. + if amount > size { + if next == 0 { + // No next -> can't fit. + return Ok(None); } - } - prev_pc = pc; - pc = next as usize; - if pc <= prev_pc { - if pc != 0 { + prev_block = cur_block; + if next <= cur { return_corrupt!("Free list not in ascending order"); } - return Ok(0); + cur_block = Some(next); + continue; + } + + let new_size = size - amount; + // If the freeblock's new size is < CELL_SIZE_MIN, the freeblock is deleted and the remaining bytes + // become fragmented free bytes. + if new_size < CELL_SIZE_MIN { + if page_ref.num_frag_free_bytes() > 57 { + // SQLite has a fragmentation limit of 60 bytes. + // check sqlite docs https://www.sqlite.org/fileformat.html#:~:text=A%20freeblock%20requires,not%20exceed%2060 + return Ok(None); + } + // Delete the slot from freelist and update the page's fragment count. + match prev_block { + Some(prev) => { + let prev_u16: u16 = prev + .try_into() + .unwrap_or_else(|_| panic!("prev={prev} is too large to fit in a u16")); + let next_u16: u16 = next + .try_into() + .unwrap_or_else(|_| panic!("next={next} is too large to fit in a u16")); + page_ref.write_freeblock_next_ptr(prev_u16, next_u16); + } + None => { + let next_u16: u16 = next + .try_into() + .unwrap_or_else(|_| panic!("next={next} is too large to fit in a u16")); + page_ref.write_first_freeblock(next_u16); + } + } + let new_size_u8: u8 = new_size + .try_into() + .unwrap_or_else(|_| panic!("new_size={new_size} is too large to fit in a u8")); + let frag = page_ref.num_frag_free_bytes() + new_size_u8; + page_ref.write_fragmented_bytes_count(frag); + return Ok(cur_block); + } else if new_size + cur > max_start_offset { + return_corrupt!("Free block extends beyond page end"); + } else { + // Requested amount fits inside the current free slot so we reduce its size + // to account for newly allocated space. + let cur_u16: u16 = cur + .try_into() + .unwrap_or_else(|_| panic!("cur={cur} is too large to fit in a u16")); + let new_size_u16: u16 = new_size + .try_into() + .unwrap_or_else(|_| panic!("new_size={new_size} is too large to fit in a u16")); + page_ref.write_freeblock_size(cur_u16, new_size_u16); + // Return the offset immediately after the shrunk freeblock. + return Ok(Some(cur + new_size)); } } - if pc > maxpc + amount - 4 { - return_corrupt!("Free block chain extends beyond page end"); - } - Ok(0) + + Ok(None) } pub fn btree_init_page(page: &BTreePage, page_type: PageType, offset: usize, usable_space: usize) { @@ -6984,8 +7029,7 @@ fn allocate_cell_space( && unallocated_region_start + CELL_PTR_SIZE_BYTES <= cell_content_area_start { // find slot - let pc = find_free_cell(page_ref, usable_space, amount)?; - if pc != 0 { + if let Some(pc) = find_free_slot(page_ref, usable_space, amount)? { // we can fit the cell in a freeblock. return Ok(pc as u16); } diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index da99edc59..53ebf04a4 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -574,10 +574,35 @@ impl PageContent { /// - size: the size of the freeblock /// - next_block: the absolute offset of the next freeblock, or None if this is the last freeblock pub fn write_freeblock(&self, offset: u16, size: u16, next_block: Option) { - self.write_u16_no_offset(offset as usize, next_block.unwrap_or(0)); + self.write_freeblock_next_ptr(offset, next_block.unwrap_or(0)); + self.write_freeblock_size(offset, size); + } + + /// Write the new size of a freeblock. + /// Parameters: + /// - offset: the absolute offset of the freeblock + /// - size: the new size of the freeblock + pub fn write_freeblock_size(&self, offset: u16, size: u16) { self.write_u16_no_offset(offset as usize + 2, size); } + /// Write the absolute offset of the next freeblock. + /// Parameters: + /// - offset: the absolute offset of the current freeblock + /// - next_block: the absolute offset of the next freeblock + pub fn write_freeblock_next_ptr(&self, offset: u16, next_block: u16) { + self.write_u16_no_offset(offset as usize, next_block); + } + + /// Read a freeblock from the page content at the given absolute offset. + /// Returns (absolute offset of next freeblock, size of the current freeblock) + pub fn read_freeblock(&self, offset: u16) -> (u16, u16) { + ( + self.read_u16_no_offset(offset as usize), + self.read_u16_no_offset(offset as usize + 2), + ) + } + /// Write the number of cells on this page. pub fn write_cell_count(&self, value: u16) { self.write_u16(BTREE_CELL_COUNT, value);