Merge 'Implement SQLite balancing algorithm' from Pere Diaz Bou

Beep boop.
What happened, you ask? I removed the dumb balancing algorithm I
implemented in favor of SQLite's implementation based on B*Tree[1], where
a page is 2/3 full instead of 1/2. It also tries to balance a page by
taking a maximum of 3 pages and distributing cells evenly between them.
I've made some changes that are somewhat related:
* Moved most operations on pages out of BTreeCursor because those
operations are based on a page, not on a cursor, and it makes it easier
to test.
* Fixed `write_u16` and `read_u16` cases that didn't need an implicit
offset calculation. Added `write_u16_no_offset` and
`read_u16_no_offset` to handle these cases.
* Added some tests with fuzz testing too.
* Fixed some important actions like: `compute_free_space`,
`defragment_page` and `drop_cell`.
[1] https://dl.acm.org/doi/10.1145/356770.356776

Closes #968
This commit is contained in:
Pekka Enberg
2025-02-28 19:10:52 +02:00
5 changed files with 2090 additions and 1029 deletions

View File

@@ -227,8 +227,8 @@ pub fn maybe_init_database_file(file: &Rc<dyn File>, io: &Arc<dyn IO>) -> Result
btree_init_page(
&page1,
storage::sqlite3_ondisk::PageType::TableLeaf,
&db_header,
DATABASE_HEADER_SIZE,
db_header.page_size - db_header.reserved_space as u16,
);
let contents = page1.get().contents.as_mut().unwrap();

File diff suppressed because it is too large Load Diff

View File

@@ -56,6 +56,10 @@ impl Page {
unsafe { &mut *self.inner.get() }
}
/// Returns a mutable reference to this page's `contents`.
///
/// # Panics
/// Panics if `contents` is currently `None`.
pub fn get_contents(&self) -> &mut PageContent {
    let inner = self.get();
    inner.contents.as_mut().unwrap()
}
/// Reports whether the `PAGE_UPTODATE` bit is set in this page's flags.
///
/// The flags are loaded with sequentially-consistent ordering.
pub fn is_uptodate(&self) -> bool {
    let flags = self.get().flags.load(Ordering::SeqCst);
    (flags & PAGE_UPTODATE) != 0
}

View File

@@ -439,14 +439,14 @@ impl PageContent {
u16::from_be_bytes([buf[self.offset + pos], buf[self.offset + pos + 1]])
}
/// Reads a big-endian `u16` from the page buffer at the absolute position `pos`.
///
/// Unlike `read_u16`, `self.offset` is NOT added to `pos`.
///
/// # Panics
/// Panics if `pos + 1` is outside the buffer.
pub fn read_u16_no_offset(&self, pos: usize) -> u16 {
    let page = self.as_ptr();
    // High byte first: the value is stored big-endian.
    let high = u16::from(page[pos]);
    let low = u16::from(page[pos + 1]);
    (high << 8) | low
}
pub fn read_u32(&self, pos: usize) -> u32 {
let buf = self.as_ptr();
u32::from_be_bytes([
buf[self.offset + pos],
buf[self.offset + pos + 1],
buf[self.offset + pos + 2],
buf[self.offset + pos + 3],
])
read_u32(buf, self.offset + pos)
}
pub fn write_u8(&self, pos: usize, value: u8) {
@@ -461,6 +461,12 @@ impl PageContent {
buf[self.offset + pos..self.offset + pos + 2].copy_from_slice(&value.to_be_bytes());
}
/// Writes `value` as a big-endian `u16` into the page buffer at the absolute position `pos`.
///
/// Unlike `write_u16`, `self.offset` is NOT added to `pos`.
///
/// # Panics
/// Panics if `pos..pos + 2` is outside the buffer.
pub fn write_u16_no_offset(&self, pos: usize, value: u16) {
    // Log under this function's own name so the absolute `pos` here is not
    // mistaken for the offset-relative `pos` logged by `write_u16`.
    tracing::debug!("write_u16_no_offset(pos={}, value={})", pos, value);
    let buf = self.as_ptr();
    buf[pos..pos + 2].copy_from_slice(&value.to_be_bytes());
}
pub fn write_u32(&self, pos: usize, value: u32) {
tracing::debug!("write_u32(pos={}, value={})", pos, value);
let buf = self.as_ptr();
@@ -534,6 +540,16 @@ impl PageContent {
}
}
/// Returns a raw pointer to the byte at `self.offset + 8` of the page buffer
/// for interior pages, or `None` for leaf pages.
///
/// NOTE(review): offset 8 is presumably where the right-most child pointer
/// lives in an interior page header — confirm against the file format.
pub fn rightmost_pointer_raw(&self) -> Option<*mut u8> {
    match self.page_type() {
        // Leaf pages yield no pointer.
        PageType::IndexLeaf | PageType::TableLeaf => None,
        PageType::IndexInterior | PageType::TableInterior => {
            let base = self.as_ptr().as_mut_ptr();
            // SAFETY: assumes `self.offset + 8` lies within the page buffer
            // returned by `as_ptr()` — TODO confirm at call sites.
            Some(unsafe { base.add(self.offset + 8) })
        }
    }
}
pub fn cell_get(
&self,
idx: usize,
@@ -574,7 +590,7 @@ impl PageContent {
(self.offset + header_size, self.cell_pointer_array_size())
}
/* Get region of a cell's payload */
/// Get region of a cell's payload
pub fn cell_get_raw_region(
&self,
idx: usize,
@@ -584,10 +600,10 @@ impl PageContent {
) -> (usize, usize) {
let buf = self.as_ptr();
let ncells = self.cell_count();
let cell_pointer_array_start = self.header_size();
let (cell_pointer_array_start, _) = self.cell_pointer_array_offset_and_size();
assert!(idx < ncells, "cell_get: idx out of bounds");
let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each
let cell_pointer = self.read_u16(cell_pointer) as usize;
let cell_pointer = self.read_u16_no_offset(cell_pointer) as usize;
let start = cell_pointer;
let len = match self.page_type() {
PageType::IndexInterior => {
@@ -888,6 +904,7 @@ fn read_payload(unread: &[u8], payload_size: usize, pager: Rc<Pager>) -> (Vec<u8
assert!(left_to_read > 0);
let page;
loop {
// FIXME(pere): this looks terrible, what did i do lmao
let page_ref = pager.read_page(next_overflow as usize);
if let Ok(p) = page_ref {
page = p;
@@ -1471,6 +1488,10 @@ impl WalHeader {
}
}
/// Decodes a big-endian `u32` from the four bytes of `buf` starting at `pos`.
///
/// # Panics
/// Panics if `buf` does not contain four bytes starting at `pos`.
pub fn read_u32(buf: &[u8], pos: usize) -> u32 {
    let mut word = [0u8; 4];
    word.copy_from_slice(&buf[pos..pos + 4]);
    u32::from_be_bytes(word)
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -2707,6 +2707,9 @@ impl Program {
_ => unreachable!("Not a record! Cannot insert a non record value."),
};
let key = &state.registers[*key_reg];
// NOTE(pere): Sending moved_before == true is okay because we moved before but
// if we were to set to false after starting a balance procedure, it might
// leave undefined state.
return_if_io!(cursor.insert(key, record, true));
state.pc += 1;
}