mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-27 11:54:30 +01:00
Merge 'Implement SQLite balancing algorithm' from Pere Diaz Bou
Beep boop. What happened, you ask? I removed the dumb balancing algorithm I implemented in favor of SQLite's implementation, which is based on the B*Tree[1], where a page is 2/3 full instead of 1/2. It also tries to balance a page by taking a maximum of 3 pages and distributing cells evenly between them. I've made some changes that are somewhat related: * Moved most operations on pages out of BTreeCursor, because those operations are based on a page, not on a cursor, and this makes them easier to test. * Fixed `write_u16` and `read_u16` call sites that didn't need an implicit offset calculation by adding `write_u16_no_offset` and `read_u16_no_offset` for those cases. * Added some tests, including fuzz tests. * Fixed some important operations such as `compute_free_space`, `defragment_page` and `drop_cell`. [1] https://dl.acm.org/doi/10.1145/356770.356776 Closes #968
This commit is contained in:
@@ -227,8 +227,8 @@ pub fn maybe_init_database_file(file: &Rc<dyn File>, io: &Arc<dyn IO>) -> Result
|
||||
btree_init_page(
|
||||
&page1,
|
||||
storage::sqlite3_ondisk::PageType::TableLeaf,
|
||||
&db_header,
|
||||
DATABASE_HEADER_SIZE,
|
||||
db_header.page_size - db_header.reserved_space as u16,
|
||||
);
|
||||
|
||||
let contents = page1.get().contents.as_mut().unwrap();
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -56,6 +56,10 @@ impl Page {
|
||||
unsafe { &mut *self.inner.get() }
|
||||
}
|
||||
|
||||
pub fn get_contents(&self) -> &mut PageContent {
|
||||
self.get().contents.as_mut().unwrap()
|
||||
}
|
||||
|
||||
pub fn is_uptodate(&self) -> bool {
|
||||
self.get().flags.load(Ordering::SeqCst) & PAGE_UPTODATE != 0
|
||||
}
|
||||
|
||||
@@ -439,14 +439,14 @@ impl PageContent {
|
||||
u16::from_be_bytes([buf[self.offset + pos], buf[self.offset + pos + 1]])
|
||||
}
|
||||
|
||||
pub fn read_u16_no_offset(&self, pos: usize) -> u16 {
|
||||
let buf = self.as_ptr();
|
||||
u16::from_be_bytes([buf[pos], buf[pos + 1]])
|
||||
}
|
||||
|
||||
pub fn read_u32(&self, pos: usize) -> u32 {
|
||||
let buf = self.as_ptr();
|
||||
u32::from_be_bytes([
|
||||
buf[self.offset + pos],
|
||||
buf[self.offset + pos + 1],
|
||||
buf[self.offset + pos + 2],
|
||||
buf[self.offset + pos + 3],
|
||||
])
|
||||
read_u32(buf, self.offset + pos)
|
||||
}
|
||||
|
||||
pub fn write_u8(&self, pos: usize, value: u8) {
|
||||
@@ -461,6 +461,12 @@ impl PageContent {
|
||||
buf[self.offset + pos..self.offset + pos + 2].copy_from_slice(&value.to_be_bytes());
|
||||
}
|
||||
|
||||
pub fn write_u16_no_offset(&self, pos: usize, value: u16) {
|
||||
tracing::debug!("write_u16(pos={}, value={})", pos, value);
|
||||
let buf = self.as_ptr();
|
||||
buf[pos..pos + 2].copy_from_slice(&value.to_be_bytes());
|
||||
}
|
||||
|
||||
pub fn write_u32(&self, pos: usize, value: u32) {
|
||||
tracing::debug!("write_u32(pos={}, value={})", pos, value);
|
||||
let buf = self.as_ptr();
|
||||
@@ -534,6 +540,16 @@ impl PageContent {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rightmost_pointer_raw(&self) -> Option<*mut u8> {
|
||||
match self.page_type() {
|
||||
PageType::IndexInterior | PageType::TableInterior => {
|
||||
Some(unsafe { self.as_ptr().as_mut_ptr().add(self.offset + 8) })
|
||||
}
|
||||
PageType::IndexLeaf => None,
|
||||
PageType::TableLeaf => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cell_get(
|
||||
&self,
|
||||
idx: usize,
|
||||
@@ -574,7 +590,7 @@ impl PageContent {
|
||||
(self.offset + header_size, self.cell_pointer_array_size())
|
||||
}
|
||||
|
||||
/* Get region of a cell's payload */
|
||||
/// Get region of a cell's payload
|
||||
pub fn cell_get_raw_region(
|
||||
&self,
|
||||
idx: usize,
|
||||
@@ -584,10 +600,10 @@ impl PageContent {
|
||||
) -> (usize, usize) {
|
||||
let buf = self.as_ptr();
|
||||
let ncells = self.cell_count();
|
||||
let cell_pointer_array_start = self.header_size();
|
||||
let (cell_pointer_array_start, _) = self.cell_pointer_array_offset_and_size();
|
||||
assert!(idx < ncells, "cell_get: idx out of bounds");
|
||||
let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each
|
||||
let cell_pointer = self.read_u16(cell_pointer) as usize;
|
||||
let cell_pointer = self.read_u16_no_offset(cell_pointer) as usize;
|
||||
let start = cell_pointer;
|
||||
let len = match self.page_type() {
|
||||
PageType::IndexInterior => {
|
||||
@@ -888,6 +904,7 @@ fn read_payload(unread: &[u8], payload_size: usize, pager: Rc<Pager>) -> (Vec<u8
|
||||
assert!(left_to_read > 0);
|
||||
let page;
|
||||
loop {
|
||||
// FIXME(pere): this looks terrible, what did i do lmao
|
||||
let page_ref = pager.read_page(next_overflow as usize);
|
||||
if let Ok(p) = page_ref {
|
||||
page = p;
|
||||
@@ -1471,6 +1488,10 @@ impl WalHeader {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_u32(buf: &[u8], pos: usize) -> u32 {
|
||||
u32::from_be_bytes([buf[pos], buf[pos + 1], buf[pos + 2], buf[pos + 3]])
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -2707,6 +2707,9 @@ impl Program {
|
||||
_ => unreachable!("Not a record! Cannot insert a non record value."),
|
||||
};
|
||||
let key = &state.registers[*key_reg];
|
||||
// NOTE(pere): Sending moved_before == true is okay because we moved before but
|
||||
// if we were to set to false after starting a balance procedure, it might
|
||||
// leave undefined state.
|
||||
return_if_io!(cursor.insert(key, record, true));
|
||||
state.pc += 1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user