diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index 1bb912874..aff3efa85 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -136,6 +136,7 @@ impl DatabaseStorage { } } +#[allow(dead_code)] struct BufferPool {} impl limbo_core::DatabaseStorage for DatabaseStorage { diff --git a/core/io/darwin.rs b/core/io/darwin.rs index 3775d1b99..cf527a3cb 100644 --- a/core/io/darwin.rs +++ b/core/io/darwin.rs @@ -12,7 +12,6 @@ use rustix::io::Errno; use std::cell::RefCell; use std::collections::HashMap; use std::io::{Read, Seek, Write}; -use std::os::unix::fs::MetadataExt; use std::rc::Rc; pub struct DarwinIO { diff --git a/core/json/de.rs b/core/json/de.rs index af6b8cd43..aac5cb86f 100644 --- a/core/json/de.rs +++ b/core/json/de.rs @@ -140,7 +140,6 @@ unicode_letter = _{ value = _{ null | boolean | string | number | object | array } "#] - struct Parser; /// Deserialize an instance of type `T` from a string of JSON5 text. Can fail if the input is diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 219688e1e..2dfb656d9 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1,4 +1,4 @@ -use log::trace; +use log::debug; use crate::storage::pager::{Page, Pager}; use crate::storage::sqlite3_ondisk::{ @@ -9,6 +9,7 @@ use crate::types::{Cursor, CursorResult, OwnedRecord, OwnedValue, SeekKey, SeekO use crate::Result; use std::cell::{Ref, RefCell}; +use std::pin::Pin; use std::rc::Rc; use super::sqlite3_ondisk::{write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell}; @@ -49,6 +50,25 @@ impl MemPage { } } +#[derive(Debug)] +enum WriteState { + Start, + BalanceStart, + BalanceGetParentPage, + BalanceMoveUp, + Finish, +} + +struct WriteInfo { + state: WriteState, + current_page: RefCell, Rc>)>>, + parent_page: RefCell, Rc>)>>, + new_pages: RefCell, Rc>)>>, + scratch_cells: RefCell>, + rightmost_pointer: RefCell>, + page_copy: RefCell>, // this holds the copy a of a page needed for buffer references +} + pub struct BTreeCursor { 
pager: Rc, root_page: usize, @@ -58,6 +78,7 @@ pub struct BTreeCursor { null_flag: bool, database_header: Rc>, going_upwards: bool, + write_info: WriteInfo, } impl BTreeCursor { @@ -75,6 +96,15 @@ impl BTreeCursor { null_flag: false, database_header, going_upwards: false, + write_info: WriteInfo { + state: WriteState::Start, + current_page: RefCell::new(None), + parent_page: RefCell::new(None), + new_pages: RefCell::new(Vec::with_capacity(4)), + scratch_cells: RefCell::new(Vec::new()), + rightmost_pointer: RefCell::new(None), + page_copy: RefCell::new(None), + }, } } @@ -501,62 +531,85 @@ impl BTreeCursor { key: &OwnedValue, record: &OwnedRecord, ) -> Result> { - let page_ref = self.get_page()?; - let int_key = match key { - OwnedValue::Integer(i) => *i as u64, - _ => unreachable!("btree tables are indexed by integers!"), - }; + loop { + let state = &self.write_info.state; + match state { + WriteState::Start => { + let page_ref = self.get_current_page()?; + let int_key = match key { + OwnedValue::Integer(i) => *i as u64, + _ => unreachable!("btree tables are indexed by integers!"), + }; - let (cell_idx, page_type) = { - let page = RefCell::borrow(&page_ref); - if page.is_locked() { - return Ok(CursorResult::IO); - } + // get page and find cell + let (cell_idx, page_type) = { + let page = RefCell::borrow(&page_ref); + if page.is_locked() { + return Ok(CursorResult::IO); + } - page.set_dirty(); - self.pager.add_dirty(page.id); + page.set_dirty(); + self.pager.add_dirty(page.id); - let mut page = page.contents.write().unwrap(); - let page = page.as_mut().unwrap(); - assert!(matches!(page.page_type(), PageType::TableLeaf)); + let mut page = page.contents.write().unwrap(); + let page = page.as_mut().unwrap(); + assert!(matches!(page.page_type(), PageType::TableLeaf)); - // find cell - (self.find_cell(page, int_key), page.page_type()) - }; + // find cell + (self.find_cell(page, int_key), page.page_type()) + }; - // TODO: if overwrite drop cell + // TODO: if overwrite 
drop cell - // insert cell + // insert cell - let mut cell_payload: Vec = Vec::new(); - self.fill_cell_payload(page_type, Some(int_key), &mut cell_payload, record); + let mut cell_payload: Vec = Vec::new(); + self.fill_cell_payload(page_type, Some(int_key), &mut cell_payload, record); - // insert - let overflow = { - let page = RefCell::borrow(&page_ref); + // insert + let overflow = { + let page = RefCell::borrow(&page_ref); - let mut page = page.contents.write().unwrap(); - let page = page.as_mut().unwrap(); - self.insert_into_cell(page, cell_payload.as_slice(), cell_idx); - page.overflow_cells.len() - }; - - if overflow > 0 { - self.balance_leaf(); + let mut page = page.contents.write().unwrap(); + let page = page.as_mut().unwrap(); + self.insert_into_cell(page, cell_payload.as_slice(), cell_idx); + page.overflow_cells.len() + }; + if overflow > 0 { + self.write_info.state = WriteState::BalanceStart; + self.write_info.current_page.borrow_mut().replace(( + self.page.borrow().as_ref().unwrap().clone(), + page_ref.clone(), + )); + } else { + self.write_info.state = WriteState::Finish; + } + } + WriteState::BalanceStart + | WriteState::BalanceMoveUp + | WriteState::BalanceGetParentPage => { + let res = self.balance_leaf()?; + if matches!(res, CursorResult::IO) { + return Ok(res); + } + } + WriteState::Finish => { + self.write_info.state = WriteState::Start; + return Ok(CursorResult::Ok(())); + } + }; } - - Ok(CursorResult::Ok(())) } /* insert to postion and shift other pointers */ - fn insert_into_cell(&mut self, page: &mut PageContent, payload: &[u8], cell_idx: usize) { + fn insert_into_cell(&self, page: &mut PageContent, payload: &[u8], cell_idx: usize) { let free = self.compute_free_space(page, RefCell::borrow(&self.database_header)); let enough_space = payload.len() + 2 <= free as usize; if !enough_space { // add to overflow cell page.overflow_cells.push(OverflowCell { index: cell_idx, - payload: Vec::from(payload), + payload: Pin::new(Vec::from(payload)), }); 
return; } @@ -589,7 +642,7 @@ impl BTreeCursor { page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, new_n_cells); } - fn free_cell_range(&mut self, page: &mut PageContent, offset: u16, len: u16) { + fn free_cell_range(&self, page: &mut PageContent, offset: u16, len: u16) { if page.first_freeblock() == 0 { // insert into empty list page.write_u16(offset as usize, 0); @@ -647,7 +700,7 @@ impl BTreeCursor { } } - fn drop_cell(&mut self, page: &mut PageContent, cell_idx: usize) { + fn drop_cell(&self, page: &mut PageContent, cell_idx: usize) { let (cell_start, cell_len) = page.cell_get_raw_region( cell_idx, self.max_local(page.page_type()), @@ -658,7 +711,7 @@ impl BTreeCursor { page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1); } - fn get_page(&mut self) -> crate::Result>> { + fn get_current_page(&mut self) -> crate::Result>> { let mem_page = { let mem_page = self.page.borrow(); let mem_page = mem_page.as_ref().unwrap(); @@ -669,52 +722,52 @@ impl BTreeCursor { Ok(page_ref) } - fn balance_leaf(&mut self) { - // This is a naive algorithm that doesn't try to distribute cells evenly by content. - // It will try to split the page in half by keys not by content. - // Sqlite tries to have a page at least 40% full. - loop { - let mem_page = { - let mem_page = self.page.borrow(); - let mem_page = mem_page.as_ref().unwrap(); - mem_page.clone() - }; - - { - // check if we don't need to balance - let page_ref = self.read_page_sync(mem_page.page_idx); - let page_rc = RefCell::borrow(&page_ref); - + /// This is a naive algorithm that doesn't try to distribute cells evenly by content. + /// It will try to split the page in half by keys not by content. + /// Sqlite tries to have a page at least 40% full. 
+ fn balance_leaf(&mut self) -> Result<CursorResult<()>> { + let state = &self.write_info.state; + match state { + WriteState::BalanceStart => { + let current_page = self.write_info.current_page.borrow(); + let mem_page = &current_page.as_ref().unwrap().0; { - // don't continue if there are no overflow cells - let mut page = page_rc.contents.write().unwrap(); - let page = page.as_mut().unwrap(); - if page.overflow_cells.is_empty() { - break; + // check if we don't need to balance + let page_ref = &current_page.as_ref().unwrap().1; + let page_rc = RefCell::borrow(&page_ref); + + { + // don't continue if there are no overflow cells + let mut page = page_rc.contents.write().unwrap(); + let page = page.as_mut().unwrap(); + if page.overflow_cells.is_empty() { + self.write_info.state = WriteState::Finish; + return Ok(CursorResult::Ok(())); + } } } - } - if mem_page.parent.is_none() { - self.balance_root(); - continue; - } - trace!("Balancing leaf. leaf={}", mem_page.page_idx); + if mem_page.parent.is_none() { + drop(current_page); + self.balance_root(); + return Ok(CursorResult::Ok(())); + } + debug!("Balancing leaf. leaf={}", mem_page.page_idx); - let page_ref = self.read_page_sync(mem_page.page_idx); - let page_rc = RefCell::borrow(&page_ref); + let page_ref = &current_page.as_ref().unwrap().1; + let page_rc = RefCell::borrow(&page_ref); - // Copy of page used to reference cell bytes. - let page_copy = { - let mut page = page_rc.contents.write().unwrap(); - let page = page.as_mut().unwrap(); - page.clone() - }; + // Copy of page used to reference cell bytes. + let page_copy = { + let mut page = page_rc.contents.write().unwrap(); + let page = page.as_mut().unwrap(); + page.clone() + }; - // In memory in order copy of all cells in pages we want to balance. For now let's do a 2 page split. - // Right pointer in interior cells should be converted to regular cells if more than 2 pages are used for balancing.
- let (scratch_cells, right_most_pointer) = { - let mut scratch_cells: Vec<&[u8]> = Vec::new(); + // In memory in order copy of all cells in pages we want to balance. For now let's do a 2 page split. + // Right pointer in interior cells should be converted to regular cells if more than 2 pages are used for balancing. + let mut scratch_cells = self.write_info.scratch_cells.borrow_mut(); + scratch_cells.clear(); for cell_idx in 0..page_copy.cell_count() { let (start, len) = page_copy.cell_get_raw_region( @@ -724,16 +777,18 @@ impl BTreeCursor { self.usable_space(), ); let buf = page_copy.as_ptr(); - scratch_cells.push(&buf[start..start + len]); + scratch_cells.push(to_static_buf(&buf[start..start + len])); } for overflow_cell in &page_copy.overflow_cells { - scratch_cells.insert(overflow_cell.index, &overflow_cell.payload); + scratch_cells + .insert(overflow_cell.index, to_static_buf(&overflow_cell.payload)); } - (scratch_cells, page_copy.rightmost_pointer()) - }; + *self.write_info.rightmost_pointer.borrow_mut() = + page_copy.rightmost_pointer().clone(); - // allocate new pages and move cells to those new pages - { + self.write_info.page_copy.replace(Some(page_copy)); + + // allocate new pages and move cells to those new pages // split procedure let mut page = page_rc.contents.write().unwrap(); let page = page.as_mut().unwrap(); @@ -748,197 +803,266 @@ impl BTreeCursor { let right_page_ref = self.allocate_page(page.page_type()); let right_page = RefCell::borrow_mut(&right_page_ref); let right_page_id = right_page.id; - let mut right_page = right_page.contents.write().unwrap(); - let right_page = right_page.as_mut().unwrap(); - { - let is_leaf = page.is_leaf(); - let page_type = page.page_type(); - let mut new_pages = vec![page, right_page]; - let new_pages_ids = [mem_page.page_idx, right_page_id]; - trace!( - "splitting left={} right={}", - new_pages_ids[0], - new_pages_ids[1] - ); - // drop divider cells and find right pointer - // NOTE: since we are doing a simple 
split we only finding the pointer we want to update (right pointer). - // Right pointer means cell that points to the last page, as we don't really want to drop this one. This one - // can be a "rightmost pointer" or a "cell". - // TODO(pere): simplify locking... - // we always asumme there is a parent - let parent_rc = mem_page.parent.as_ref().unwrap(); + self.write_info.new_pages.borrow_mut().clear(); + self.write_info + .new_pages + .borrow_mut() + .push((mem_page.clone(), page_ref.clone())); + self.write_info.new_pages.borrow_mut().push(( + Rc::new(MemPage::new(mem_page.parent.clone(), right_page_id, 0)), + right_page_ref.clone(), + )); - let parent_ref = self.read_page_sync(parent_rc.page_idx); - let parent = RefCell::borrow_mut(&parent_ref); - parent.set_dirty(); - self.pager.add_dirty(parent.id); - let mut parent = parent.contents.write().unwrap(); - let parent = parent.as_mut().unwrap(); - // if this isn't empty next loop won't work - assert!(parent.overflow_cells.is_empty()); + let new_pages_ids = [mem_page.page_idx, right_page_id]; + debug!( + "splitting left={} right={}", + new_pages_ids[0], new_pages_ids[1] + ); - // Right page pointer is u32 in right most pointer, and in cell is u32 too, so we can use a *u32 to hold where we want to change this value - let mut right_pointer = BTREE_HEADER_OFFSET_RIGHTMOST; - for cell_idx in 0..parent.cell_count() { - let cell = parent - .cell_get( - cell_idx, - self.pager.clone(), - self.max_local(page_type.clone()), - self.min_local(page_type.clone()), - self.usable_space(), - ) - .unwrap(); - let found = match cell { - BTreeCell::TableInteriorCell(interior) => { - interior._left_child_page as usize == mem_page.page_idx - } - _ => unreachable!("Parent should always be a "), - }; - if found { - let (start, _len) = parent.cell_get_raw_region( - cell_idx, - self.max_local(page_type.clone()), - self.min_local(page_type.clone()), - self.usable_space(), - ); - right_pointer = start; - break; - } - } + // drop divider cells 
and find right pointer + // NOTE: since we are doing a simple split we only finding the pointer we want to update (right pointer). + // Right pointer means cell that points to the last page, as we don't really want to drop this one. This one + // can be a "rightmost pointer" or a "cell". + // TODO(pere): simplify locking... + // we always asumme there is a parent + self.write_info.state = WriteState::BalanceGetParentPage; + return Ok(CursorResult::Ok(())); + } + WriteState::BalanceGetParentPage => { + let current_page = self.write_info.current_page.borrow(); + let mem_page = &current_page.as_ref().unwrap().0; - // reset pages - for page in &new_pages { - page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0); - page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0); + let parent_rc = mem_page.parent.as_ref().unwrap(); + let parent_ref = self.pager.read_page(parent_rc.page_idx)?; + if !RefCell::borrow(&parent_ref).is_locked() { + self.write_info.state = WriteState::BalanceMoveUp; + self.write_info + .parent_page + .borrow_mut() + .replace((parent_rc.clone(), parent_ref.clone())); + Ok(CursorResult::Ok(())) + } else { + Ok(CursorResult::IO) + } + } + WriteState::BalanceMoveUp => { + let parent = self.write_info.parent_page.borrow(); + let parent_entry = parent.as_ref().unwrap(); + let parent_ref = &parent_entry.1; + let parent = RefCell::borrow_mut(&parent_ref); - let db_header = RefCell::borrow(&self.database_header); - let cell_content_area_start = - db_header.page_size - db_header.unused_space as u16; - page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cell_content_area_start); + let (page_type, current_idx) = { + let current_page = self.write_info.current_page.borrow(); + let pagerc = current_page.as_ref().unwrap(); + let page = RefCell::borrow(&pagerc.1); + let page = page.contents.read().unwrap(); + ( + page.as_ref().unwrap().page_type().clone(), + pagerc.0.page_idx, + ) + }; - page.write_u8(BTREE_HEADER_OFFSET_FRAGMENTED, 0); - page.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, 0);
- } + parent.set_dirty(); + self.pager.add_dirty(parent.id); + let mut parent_contents_lock = parent.contents.write().unwrap(); + let parent_contents = parent_contents_lock.as_mut().unwrap(); + // if this isn't empty next loop won't work + assert_eq!(parent_contents.overflow_cells.len(), 0); - // distribute cells - let new_pages_len = new_pages.len(); - let cells_per_page = scratch_cells.len() / new_pages.len(); - let mut current_cell_index = 0_usize; - let mut divider_cells_index = Vec::new(); /* index to scratch cells that will be used as dividers in order */ - - for (i, page) in new_pages.iter_mut().enumerate() { - let last_page = i == new_pages_len - 1; - let cells_to_copy = if last_page { - // last cells is remaining pages if division was odd - scratch_cells.len() - current_cell_index - } else { - cells_per_page - }; - - let cell_index_range = - current_cell_index..current_cell_index + cells_to_copy; - for (j, cell_idx) in cell_index_range.enumerate() { - let cell = scratch_cells[cell_idx]; - self.insert_into_cell(page, cell, j); - } - divider_cells_index.push(current_cell_index + cells_to_copy - 1); - current_cell_index += cells_to_copy; - } - - // update rightmost pointer for each page if we are in interior page - if !is_leaf { - for page in new_pages.iter_mut().take(new_pages_len - 1) { - assert!(page.cell_count() == 1); - let last_cell = page - .cell_get( - page.cell_count() - 1, - self.pager.clone(), - self.max_local(page.page_type()), - self.min_local(page.page_type()), - self.usable_space(), - ) - .unwrap(); - let last_cell_pointer = match last_cell { - BTreeCell::TableInteriorCell(interior) => interior._left_child_page, - _ => unreachable!(), - }; - self.drop_cell(page, page.cell_count() - 1); - page.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, last_cell_pointer); - } - // last page right most pointer points to previous right most pointer before splitting - let last_page = new_pages.last().unwrap(); - last_page - .write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, 
right_most_pointer.unwrap()); - } - - // insert dividers in parent - // we can consider dividers the first cell of each page starting from the second page - for (page_id_index, page) in - new_pages.iter_mut().take(new_pages_len - 1).enumerate() - { - assert!(page.cell_count() > 1); - let divider_cell_index = divider_cells_index[page_id_index]; - let cell_payload = scratch_cells[divider_cell_index]; - let cell = read_btree_cell( - cell_payload, - &page.page_type(), - 0, + // Right page pointer is u32 in right most pointer, and in cell is u32 too, so we can use a *u32 to hold where we want to change this value + let mut right_pointer = BTREE_HEADER_OFFSET_RIGHTMOST; + for cell_idx in 0..parent_contents.cell_count() { + let cell = parent_contents + .cell_get( + cell_idx, self.pager.clone(), - self.max_local(page.page_type()), - self.min_local(page.page_type()), + self.max_local(page_type.clone()), + self.min_local(page_type.clone()), self.usable_space(), ) .unwrap(); - - if is_leaf { - // create a new divider cell and push - let key = match cell { - BTreeCell::TableLeafCell(leaf) => leaf._rowid, - _ => unreachable!(), - }; - let mut divider_cell = Vec::new(); - divider_cell.extend_from_slice( - &(new_pages_ids[page_id_index] as u32).to_be_bytes(), - ); - divider_cell.extend(std::iter::repeat(0).take(9)); - let n = write_varint(&mut divider_cell.as_mut_slice()[4..], key); - divider_cell.truncate(4 + n); - let parent_cell_idx = self.find_cell(parent, key); - self.insert_into_cell(parent, divider_cell.as_slice(), parent_cell_idx); - } else { - // move cell - let key = match cell { - BTreeCell::TableInteriorCell(interior) => interior._rowid, - _ => unreachable!(), - }; - let parent_cell_idx = self.find_cell(page, key); - self.insert_into_cell(parent, cell_payload, parent_cell_idx); - // self.drop_cell(*page, 0); + let found = match cell { + BTreeCell::TableInteriorCell(interior) => { + interior._left_child_page as usize == current_idx } - } - - { - // copy last page id to 
right pointer - let last_pointer = *new_pages_ids.last().unwrap() as u32; - parent.write_u32(right_pointer, last_pointer); + _ => unreachable!("Parent should always be a "), + }; + if found { + let (start, _len) = parent_contents.cell_get_raw_region( + cell_idx, + self.max_local(page_type.clone()), + self.min_local(page_type.clone()), + self.usable_space(), + ); + right_pointer = start; + break; } } + + let mut new_pages = self.write_info.new_pages.borrow_mut(); + let scratch_cells = self.write_info.scratch_cells.borrow(); + + // reset pages + for (_, page) in new_pages.iter() { + let page = page.borrow_mut(); + let mut page = page.contents.write().unwrap(); + let page = page.as_mut().unwrap(); + + page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0); + page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0); + + let db_header = RefCell::borrow(&self.database_header); + let cell_content_area_start = + db_header.page_size - db_header.unused_space as u16; + page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cell_content_area_start); + + page.write_u8(BTREE_HEADER_OFFSET_FRAGMENTED, 0); + page.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, 0); + } + + // distribute cells + let new_pages_len = new_pages.len(); + let cells_per_page = scratch_cells.len() / new_pages.len(); + let mut current_cell_index = 0_usize; + let mut divider_cells_index = Vec::new(); /* index to scratch cells that will be used as dividers in order */ + + for (i, (_, page)) in new_pages.iter_mut().enumerate() { + let page = page.borrow_mut(); + let mut page = page.contents.write().unwrap(); + let page = page.as_mut().unwrap(); + + let last_page = i == new_pages_len - 1; + let cells_to_copy = if last_page { + // last cells is remaining pages if division was odd + scratch_cells.len() - current_cell_index + } else { + cells_per_page + }; + + let cell_index_range = current_cell_index..current_cell_index + cells_to_copy; + for (j, cell_idx) in cell_index_range.enumerate() { + let cell = scratch_cells[cell_idx]; + 
self.insert_into_cell(page, cell, j); + } + divider_cells_index.push(current_cell_index + cells_to_copy - 1); + current_cell_index += cells_to_copy; + } + let is_leaf = { + let page = self.write_info.current_page.borrow(); + let page = RefCell::borrow(&page.as_ref().unwrap().1); + let page = page.contents.read().unwrap(); + page.as_ref().unwrap().is_leaf() + }; + + // update rightmost pointer for each page if we are in interior page + if !is_leaf { + for (_, page) in new_pages.iter_mut().take(new_pages_len - 1) { + let page = page.borrow_mut(); + let mut page = page.contents.write().unwrap(); + let page = page.as_mut().unwrap(); + + assert!(page.cell_count() == 1); + let last_cell = page + .cell_get( + page.cell_count() - 1, + self.pager.clone(), + self.max_local(page.page_type()), + self.min_local(page.page_type()), + self.usable_space(), + ) + .unwrap(); + let last_cell_pointer = match last_cell { + BTreeCell::TableInteriorCell(interior) => interior._left_child_page, + _ => unreachable!(), + }; + self.drop_cell(page, page.cell_count() - 1); + page.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, last_cell_pointer); + } + // last page right most pointer points to previous right most pointer before splitting + let last_page = new_pages.last().unwrap(); + let last_page = &last_page.1; + let last_page = RefCell::borrow(&last_page); + let mut last_page = last_page.contents.write().unwrap(); + let last_page = last_page.as_mut().unwrap(); + last_page.write_u32( + BTREE_HEADER_OFFSET_RIGHTMOST, + self.write_info.rightmost_pointer.borrow().unwrap(), + ); + } + + // insert dividers in parent + // we can consider dividers the first cell of each page starting from the second page + for (page_id_index, (mem_page, page)) in + new_pages.iter_mut().take(new_pages_len - 1).enumerate() + { + let page = page.borrow_mut(); + let mut page = page.contents.write().unwrap(); + let page = page.as_mut().unwrap(); + assert!(page.cell_count() > 1); + let divider_cell_index = 
divider_cells_index[page_id_index]; + let cell_payload = scratch_cells[divider_cell_index]; + let cell = read_btree_cell( + cell_payload, + &page.page_type(), + 0, + self.pager.clone(), + self.max_local(page.page_type()), + self.min_local(page.page_type()), + self.usable_space(), + ) + .unwrap(); + + if is_leaf { + // create a new divider cell and push + let key = match cell { + BTreeCell::TableLeafCell(leaf) => leaf._rowid, + _ => unreachable!(), + }; + let mut divider_cell = Vec::new(); + divider_cell.extend_from_slice(&(mem_page.page_idx as u32).to_be_bytes()); + divider_cell.extend(std::iter::repeat(0).take(9)); + let n = write_varint(&mut divider_cell.as_mut_slice()[4..], key); + divider_cell.truncate(4 + n); + let parent_cell_idx = self.find_cell(parent_contents, key); + self.insert_into_cell( + parent_contents, + divider_cell.as_slice(), + parent_cell_idx, + ); + } else { + // move cell + let key = match cell { + BTreeCell::TableInteriorCell(interior) => interior._rowid, + _ => unreachable!(), + }; + let parent_cell_idx = self.find_cell(page, key); + self.insert_into_cell(parent_contents, cell_payload, parent_cell_idx); + // self.drop_cell(*page, 0); + } + } + + { + // copy last page id to right pointer + let last_pointer = new_pages.last().unwrap().0.page_idx as u32; + parent_contents.write_u32(right_pointer, last_pointer); + } + self.page = RefCell::new(Some(parent_entry.0.clone())); + self.write_info + .current_page + .replace(Some(parent_entry.clone())); + self.write_info.state = WriteState::BalanceStart; + self.write_info.page_copy.replace(None); + Ok(CursorResult::Ok(())) } - self.page = RefCell::new(Some(mem_page.parent.as_ref().unwrap().clone())); + _ => unreachable!("invalid balance leaf state {:?}", state), } } fn balance_root(&mut self) { /* todo: balance deeper, create child and copy contents of root there. 
Then split root */ /* if we are in root page then we just need to create a new root and push key there */ - let mem_page = { - let mem_page = self.page.borrow(); - let mem_page = mem_page.as_ref().unwrap(); - mem_page.clone() - }; let new_root_page_ref = self.allocate_page(PageType::TableInterior); { @@ -954,9 +1078,11 @@ impl BTreeCursor { /* swap splitted page buffer with new root buffer so we don't have to update page idx */ { - let page_ref = self.read_page_sync(mem_page.page_idx); - let (root_id, child_id) = { - let mut page_rc = RefCell::borrow_mut(&page_ref); + let (root_id, child_id, child) = { + let page = self.write_info.current_page.borrow(); + let page_ref = &page.as_ref().unwrap().1; + let child = page_ref.clone(); + let mut page_rc = RefCell::borrow_mut(page_ref); let mut new_root_page = RefCell::borrow_mut(&new_root_page_ref); // Swap the entire Page structs @@ -964,30 +1090,26 @@ impl BTreeCursor { self.pager.add_dirty(new_root_page.id); self.pager.add_dirty(page_rc.id); - (new_root_page.id, page_rc.id) + (new_root_page.id, page_rc.id, child) }; let root = new_root_page_ref.clone(); - let child = page_ref.clone(); let parent = Some(Rc::new(MemPage::new(None, root_id, 0))); - self.page = RefCell::new(Some(Rc::new(MemPage::new(parent, child_id, 0)))); - trace!("Balancing root. root={}, rightmost={}", root_id, child_id); + let current_mem_page = Rc::new(MemPage::new(parent, child_id, 0)); + self.page = RefCell::new(Some(current_mem_page.clone())); + + self.write_info + .current_page + .replace(Some((current_mem_page, child.clone()))); + + debug!("Balancing root. 
root={}, rightmost={}", root_id, child_id); self.pager.put_page(root_id, root); self.pager.put_page(child_id, child); } } - fn read_page_sync(&mut self, page_idx: usize) -> Rc> { - loop { - let page_ref = self.pager.read_page(page_idx); - if let Ok(p) = page_ref { - return p; - } - } - } - - fn allocate_page(&mut self, page_type: PageType) -> Rc> { + fn allocate_page(&self, page_type: PageType) -> Rc> { let page = self.pager.allocate_page().unwrap(); { @@ -1029,7 +1151,7 @@ impl BTreeCursor { /* Allocate space for a cell on a page. */ - fn allocate_cell_space(&mut self, page_ref: &PageContent, amount: u16) -> u16 { + fn allocate_cell_space(&self, page_ref: &PageContent, amount: u16) -> u16 { let amount = amount as usize; let (cell_offset, _) = page_ref.cell_get_raw_pointer_region(); @@ -1499,7 +1621,7 @@ impl Cursor for BTreeCursor { CursorResult::Ok(_) => {} CursorResult::IO => return Ok(CursorResult::IO), }; - let page_ref = self.get_page()?; + let page_ref = self.get_current_page()?; let page = RefCell::borrow(&page_ref); if page.is_locked() { return Ok(CursorResult::IO); @@ -1531,3 +1653,7 @@ impl Cursor for BTreeCursor { } } } + +fn to_static_buf(buf: &[u8]) -> &'static [u8] { + unsafe { std::mem::transmute::<&[u8], &'static [u8]>(buf) } +} diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index c3f2630fa..6c0b4b376 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -50,6 +50,7 @@ use crate::types::{OwnedRecord, OwnedValue}; use crate::{File, Result}; use log::trace; use std::cell::RefCell; +use std::pin::Pin; use std::rc::Rc; /// The size of the database header in bytes. 
@@ -267,7 +268,7 @@ impl TryFrom for PageType { #[derive(Debug, Clone)] pub struct OverflowCell { pub index: usize, - pub payload: Vec, + pub payload: Pin>, } #[derive(Debug)] @@ -1100,22 +1101,6 @@ pub fn begin_write_wal_header(io: &Rc, header: &WalHeader) -> Result<( Ok(()) } -fn finish_read_wal_frame( - buf: Rc>, - frame: Rc>, -) -> Result<()> { - let buf = buf.borrow(); - let buf = buf.as_slice(); - let mut frame = frame.borrow_mut(); - frame.page_number = u32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]); - frame.db_size = u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]); - frame.salt_1 = u32::from_be_bytes([buf[8], buf[9], buf[10], buf[11]]); - frame.salt_2 = u32::from_be_bytes([buf[12], buf[13], buf[14], buf[15]]); - frame.checksum_1 = u32::from_be_bytes([buf[16], buf[17], buf[18], buf[19]]); - frame.checksum_2 = u32::from_be_bytes([buf[20], buf[21], buf[22], buf[23]]); - Ok(()) -} - /* Checks if payload will overflow a cell based on max local and it will return the min size that will be stored in that case,