mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-30 22:44:21 +01:00
Merge 'btree: use binary search in seek/move_to for table btrees' from Jussi Saurio
Implements binary search to find the correct cell within a page, specialized for table btrees only due to lack of energy at 8:30 PM --- I used a [1GB TPC-H database](https://github.com/lovasoa/TPCH-sqlite/releases/download/v1.0/TPC-H.db) for benchmarking and ran this query which does a lot of seeks: before ```sql limbo> .timer on limbo> select l_orderkey, 3 as revenue, o_orderdate, o_shippriority from lineitem, orders, customer where c_mktsegment = 'FURNITURE' and c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate < cast('1995-03-29' as datetime) and l_shipdate > cast('1995-03-29' as datetime); ┌────────────┬─────────┬─────────────┬────────────────┐ │ l_orderkey │ revenue │ o_orderdate │ o_shippriority │ ├────────────┼─────────┼─────────────┼────────────────┤ └────────────┴─────────┴─────────────┴────────────────┘ Command stats: ---------------------------- total: 16.267797375 s (this includes parsing/coloring of cli app) ``` after ```sql limbo> .timer on limbo> select l_orderkey, 3 as revenue, o_orderdate, o_shippriority from lineitem, orders, customer where c_mktsegment = 'FURNITURE' and c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate < cast('1995-03-29' as datetime) and l_shipdate > cast('1995-03-29' as datetime); ┌────────────┬─────────┬─────────────┬────────────────┐ │ l_orderkey │ revenue │ o_orderdate │ o_shippriority │ ├────────────┼─────────┼─────────────┼────────────────┤ └────────────┴─────────┴─────────────┴────────────────┘ Command stats: ---------------------------- total: 5.20604125 s (this includes parsing/coloring of cli app) ``` BTW, SQLite completes this in 600 milliseconds so there's still a lot of fuckiness somewhere. --- UPDATE: refactored table btree seek (on leaf pages) to use binary search too. I also updated the above numbers so that I ran each a few times and took the lowest time I got for each.
This is after binsearch on leaf pages too: ```sql limbo> select l_orderkey, 3 as revenue, o_orderdate, o_shippriority from lineitem, orders, customer where c_mktsegment = 'FURNITURE' and c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate < cast('1995-03-29' as datetime) and l_shipdate > cast('1995-03-29' as datetime); ┌────────────┬─────────┬─────────────┬────────────────┐ │ l_orderkey │ revenue │ o_orderdate │ o_shippriority │ ├────────────┼─────────┼─────────────┼────────────────┤ └────────────┴─────────┴─────────────┴────────────────┘ Command stats: ---------------------------- total: 4.529645958 s (this includes parsing/coloring of cli app) ``` Closes #1357
This commit is contained in:
@@ -976,6 +976,9 @@ impl BTreeCursor {
|
||||
/// We don't include the rowid in the comparison and that's why the last value from the record is not included.
|
||||
fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result<CursorResult<Option<u64>>> {
|
||||
let cell_iter_dir = op.iteration_direction();
|
||||
if let SeekKey::TableRowId(rowid) = key {
|
||||
return self.tablebtree_seek(rowid, op, cell_iter_dir);
|
||||
}
|
||||
return_if_io!(self.move_to(key.clone(), op.clone()));
|
||||
|
||||
{
|
||||
@@ -1178,6 +1181,419 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Specialized version of move_to() for table btrees.
|
||||
fn tablebtree_move_to_binsearch(
|
||||
&mut self,
|
||||
rowid: u64,
|
||||
seek_op: SeekOp,
|
||||
iter_dir: IterationDirection,
|
||||
) -> Result<CursorResult<()>> {
|
||||
'outer: loop {
|
||||
let page = self.stack.top();
|
||||
return_if_locked!(page);
|
||||
let contents = page.get().contents.as_ref().unwrap();
|
||||
if contents.is_leaf() {
|
||||
return Ok(CursorResult::Ok(()));
|
||||
}
|
||||
|
||||
let cell_count = contents.cell_count();
|
||||
let mut min: isize = 0;
|
||||
let mut max: isize = cell_count as isize - 1;
|
||||
let mut leftmost_matching_cell = None;
|
||||
loop {
|
||||
if min > max {
|
||||
if let Some(leftmost_matching_cell) = leftmost_matching_cell {
|
||||
self.stack.set_cell_index(leftmost_matching_cell as i32);
|
||||
let matching_cell = contents.cell_get(
|
||||
leftmost_matching_cell,
|
||||
payload_overflow_threshold_max(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
payload_overflow_threshold_min(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
// If we found our target rowid in the left subtree,
|
||||
// we need to move the parent cell pointer forwards or backwards depending on the iteration direction.
|
||||
// For example: since the internal node contains the max rowid of the left subtree, we need to move the
|
||||
// parent pointer backwards in backwards iteration so that we don't come back to the parent again.
|
||||
// E.g.
|
||||
// this parent: rowid 666
|
||||
// left child has: 664,665,666
|
||||
// we need to move to the previous parent (with e.g. rowid 663) when iterating backwards.
|
||||
self.stack.next_cell_in_direction(iter_dir);
|
||||
let BTreeCell::TableInteriorCell(TableInteriorCell {
|
||||
_left_child_page,
|
||||
..
|
||||
}) = matching_cell
|
||||
else {
|
||||
unreachable!("unexpected cell type: {:?}", matching_cell);
|
||||
};
|
||||
let mem_page = self.pager.read_page(_left_child_page as usize)?;
|
||||
self.stack.push(mem_page);
|
||||
continue 'outer;
|
||||
}
|
||||
self.stack.set_cell_index(contents.cell_count() as i32 + 1);
|
||||
match contents.rightmost_pointer() {
|
||||
Some(right_most_pointer) => {
|
||||
let mem_page = self.pager.read_page(right_most_pointer as usize)?;
|
||||
self.stack.push(mem_page);
|
||||
continue 'outer;
|
||||
}
|
||||
None => {
|
||||
unreachable!("we shall not go back up! The only way is down the slope");
|
||||
}
|
||||
}
|
||||
}
|
||||
let cur_cell_idx = (min + max) / 2;
|
||||
self.stack.set_cell_index(cur_cell_idx as i32);
|
||||
let cur_cell = contents.cell_get(
|
||||
cur_cell_idx as usize,
|
||||
payload_overflow_threshold_max(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
payload_overflow_threshold_min(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
|
||||
match &cur_cell {
|
||||
BTreeCell::TableInteriorCell(TableInteriorCell {
|
||||
_left_child_page,
|
||||
_rowid: cell_rowid,
|
||||
}) => {
|
||||
// in sqlite btrees left child pages have <= keys.
|
||||
// table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10,
|
||||
// and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees)
|
||||
// the left page may have a rowid=10.
|
||||
// Logic table for determining if target leaf page is in left subtree
|
||||
//
|
||||
// Forwards iteration (looking for first match in tree):
|
||||
// OP | Current Cell vs Seek Key | Action? | Explanation
|
||||
// GT | > | go left | First > key is in left subtree
|
||||
// GT | = or < | go right | First > key is in right subtree
|
||||
// GE | > or = | go left | First >= key is in left subtree
|
||||
// GE | < | go right | First >= key is in right subtree
|
||||
//
|
||||
// Backwards iteration (looking for last match in tree):
|
||||
// OP | Current Cell vs Seek Key | Action? | Explanation
|
||||
// LE | > or = | go left | Last <= key is in left subtree
|
||||
// LE | < | go right | Last <= key is in right subtree
|
||||
// LT | > or = | go left | Last < key is in left subtree
|
||||
// LT | < | go right?| Last < key is in right subtree, except if cell rowid is exactly 1 less
|
||||
//
|
||||
// No iteration (point query):
|
||||
// EQ | > or = | go left | Last = key is in left subtree
|
||||
// EQ | < | go right | Last = key is in right subtree
|
||||
let is_on_left = match seek_op {
|
||||
SeekOp::GT => *cell_rowid > rowid,
|
||||
SeekOp::GE => *cell_rowid >= rowid,
|
||||
SeekOp::LE => *cell_rowid >= rowid,
|
||||
SeekOp::LT => *cell_rowid + 1 >= rowid,
|
||||
SeekOp::EQ => *cell_rowid >= rowid,
|
||||
};
|
||||
if is_on_left {
|
||||
leftmost_matching_cell = Some(cur_cell_idx as usize);
|
||||
max = cur_cell_idx - 1;
|
||||
} else {
|
||||
min = cur_cell_idx + 1;
|
||||
}
|
||||
}
|
||||
_ => unreachable!("unexpected cell type: {:?}", cur_cell),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Specialized version of move_to() for index btrees.
|
||||
/// TODO: refactor this to use binary search instead of iterating cells in order.
|
||||
fn indexbtree_move_to<'a>(
|
||||
&mut self,
|
||||
index_key: &'a ImmutableRecord,
|
||||
cmp: SeekOp,
|
||||
iter_dir: IterationDirection,
|
||||
) -> Result<CursorResult<()>> {
|
||||
loop {
|
||||
let page = self.stack.top();
|
||||
return_if_locked!(page);
|
||||
|
||||
let contents = page.get().contents.as_ref().unwrap();
|
||||
if contents.is_leaf() {
|
||||
return Ok(CursorResult::Ok(()));
|
||||
}
|
||||
|
||||
let mut found_cell = false;
|
||||
for cell_idx in 0..contents.cell_count() {
|
||||
let cell = contents.cell_get(
|
||||
cell_idx,
|
||||
payload_overflow_threshold_max(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
payload_overflow_threshold_min(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
let BTreeCell::IndexInteriorCell(IndexInteriorCell {
|
||||
left_child_page,
|
||||
payload,
|
||||
first_overflow_page,
|
||||
payload_size,
|
||||
}) = &cell
|
||||
else {
|
||||
unreachable!("unexpected cell type: {:?}", cell);
|
||||
};
|
||||
|
||||
if let Some(next_page) = first_overflow_page {
|
||||
return_if_io!(self.process_overflow_read(payload, *next_page, *payload_size))
|
||||
} else {
|
||||
crate::storage::sqlite3_ondisk::read_record(
|
||||
payload,
|
||||
self.get_immutable_record_or_create().as_mut().unwrap(),
|
||||
)?
|
||||
};
|
||||
let record = self.get_immutable_record();
|
||||
let record = record.as_ref().unwrap();
|
||||
let record_slice_equal_number_of_cols =
|
||||
&record.get_values().as_slice()[..index_key.get_values().len()];
|
||||
let interior_cell_vs_index_key = compare_immutable(
|
||||
record_slice_equal_number_of_cols,
|
||||
index_key.get_values(),
|
||||
self.index_key_sort_order,
|
||||
);
|
||||
// in sqlite btrees left child pages have <= keys.
|
||||
// in general, in forwards iteration we want to find the first key that matches the seek condition.
|
||||
// in backwards iteration we want to find the last key that matches the seek condition.
|
||||
//
|
||||
// Logic table for determining if target leaf page is in left subtree.
|
||||
// For index b-trees this is a bit more complicated since the interior cells contain payloads (the key is the payload).
|
||||
// and for non-unique indexes there might be several cells with the same key.
|
||||
//
|
||||
// Forwards iteration (looking for first match in tree):
|
||||
// OP | Current Cell vs Seek Key | Action? | Explanation
|
||||
// GT | > | go left | First > key could be exactly this one, or in left subtree
|
||||
// GT | = or < | go right | First > key must be in right subtree
|
||||
// GE | > | go left | First >= key could be exactly this one, or in left subtree
|
||||
// GE | = | go left | First >= key could be exactly this one, or in left subtree
|
||||
// GE | < | go right | First >= key must be in right subtree
|
||||
//
|
||||
// Backwards iteration (looking for last match in tree):
|
||||
// OP | Current Cell vs Seek Key | Action? | Explanation
|
||||
// LE | > | go left | Last <= key must be in left subtree
|
||||
// LE | = | go right | Last <= key is either this one, or somewhere to the right of this one. So we need to go right to make sure
|
||||
// LE | < | go right | Last <= key must be in right subtree
|
||||
// LT | > | go left | Last < key must be in left subtree
|
||||
// LT | = | go left | Last < key must be in left subtree since we want strictly less than
|
||||
// LT | < | go right | Last < key could be exactly this one, or in right subtree
|
||||
//
|
||||
// No iteration (point query):
|
||||
// EQ | > | go left | First = key must be in left subtree
|
||||
// EQ | = | go left | First = key could be exactly this one, or in left subtree
|
||||
// EQ | < | go right | First = key must be in right subtree
|
||||
|
||||
let target_leaf_page_is_in_left_subtree = match cmp {
|
||||
SeekOp::GT => interior_cell_vs_index_key.is_gt(),
|
||||
SeekOp::GE => interior_cell_vs_index_key.is_ge(),
|
||||
SeekOp::EQ => interior_cell_vs_index_key.is_ge(),
|
||||
SeekOp::LE => interior_cell_vs_index_key.is_gt(),
|
||||
SeekOp::LT => interior_cell_vs_index_key.is_ge(),
|
||||
};
|
||||
if target_leaf_page_is_in_left_subtree {
|
||||
// we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up.
|
||||
// in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again.
|
||||
// Example:
|
||||
// this parent: key 666, and we found the target key in the left child.
|
||||
// left child has: key 663, key 664, key 665
|
||||
// we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again.
|
||||
if iter_dir == IterationDirection::Backwards {
|
||||
self.stack.retreat();
|
||||
}
|
||||
let mem_page = self.pager.read_page(*left_child_page as usize)?;
|
||||
self.stack.push(mem_page);
|
||||
found_cell = true;
|
||||
break;
|
||||
} else {
|
||||
self.stack.advance();
|
||||
}
|
||||
}
|
||||
|
||||
if !found_cell {
|
||||
match contents.rightmost_pointer() {
|
||||
Some(right_most_pointer) => {
|
||||
self.stack.advance();
|
||||
let mem_page = self.pager.read_page(right_most_pointer as usize)?;
|
||||
self.stack.push(mem_page);
|
||||
continue;
|
||||
}
|
||||
None => {
|
||||
unreachable!("we shall not go back up! The only way is down the slope");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Specialized version of do_seek() for table btrees that uses binary search instead
|
||||
/// of iterating cells in order.
|
||||
fn tablebtree_seek(
|
||||
&mut self,
|
||||
rowid: u64,
|
||||
seek_op: SeekOp,
|
||||
iter_dir: IterationDirection,
|
||||
) -> Result<CursorResult<Option<u64>>> {
|
||||
assert!(self.mv_cursor.is_none());
|
||||
self.move_to_root();
|
||||
return_if_io!(self.tablebtree_move_to_binsearch(rowid, seek_op, iter_dir));
|
||||
let page = self.stack.top();
|
||||
return_if_locked!(page);
|
||||
let contents = page.get().contents.as_ref().unwrap();
|
||||
assert!(
|
||||
contents.is_leaf(),
|
||||
"tablebtree_seek_binsearch() called on non-leaf page"
|
||||
);
|
||||
|
||||
let cell_count = contents.cell_count();
|
||||
let mut min: isize = 0;
|
||||
let mut max: isize = cell_count as isize - 1;
|
||||
|
||||
// If iter dir is forwards, we want the first cell that matches;
|
||||
// If iter dir is backwards, we want the last cell that matches.
|
||||
let mut nearest_matching_cell = None;
|
||||
loop {
|
||||
if min > max {
|
||||
let Some(nearest_matching_cell) = nearest_matching_cell else {
|
||||
return Ok(CursorResult::Ok(None));
|
||||
};
|
||||
self.stack.set_cell_index(nearest_matching_cell as i32);
|
||||
let matching_cell = contents.cell_get(
|
||||
nearest_matching_cell,
|
||||
payload_overflow_threshold_max(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
payload_overflow_threshold_min(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
let BTreeCell::TableLeafCell(TableLeafCell {
|
||||
_rowid: cell_rowid,
|
||||
_payload,
|
||||
first_overflow_page,
|
||||
payload_size,
|
||||
..
|
||||
}) = matching_cell
|
||||
else {
|
||||
unreachable!("unexpected cell type: {:?}", matching_cell);
|
||||
};
|
||||
|
||||
return_if_io!(self.read_record_w_possible_overflow(
|
||||
_payload,
|
||||
first_overflow_page,
|
||||
payload_size
|
||||
));
|
||||
self.stack.next_cell_in_direction(iter_dir);
|
||||
|
||||
return Ok(CursorResult::Ok(Some(cell_rowid)));
|
||||
}
|
||||
|
||||
let cur_cell_idx = (min + max) / 2;
|
||||
self.stack.set_cell_index(cur_cell_idx as i32);
|
||||
let cur_cell = contents.cell_get(
|
||||
cur_cell_idx as usize,
|
||||
payload_overflow_threshold_max(contents.page_type(), self.usable_space() as u16),
|
||||
payload_overflow_threshold_min(contents.page_type(), self.usable_space() as u16),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
|
||||
let BTreeCell::TableLeafCell(TableLeafCell {
|
||||
_rowid: cell_rowid,
|
||||
_payload,
|
||||
first_overflow_page,
|
||||
payload_size,
|
||||
..
|
||||
}) = cur_cell
|
||||
else {
|
||||
unreachable!("unexpected cell type: {:?}", cur_cell);
|
||||
};
|
||||
|
||||
let cmp = cell_rowid.cmp(&rowid);
|
||||
|
||||
let found = match seek_op {
|
||||
SeekOp::GT => cmp.is_gt(),
|
||||
SeekOp::GE => cmp.is_ge(),
|
||||
SeekOp::EQ => cmp.is_eq(),
|
||||
SeekOp::LE => cmp.is_le(),
|
||||
SeekOp::LT => cmp.is_lt(),
|
||||
};
|
||||
|
||||
// rowids are unique, so we can return the rowid immediately
|
||||
if found && SeekOp::EQ == seek_op {
|
||||
return_if_io!(self.read_record_w_possible_overflow(
|
||||
_payload,
|
||||
first_overflow_page,
|
||||
payload_size
|
||||
));
|
||||
self.stack.next_cell_in_direction(iter_dir);
|
||||
return Ok(CursorResult::Ok(Some(cell_rowid)));
|
||||
}
|
||||
|
||||
if found {
|
||||
match iter_dir {
|
||||
IterationDirection::Forwards => {
|
||||
nearest_matching_cell = Some(cur_cell_idx as usize);
|
||||
max = cur_cell_idx - 1;
|
||||
}
|
||||
IterationDirection::Backwards => {
|
||||
nearest_matching_cell = Some(cur_cell_idx as usize);
|
||||
min = cur_cell_idx + 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if cmp.is_gt() {
|
||||
max = cur_cell_idx - 1;
|
||||
} else if cmp.is_lt() {
|
||||
min = cur_cell_idx + 1;
|
||||
} else {
|
||||
match iter_dir {
|
||||
IterationDirection::Forwards => {
|
||||
min = cur_cell_idx + 1;
|
||||
}
|
||||
IterationDirection::Backwards => {
|
||||
max = cur_cell_idx - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_record_w_possible_overflow(
|
||||
&mut self,
|
||||
payload: &'static [u8],
|
||||
next_page: Option<u32>,
|
||||
payload_size: u64,
|
||||
) -> Result<CursorResult<()>> {
|
||||
if let Some(next_page) = next_page {
|
||||
self.process_overflow_read(payload, next_page, payload_size)
|
||||
} else {
|
||||
crate::storage::sqlite3_ondisk::read_record(
|
||||
payload,
|
||||
self.get_immutable_record_or_create().as_mut().unwrap(),
|
||||
)?;
|
||||
Ok(CursorResult::Ok(()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn move_to(&mut self, key: SeekKey<'_>, cmp: SeekOp) -> Result<CursorResult<()>> {
|
||||
assert!(self.mv_cursor.is_none());
|
||||
tracing::trace!("move_to(key={:?} cmp={:?})", key, cmp);
|
||||
@@ -1207,201 +1623,12 @@ impl BTreeCursor {
|
||||
self.move_to_root();
|
||||
|
||||
let iter_dir = cmp.iteration_direction();
|
||||
|
||||
loop {
|
||||
let page = self.stack.top();
|
||||
return_if_locked!(page);
|
||||
|
||||
let contents = page.get().contents.as_ref().unwrap();
|
||||
if contents.is_leaf() {
|
||||
return Ok(CursorResult::Ok(()));
|
||||
match key {
|
||||
SeekKey::TableRowId(rowid_key) => {
|
||||
return self.tablebtree_move_to_binsearch(rowid_key, cmp, iter_dir);
|
||||
}
|
||||
|
||||
let mut found_cell = false;
|
||||
for cell_idx in 0..contents.cell_count() {
|
||||
let cell = contents.cell_get(
|
||||
cell_idx,
|
||||
payload_overflow_threshold_max(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
payload_overflow_threshold_min(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
match &cell {
|
||||
BTreeCell::TableInteriorCell(TableInteriorCell {
|
||||
_left_child_page,
|
||||
_rowid: cell_rowid,
|
||||
}) => {
|
||||
let SeekKey::TableRowId(rowid_key) = key else {
|
||||
unreachable!("table seek key should be a rowid");
|
||||
};
|
||||
// in sqlite btrees left child pages have <= keys.
|
||||
// table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10,
|
||||
// and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees)
|
||||
// the left page may have a rowid=10.
|
||||
// Logic table for determining if target leaf page is in left subtree
|
||||
//
|
||||
// Forwards iteration (looking for first match in tree):
|
||||
// OP | Current Cell vs Seek Key | Action? | Explanation
|
||||
// GT | > | go left | First > key is in left subtree
|
||||
// GT | = or < | go right | First > key is in right subtree
|
||||
// GE | > or = | go left | First >= key is in left subtree
|
||||
// GE | < | go right | First >= key is in right subtree
|
||||
//
|
||||
// Backwards iteration (looking for last match in tree):
|
||||
// OP | Current Cell vs Seek Key | Action? | Explanation
|
||||
// LE | > or = | go left | Last <= key is in left subtree
|
||||
// LE | < | go right | Last <= key is in right subtree
|
||||
// LT | > or = | go left | Last < key is in left subtree
|
||||
// LT | < | go right?| Last < key is in right subtree, except if cell rowid is exactly 1 less
|
||||
//
|
||||
// No iteration (point query):
|
||||
// EQ | > or = | go left | Last = key is in left subtree
|
||||
// EQ | < | go right | Last = key is in right subtree
|
||||
let target_leaf_page_is_in_left_subtree = match cmp {
|
||||
SeekOp::GT => *cell_rowid > rowid_key,
|
||||
SeekOp::GE => *cell_rowid >= rowid_key,
|
||||
SeekOp::LE => *cell_rowid >= rowid_key,
|
||||
SeekOp::LT => *cell_rowid + 1 >= rowid_key,
|
||||
SeekOp::EQ => *cell_rowid >= rowid_key,
|
||||
};
|
||||
if target_leaf_page_is_in_left_subtree {
|
||||
// If we found our target rowid in the left subtree,
|
||||
// we need to move the parent cell pointer forwards or backwards depending on the iteration direction.
|
||||
// For example: since the internal node contains the max rowid of the left subtree, we need to move the
|
||||
// parent pointer backwards in backwards iteration so that we don't come back to the parent again.
|
||||
// E.g.
|
||||
// this parent: rowid 666
|
||||
// left child has: 664,665,666
|
||||
// we need to move to the previous parent (with e.g. rowid 663) when iterating backwards.
|
||||
self.stack.next_cell_in_direction(iter_dir);
|
||||
let mem_page = self.pager.read_page(*_left_child_page as usize)?;
|
||||
self.stack.push(mem_page);
|
||||
found_cell = true;
|
||||
break;
|
||||
} else {
|
||||
self.stack.advance();
|
||||
}
|
||||
}
|
||||
BTreeCell::TableLeafCell(TableLeafCell {
|
||||
_rowid: _,
|
||||
_payload: _,
|
||||
first_overflow_page: _,
|
||||
..
|
||||
}) => {
|
||||
unreachable!(
|
||||
"we don't iterate leaf cells while trying to move to a leaf cell"
|
||||
);
|
||||
}
|
||||
BTreeCell::IndexInteriorCell(IndexInteriorCell {
|
||||
left_child_page,
|
||||
payload,
|
||||
first_overflow_page,
|
||||
payload_size,
|
||||
}) => {
|
||||
let SeekKey::IndexKey(index_key) = key else {
|
||||
unreachable!("index seek key should be a record");
|
||||
};
|
||||
if let Some(next_page) = first_overflow_page {
|
||||
return_if_io!(self.process_overflow_read(
|
||||
payload,
|
||||
*next_page,
|
||||
*payload_size
|
||||
))
|
||||
} else {
|
||||
crate::storage::sqlite3_ondisk::read_record(
|
||||
payload,
|
||||
self.get_immutable_record_or_create().as_mut().unwrap(),
|
||||
)?
|
||||
};
|
||||
let record = self.get_immutable_record();
|
||||
let record = record.as_ref().unwrap();
|
||||
let record_slice_equal_number_of_cols =
|
||||
&record.get_values().as_slice()[..index_key.get_values().len()];
|
||||
let interior_cell_vs_index_key = compare_immutable(
|
||||
record_slice_equal_number_of_cols,
|
||||
index_key.get_values(),
|
||||
self.index_key_sort_order,
|
||||
);
|
||||
// in sqlite btrees left child pages have <= keys.
|
||||
// in general, in forwards iteration we want to find the first key that matches the seek condition.
|
||||
// in backwards iteration we want to find the last key that matches the seek condition.
|
||||
//
|
||||
// Logic table for determining if target leaf page is in left subtree.
|
||||
// For index b-trees this is a bit more complicated since the interior cells contain payloads (the key is the payload).
|
||||
// and for non-unique indexes there might be several cells with the same key.
|
||||
//
|
||||
// Forwards iteration (looking for first match in tree):
|
||||
// OP | Current Cell vs Seek Key | Action? | Explanation
|
||||
// GT | > | go left | First > key could be exactly this one, or in left subtree
|
||||
// GT | = or < | go right | First > key must be in right subtree
|
||||
// GE | > | go left | First >= key could be exactly this one, or in left subtree
|
||||
// GE | = | go left | First >= key could be exactly this one, or in left subtree
|
||||
// GE | < | go right | First >= key must be in right subtree
|
||||
//
|
||||
// Backwards iteration (looking for last match in tree):
|
||||
// OP | Current Cell vs Seek Key | Action? | Explanation
|
||||
// LE | > | go left | Last <= key must be in left subtree
|
||||
// LE | = | go right | Last <= key is either this one, or somewhere to the right of this one. So we need to go right to make sure
|
||||
// LE | < | go right | Last <= key must be in right subtree
|
||||
// LT | > | go left | Last < key must be in left subtree
|
||||
// LT | = | go left | Last < key must be in left subtree since we want strictly less than
|
||||
// LT | < | go right | Last < key could be exactly this one, or in right subtree
|
||||
//
|
||||
// No iteration (point query):
|
||||
// EQ | > | go left | First = key must be in left subtree
|
||||
// EQ | = | go left | First = key could be exactly this one, or in left subtree
|
||||
// EQ | < | go right | First = key must be in right subtree
|
||||
|
||||
let target_leaf_page_is_in_left_subtree = match cmp {
|
||||
SeekOp::GT => interior_cell_vs_index_key.is_gt(),
|
||||
SeekOp::GE => interior_cell_vs_index_key.is_ge(),
|
||||
SeekOp::EQ => interior_cell_vs_index_key.is_ge(),
|
||||
SeekOp::LE => interior_cell_vs_index_key.is_gt(),
|
||||
SeekOp::LT => interior_cell_vs_index_key.is_ge(),
|
||||
};
|
||||
if target_leaf_page_is_in_left_subtree {
|
||||
// we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up.
|
||||
// in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again.
|
||||
// Example:
|
||||
// this parent: key 666, and we found the target key in the left child.
|
||||
// left child has: key 663, key 664, key 665
|
||||
// we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again.
|
||||
if iter_dir == IterationDirection::Backwards {
|
||||
self.stack.retreat();
|
||||
}
|
||||
let mem_page = self.pager.read_page(*left_child_page as usize)?;
|
||||
self.stack.push(mem_page);
|
||||
found_cell = true;
|
||||
break;
|
||||
} else {
|
||||
self.stack.advance();
|
||||
}
|
||||
}
|
||||
BTreeCell::IndexLeafCell(_) => {
|
||||
unreachable!(
|
||||
"we don't iterate leaf cells while trying to move to a leaf cell"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !found_cell {
|
||||
match contents.rightmost_pointer() {
|
||||
Some(right_most_pointer) => {
|
||||
self.stack.advance();
|
||||
let mem_page = self.pager.read_page(right_most_pointer as usize)?;
|
||||
self.stack.push(mem_page);
|
||||
continue;
|
||||
}
|
||||
None => {
|
||||
unreachable!("we shall not go back up! The only way is down the slope");
|
||||
}
|
||||
}
|
||||
SeekKey::IndexKey(index_key) => {
|
||||
return self.indexbtree_move_to(index_key, cmp, iter_dir);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user