mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-08 10:44:20 +01:00
Merge 'Remove find_cell, allow overwrite of index interior cell' from Jussi Saurio
Currently we are using `find_cell` which does a binary search on every insert, even if the cursor is already in the correct place. 1. Remove `find_cell` and use `seek` when inserting instead of `move_to`. 2. After removing `find_cell`, we now need to be more mindful of when we do actually require a seek before insertion. This is signified by the addition of `InsertFlags::REQUIRE_SEEK`. Previously some inserts worked by accident because we always happened to be on the correct page -- `find_cell()´ only binary searches a single page. 3. After removing `find_cell` we also need to call `next()` after `Insn::NewRowid`, because `NewRowid` positions us at the old maximum rowid, and we need to be positioned after it. 4. After removing `find_cell` we also need to leave the cursor positioned correctly during seeks where we don't find a match. 5. Allow overwrite of an index interior cell, which Closes #1975. Reviewed-by: Pere Diaz Bou <pere-altea@homail.com> Closes #1988
This commit is contained in:
@@ -196,7 +196,6 @@ enum DeleteState {
|
||||
post_balancing_seek_key: Option<DeleteSavepoint>,
|
||||
},
|
||||
CheckNeedsBalancing {
|
||||
rightmost_cell_was_dropped: bool,
|
||||
post_balancing_seek_key: Option<DeleteSavepoint>,
|
||||
},
|
||||
WaitForBalancingToComplete {
|
||||
@@ -306,14 +305,6 @@ impl BTreeKey<'_> {
|
||||
BTreeKey::IndexKey(_) => panic!("BTreeKey::to_rowid called on IndexKey"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Assert that the key is an index key and return it.
|
||||
fn to_index_key_values(&self) -> Vec<RefValue> {
|
||||
match self {
|
||||
BTreeKey::TableRowId(_) => panic!("BTreeKey::to_index_key called on TableRowId"),
|
||||
BTreeKey::IndexKey(key) => key.get_values(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -433,6 +424,8 @@ pub enum CursorValidState {
|
||||
Valid,
|
||||
/// Cursor may be pointing to a non-existent location/cell. This can happen after balancing operations
|
||||
RequireSeek,
|
||||
/// Cursor requires an advance after a seek
|
||||
RequireAdvance(IterationDirection),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -462,29 +455,21 @@ pub enum CursorSeekState {
|
||||
/// 1. We have not seen an EQ during the traversal
|
||||
/// 2. We are looking for an exact match ([SeekOp::GE] or [SeekOp::LE] with eq_only: true)
|
||||
eq_seen: Cell<bool>,
|
||||
/// In multiple places, we do a seek that checks for an exact match (SeekOp::EQ) in the tree.
|
||||
/// In those cases, we need to know where to land if we don't find an exact match in the leaf page.
|
||||
/// For non-eq-only conditions (GT, LT, GE, LE), this is pretty simple:
|
||||
/// - If we are looking for GT/GE and don't find a match, we should end up beyond the end of the page (idx=cell count).
|
||||
/// - If we are looking for LT/LE and don't find a match, we should end up before the beginning of the page (idx=-1).
|
||||
///
|
||||
/// For eq-only conditions (GE { eq_only: true } or LE { eq_only: true }), we need to know where to land if we don't find an exact match.
|
||||
/// For GE, we want to land at the first cell that is greater than the seek key.
|
||||
/// For LE, we want to land at the last cell that is less than the seek key.
|
||||
/// This is because e.g. when we attempt to insert rowid 666, we first check if it exists.
|
||||
/// If it doesn't, we want to land in the place where rowid 666 WOULD be inserted.
|
||||
target_cell_when_not_found: Cell<i32>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct FindCellState(Option<(usize, usize)>); // low, high
|
||||
|
||||
impl FindCellState {
|
||||
#[inline]
|
||||
fn set(&mut self, lowhigh: (usize, usize)) {
|
||||
self.0 = Some(lowhigh);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_state(&mut self) -> (usize, usize) {
|
||||
self.0.expect("get can only be called after a set")
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reset(&mut self) {
|
||||
self.0 = None;
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BTreeCursor {
|
||||
/// The multi-version cursor that is used to read and write to the database file.
|
||||
mv_cursor: Option<Rc<RefCell<MvCursor>>>,
|
||||
@@ -523,8 +508,6 @@ pub struct BTreeCursor {
|
||||
/// Separate state to read a record with overflow pages. This separation from `state` is necessary as
|
||||
/// we can be in a function that relies on `state`, but also needs to process overflow pages
|
||||
read_overflow_state: RefCell<Option<ReadPayloadOverflow>>,
|
||||
/// Contains the current cell_idx for `find_cell`
|
||||
find_cell_state: FindCellState,
|
||||
/// `RecordCursor` is used to parse SQLite record format data retrieved from B-tree
|
||||
/// leaf pages. It provides incremental parsing, only deserializing the columns that are
|
||||
/// actually accessed, which is crucial for performance when dealing with wide tables
|
||||
@@ -592,7 +575,6 @@ impl BTreeCursor {
|
||||
valid_state: CursorValidState::Valid,
|
||||
seek_state: CursorSeekState::Start,
|
||||
read_overflow_state: RefCell::new(None),
|
||||
find_cell_state: FindCellState(None),
|
||||
parse_record_state: RefCell::new(ParseRecordState::Init),
|
||||
record_cursor: RefCell::new(RecordCursor::with_capacity(num_columns)),
|
||||
}
|
||||
@@ -1743,6 +1725,10 @@ impl BTreeCursor {
|
||||
max_cell_idx,
|
||||
nearest_matching_cell,
|
||||
eq_seen: Cell::new(false), // not relevant for table btrees
|
||||
target_cell_when_not_found: Cell::new(match seek_op.iteration_direction() {
|
||||
IterationDirection::Forwards => cell_count as i32,
|
||||
IterationDirection::Backwards => -1,
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1750,6 +1736,7 @@ impl BTreeCursor {
|
||||
min_cell_idx,
|
||||
max_cell_idx,
|
||||
nearest_matching_cell,
|
||||
target_cell_when_not_found,
|
||||
..
|
||||
} = &self.seek_state
|
||||
else {
|
||||
@@ -1767,6 +1754,7 @@ impl BTreeCursor {
|
||||
if min > max {
|
||||
if let Some(nearest_matching_cell) = nearest_matching_cell.get() {
|
||||
self.stack.set_cell_index(nearest_matching_cell as i32);
|
||||
self.has_record.set(true);
|
||||
return Ok(IOResult::Done(SeekResult::Found));
|
||||
} else {
|
||||
// if !eq_only - matching entry can exist in neighbour leaf page
|
||||
@@ -1774,19 +1762,14 @@ impl BTreeCursor {
|
||||
// in such case BTree can navigate to the leaf which no longer has matching key for seek_op
|
||||
// in this case, caller must advance cursor if necessary
|
||||
return Ok(IOResult::Done(if seek_op.eq_only() {
|
||||
let has_record = target_cell_when_not_found.get() >= 0
|
||||
&& target_cell_when_not_found.get() < contents.cell_count() as i32;
|
||||
self.has_record.set(has_record);
|
||||
self.stack.set_cell_index(target_cell_when_not_found.get());
|
||||
SeekResult::NotFound
|
||||
} else {
|
||||
let contents = page.get().contents.as_ref().unwrap();
|
||||
turso_assert!(
|
||||
contents.is_leaf(),
|
||||
"tablebtree_seek() called on non-leaf page"
|
||||
);
|
||||
let cell_count = contents.cell_count();
|
||||
// set cursor to the position where which would hold the op-boundary if it were present
|
||||
self.stack.set_cell_index(match &seek_op {
|
||||
SeekOp::GT | SeekOp::GE { .. } => cell_count as i32,
|
||||
SeekOp::LT | SeekOp::LE { .. } => 0,
|
||||
});
|
||||
self.stack.set_cell_index(target_cell_when_not_found.get());
|
||||
SeekResult::TryAdvance
|
||||
}));
|
||||
};
|
||||
@@ -1809,6 +1792,7 @@ impl BTreeCursor {
|
||||
// rowids are unique, so we can return the rowid immediately
|
||||
if found && seek_op.eq_only() {
|
||||
self.stack.set_cell_index(cur_cell_idx as i32);
|
||||
self.has_record.set(true);
|
||||
return Ok(IOResult::Done(SeekResult::Found));
|
||||
}
|
||||
|
||||
@@ -1823,8 +1807,16 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
} else if cmp.is_gt() {
|
||||
if matches!(seek_op, SeekOp::GE { eq_only: true }) {
|
||||
target_cell_when_not_found
|
||||
.set(target_cell_when_not_found.get().min(cur_cell_idx as i32));
|
||||
}
|
||||
max_cell_idx.set(cur_cell_idx - 1);
|
||||
} else if cmp.is_lt() {
|
||||
if matches!(seek_op, SeekOp::LE { eq_only: true }) {
|
||||
target_cell_when_not_found
|
||||
.set(target_cell_when_not_found.get().max(cur_cell_idx as i32));
|
||||
}
|
||||
min_cell_idx.set(cur_cell_idx + 1);
|
||||
} else {
|
||||
match iter_dir {
|
||||
@@ -1896,6 +1888,10 @@ impl BTreeCursor {
|
||||
max_cell_idx: max,
|
||||
nearest_matching_cell,
|
||||
eq_seen: Cell::new(eq_seen),
|
||||
target_cell_when_not_found: Cell::new(match seek_op.iteration_direction() {
|
||||
IterationDirection::Forwards => cell_count as i32,
|
||||
IterationDirection::Backwards => -1,
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1904,6 +1900,7 @@ impl BTreeCursor {
|
||||
max_cell_idx,
|
||||
nearest_matching_cell,
|
||||
eq_seen,
|
||||
target_cell_when_not_found,
|
||||
} = &self.seek_state
|
||||
else {
|
||||
unreachable!(
|
||||
@@ -1916,7 +1913,6 @@ impl BTreeCursor {
|
||||
return_if_locked_maybe_load!(self.pager, page);
|
||||
let page = page.get();
|
||||
let contents = page.get().contents.as_ref().unwrap();
|
||||
let cell_count = contents.cell_count();
|
||||
|
||||
let iter_dir = seek_op.iteration_direction();
|
||||
|
||||
@@ -1926,19 +1922,21 @@ impl BTreeCursor {
|
||||
if min > max {
|
||||
if let Some(nearest_matching_cell) = nearest_matching_cell.get() {
|
||||
self.stack.set_cell_index(nearest_matching_cell as i32);
|
||||
self.has_record.set(true);
|
||||
return Ok(IOResult::Done(SeekResult::Found));
|
||||
} else {
|
||||
// set cursor to the position where which would hold the op-boundary if it were present
|
||||
let target_cell = target_cell_when_not_found.get();
|
||||
self.stack.set_cell_index(target_cell);
|
||||
let has_record = target_cell >= 0 && target_cell < contents.cell_count() as i32;
|
||||
self.has_record.set(has_record);
|
||||
|
||||
// Similar logic as in tablebtree_seek(), but for indexes.
|
||||
// The difference is that since index keys are not necessarily unique, we need to TryAdvance
|
||||
// even when eq_only=true and we have seen an EQ match up in the tree in an interior node.
|
||||
if seek_op.eq_only() && !eq_seen.get() {
|
||||
return Ok(IOResult::Done(SeekResult::NotFound));
|
||||
}
|
||||
// set cursor to the position where which would hold the op-boundary if it were present
|
||||
self.stack.set_cell_index(match &seek_op {
|
||||
SeekOp::GT | SeekOp::GE { .. } => cell_count as i32,
|
||||
SeekOp::LT | SeekOp::LE { .. } => 0,
|
||||
});
|
||||
return Ok(IOResult::Done(SeekResult::TryAdvance));
|
||||
};
|
||||
}
|
||||
@@ -1989,8 +1987,16 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
} else if cmp.is_gt() {
|
||||
if matches!(seek_op, SeekOp::GE { eq_only: true }) {
|
||||
target_cell_when_not_found
|
||||
.set(target_cell_when_not_found.get().min(cur_cell_idx as i32));
|
||||
}
|
||||
max_cell_idx.set(cur_cell_idx - 1);
|
||||
} else if cmp.is_lt() {
|
||||
if matches!(seek_op, SeekOp::LE { eq_only: true }) {
|
||||
target_cell_when_not_found
|
||||
.set(target_cell_when_not_found.get().max(cur_cell_idx as i32));
|
||||
}
|
||||
min_cell_idx.set(cur_cell_idx + 1);
|
||||
} else {
|
||||
match iter_dir {
|
||||
@@ -2032,30 +2038,7 @@ impl BTreeCursor {
|
||||
(cmp, found)
|
||||
}
|
||||
|
||||
fn read_record_w_possible_overflow(
|
||||
&mut self,
|
||||
payload: &'static [u8],
|
||||
next_page: Option<u32>,
|
||||
payload_size: u64,
|
||||
) -> Result<IOResult<()>> {
|
||||
if let Some(next_page) = next_page {
|
||||
self.process_overflow_read(payload, next_page, payload_size)
|
||||
} else {
|
||||
self.get_immutable_record_or_create()
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.invalidate();
|
||||
self.get_immutable_record_or_create()
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.start_serialization(payload);
|
||||
|
||||
self.record_cursor.borrow_mut().invalidate();
|
||||
Ok(IOResult::Done(()))
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(skip_all, level = Level::DEBUG)]
|
||||
#[instrument(skip_all, level = Level::INFO)]
|
||||
pub fn move_to(&mut self, key: SeekKey<'_>, cmp: SeekOp) -> Result<IOResult<()>> {
|
||||
turso_assert!(
|
||||
self.mv_cursor.is_none(),
|
||||
@@ -2131,23 +2114,20 @@ impl BTreeCursor {
|
||||
return_if_locked_maybe_load!(self.pager, page);
|
||||
|
||||
// get page and find cell
|
||||
let (cell_idx, page_type) = {
|
||||
let cell_idx = {
|
||||
return_if_locked!(page.get());
|
||||
let page = page.get();
|
||||
|
||||
page.set_dirty();
|
||||
self.pager.add_dirty(page.get().id);
|
||||
|
||||
let page = page.get().contents.as_mut().unwrap();
|
||||
turso_assert!(
|
||||
matches!(page.page_type(), PageType::TableLeaf | PageType::IndexLeaf),
|
||||
"expected table or index leaf page"
|
||||
);
|
||||
|
||||
// find cell
|
||||
(return_if_io!(self.find_cell(page, bkey)), page.page_type())
|
||||
self.stack.current_cell_index()
|
||||
};
|
||||
self.stack.set_cell_index(cell_idx as i32);
|
||||
if cell_idx == -1 {
|
||||
// This might be a brand new table and the cursor hasn't moved yet. Let's advance it to the first slot.
|
||||
self.stack.set_cell_index(0);
|
||||
}
|
||||
let cell_idx = self.stack.current_cell_index() as usize;
|
||||
tracing::debug!(cell_idx);
|
||||
|
||||
// if the cell index is less than the total cells, check: if its an existing
|
||||
@@ -2178,8 +2158,8 @@ impl BTreeCursor {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
BTreeCell::IndexLeafCell(..) => {
|
||||
// Not necessary to read record again here, as find_cell already does that for us
|
||||
BTreeCell::IndexLeafCell(..) | BTreeCell::IndexInteriorCell(..) => {
|
||||
return_if_io!(self.record());
|
||||
let cmp = compare_immutable(
|
||||
record_values.as_slice(),
|
||||
self.get_immutable_record()
|
||||
@@ -2206,18 +2186,25 @@ impl BTreeCursor {
|
||||
self.save_context(CursorContext::IndexKeyRowId((*record).clone()));
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
turso_assert!(
|
||||
!matches!(cell, BTreeCell::IndexInteriorCell(..)),
|
||||
"we should not be inserting a new index interior cell. the only valid operation on an index interior cell is an overwrite!"
|
||||
);
|
||||
}
|
||||
}
|
||||
other => panic!("unexpected cell type, expected TableLeaf or IndexLeaf, found: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
// insert cell
|
||||
|
||||
let mut cell_payload: Vec<u8> = Vec::with_capacity(record_values.len() + 4);
|
||||
fill_cell_payload(
|
||||
page_type,
|
||||
page.get().get().contents.as_ref().unwrap(),
|
||||
bkey.maybe_rowid(),
|
||||
&mut cell_payload,
|
||||
cell_idx,
|
||||
record,
|
||||
self.usable_space() as u16,
|
||||
self.pager.clone(),
|
||||
@@ -3910,86 +3897,6 @@ impl BTreeCursor {
|
||||
self.pager.usable_space()
|
||||
}
|
||||
|
||||
/// Find the index of the cell in the page that contains the given rowid.
|
||||
#[instrument( skip_all, level = Level::DEBUG)]
|
||||
fn find_cell(&mut self, page: &PageContent, key: &BTreeKey) -> Result<IOResult<usize>> {
|
||||
let cell_count = page.cell_count();
|
||||
let mut low = 0;
|
||||
let mut high = if cell_count > 0 { cell_count - 1 } else { 0 };
|
||||
let mut result_index = cell_count;
|
||||
if self.find_cell_state.0.is_some() {
|
||||
(low, high) = self.find_cell_state.get_state();
|
||||
}
|
||||
|
||||
while low <= high && cell_count > 0 {
|
||||
let mid = low + (high - low) / 2;
|
||||
self.find_cell_state.set((low, high));
|
||||
let cell = page.cell_get(mid, self.usable_space())?;
|
||||
|
||||
let comparison_result = match cell {
|
||||
BTreeCell::TableLeafCell(cell) => key.to_rowid().cmp(&cell.rowid),
|
||||
BTreeCell::TableInteriorCell(cell) => key.to_rowid().cmp(&cell.rowid),
|
||||
BTreeCell::IndexInteriorCell(IndexInteriorCell {
|
||||
payload,
|
||||
first_overflow_page,
|
||||
payload_size,
|
||||
..
|
||||
})
|
||||
| BTreeCell::IndexLeafCell(IndexLeafCell {
|
||||
payload,
|
||||
first_overflow_page,
|
||||
payload_size,
|
||||
..
|
||||
}) => {
|
||||
// TODO: implement efficient comparison of records
|
||||
// e.g. https://github.com/sqlite/sqlite/blob/master/src/vdbeaux.c#L4719
|
||||
return_if_io!(self.read_record_w_possible_overflow(
|
||||
payload,
|
||||
first_overflow_page,
|
||||
payload_size
|
||||
));
|
||||
|
||||
let key_values = key.to_index_key_values();
|
||||
let record = self.get_immutable_record();
|
||||
let record = record.as_ref().unwrap();
|
||||
let record_same_number_cols = &record.get_values()[..key_values.len()];
|
||||
compare_immutable(
|
||||
key_values.as_slice(),
|
||||
record_same_number_cols,
|
||||
self.index_info
|
||||
.as_ref()
|
||||
.expect("indexbtree_find_cell without index_info")
|
||||
.key_info
|
||||
.as_slice(),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
match comparison_result {
|
||||
Ordering::Equal => {
|
||||
result_index = mid;
|
||||
break;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
low = mid + 1;
|
||||
}
|
||||
Ordering::Less => {
|
||||
result_index = mid;
|
||||
if mid == 0 {
|
||||
break;
|
||||
}
|
||||
high = mid - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.find_cell_state.reset();
|
||||
assert!(result_index <= cell_count);
|
||||
|
||||
Ok(IOResult::Done(result_index))
|
||||
}
|
||||
|
||||
#[instrument(skip_all, level = Level::DEBUG)]
|
||||
pub fn seek_end(&mut self) -> Result<IOResult<()>> {
|
||||
assert!(self.mv_cursor.is_none()); // unsure about this -_-
|
||||
self.move_to_root()?;
|
||||
@@ -4148,8 +4055,6 @@ impl BTreeCursor {
|
||||
// Reset seek state
|
||||
self.seek_state = CursorSeekState::Start;
|
||||
self.valid_state = CursorValidState::Valid;
|
||||
self.has_record
|
||||
.replace(matches!(seek_result, SeekResult::Found));
|
||||
Ok(IOResult::Done(seek_result))
|
||||
}
|
||||
|
||||
@@ -4246,7 +4151,10 @@ impl BTreeCursor {
|
||||
pub fn insert(
|
||||
&mut self,
|
||||
key: &BTreeKey,
|
||||
mut moved_before: bool, /* Indicate whether it's necessary to traverse to find the leaf page */
|
||||
// Indicate whether it's necessary to traverse to find the leaf page
|
||||
// FIXME: refactor this out into a state machine, these ad-hoc state
|
||||
// variables are very hard to reason about
|
||||
mut moved_before: bool,
|
||||
) -> Result<IOResult<()>> {
|
||||
tracing::debug!(valid_state = ?self.valid_state, cursor_state = ?self.state, is_write_in_progress = self.is_write_in_progress());
|
||||
match &self.mv_cursor {
|
||||
@@ -4260,30 +4168,50 @@ impl BTreeCursor {
|
||||
None => todo!("Support mvcc inserts with index btrees"),
|
||||
},
|
||||
None => {
|
||||
if self.valid_state != CursorValidState::Valid && !self.is_write_in_progress() {
|
||||
// A balance happened so we need to move.
|
||||
moved_before = false;
|
||||
}
|
||||
match (&self.valid_state, self.is_write_in_progress()) {
|
||||
(CursorValidState::Valid, _) => {
|
||||
// consider the current position valid unless the caller explicitly asks us to seek.
|
||||
}
|
||||
(CursorValidState::RequireSeek, false) => {
|
||||
// we must seek.
|
||||
moved_before = false;
|
||||
}
|
||||
(CursorValidState::RequireSeek, true) => {
|
||||
// illegal to seek during a write no matter what CursorValidState or caller says -- we might e.g. move to the wrong page during balancing
|
||||
moved_before = true;
|
||||
}
|
||||
(CursorValidState::RequireAdvance(direction), _) => {
|
||||
// FIXME: this is a hack to support the case where we need to advance the cursor after a seek.
|
||||
// We should have a proper state machine for this.
|
||||
return_if_io!(match direction {
|
||||
IterationDirection::Forwards => self.next(),
|
||||
IterationDirection::Backwards => self.prev(),
|
||||
});
|
||||
self.valid_state = CursorValidState::Valid;
|
||||
self.seek_state = CursorSeekState::Start;
|
||||
moved_before = true;
|
||||
}
|
||||
};
|
||||
if !moved_before {
|
||||
// Use move_to() so that we always end up on a leaf page. seek() might go back to an interior cell in index seeks,
|
||||
// which we never want. The reason we can use move_to() is that
|
||||
// find_cell() iterates the leaf page from left to right to find the insertion point anyway, so we don't care
|
||||
// which cell we are in as long as we are on the right page.
|
||||
// FIXME: find_cell() should not use linear search because it's slow.
|
||||
match key {
|
||||
let seek_result = match key {
|
||||
BTreeKey::IndexKey(_) => {
|
||||
return_if_io!(self.move_to(
|
||||
return_if_io!(self.seek(
|
||||
SeekKey::IndexKey(key.get_record().unwrap()),
|
||||
SeekOp::GE { eq_only: true }
|
||||
))
|
||||
}
|
||||
BTreeKey::TableRowId(_) => {
|
||||
return_if_io!(self.move_to(
|
||||
return_if_io!(self.seek(
|
||||
SeekKey::TableRowId(key.to_rowid()),
|
||||
SeekOp::GE { eq_only: true }
|
||||
))
|
||||
}
|
||||
};
|
||||
if SeekResult::TryAdvance == seek_result {
|
||||
self.valid_state =
|
||||
CursorValidState::RequireAdvance(IterationDirection::Forwards);
|
||||
return_if_io!(self.next());
|
||||
}
|
||||
self.context.take(); // we know where we wanted to move so if there was any saved context, discard it.
|
||||
self.valid_state = CursorValidState::Valid;
|
||||
self.seek_state = CursorSeekState::Start;
|
||||
@@ -4449,12 +4377,10 @@ impl BTreeCursor {
|
||||
post_balancing_seek_key,
|
||||
};
|
||||
} else {
|
||||
let is_last_cell = cell_idx == contents.cell_count().saturating_sub(1);
|
||||
drop_cell(contents, cell_idx, self.usable_space() as u16)?;
|
||||
|
||||
let delete_info = self.state.mut_delete_info().unwrap();
|
||||
delete_info.state = DeleteState::CheckNeedsBalancing {
|
||||
rightmost_cell_was_dropped: is_last_cell,
|
||||
post_balancing_seek_key,
|
||||
};
|
||||
}
|
||||
@@ -4555,13 +4481,11 @@ impl BTreeCursor {
|
||||
|
||||
let delete_info = self.state.mut_delete_info().unwrap();
|
||||
delete_info.state = DeleteState::CheckNeedsBalancing {
|
||||
rightmost_cell_was_dropped: false,
|
||||
post_balancing_seek_key,
|
||||
};
|
||||
}
|
||||
|
||||
DeleteState::CheckNeedsBalancing {
|
||||
rightmost_cell_was_dropped,
|
||||
post_balancing_seek_key,
|
||||
} => {
|
||||
let page = self.stack.top();
|
||||
@@ -4573,15 +4497,6 @@ impl BTreeCursor {
|
||||
let needs_balancing = self.stack.has_parent()
|
||||
&& free_space as usize * 3 > self.usable_space() * 2;
|
||||
|
||||
if rightmost_cell_was_dropped {
|
||||
// If we drop a cell in the middle, e.g. our current index is 2 and we drop 'c' from [a,b,c,d,e], then we don't need to retreat index,
|
||||
// because index 2 is still going to be the right place [a,b,D,e]
|
||||
// but:
|
||||
// If we drop the rightmost cell, e.g. our current index is 4 and we drop 'e' from [a,b,c,d,e], then we need to retreat index,
|
||||
// because index 4 is now pointing beyond the last cell [a,b,c,d] _ <-- index 4
|
||||
self.stack.retreat();
|
||||
}
|
||||
|
||||
if needs_balancing {
|
||||
let delete_info = self.state.mut_delete_info().unwrap();
|
||||
if delete_info.balance_write_info.is_none() {
|
||||
@@ -4700,10 +4615,9 @@ impl BTreeCursor {
|
||||
};
|
||||
let seek_result =
|
||||
return_if_io!(self.seek(SeekKey::TableRowId(*int_key), SeekOp::GE { eq_only: true }));
|
||||
let has_record = matches!(seek_result, SeekResult::Found);
|
||||
self.has_record.set(has_record);
|
||||
let exists = matches!(seek_result, SeekResult::Found);
|
||||
self.invalidate_record();
|
||||
Ok(IOResult::Done(has_record))
|
||||
Ok(IOResult::Done(exists))
|
||||
}
|
||||
|
||||
/// Clear the overflow pages linked to a specific page provided by the leaf cell
|
||||
@@ -4965,14 +4879,16 @@ impl BTreeCursor {
|
||||
record: &ImmutableRecord,
|
||||
) -> Result<IOResult<()>> {
|
||||
// build the new payload
|
||||
let page_type = page_ref.get().get().contents.as_ref().unwrap().page_type();
|
||||
let page = page_ref.get();
|
||||
let page_contents = page.get().contents.as_ref().unwrap();
|
||||
let serial_types_len = self.record_cursor.borrow_mut().len(record);
|
||||
let mut new_payload = Vec::with_capacity(serial_types_len);
|
||||
let rowid = return_if_io!(self.rowid());
|
||||
fill_cell_payload(
|
||||
page_type,
|
||||
page_contents,
|
||||
rowid,
|
||||
&mut new_payload,
|
||||
cell_idx,
|
||||
record,
|
||||
self.usable_space() as u16,
|
||||
self.pager.clone(),
|
||||
@@ -5138,7 +5054,19 @@ impl BTreeCursor {
|
||||
/// If context is defined, restore it and set it None on success
|
||||
#[instrument(skip_all, level = Level::DEBUG)]
|
||||
fn restore_context(&mut self) -> Result<IOResult<()>> {
|
||||
if self.context.is_none() || !matches!(self.valid_state, CursorValidState::RequireSeek) {
|
||||
if self.context.is_none() || matches!(self.valid_state, CursorValidState::Valid) {
|
||||
return Ok(IOResult::Done(()));
|
||||
}
|
||||
if let CursorValidState::RequireAdvance(direction) = self.valid_state {
|
||||
let has_record = return_if_io!(match direction {
|
||||
// Avoid calling next()/prev() directly because they immediately call restore_context()
|
||||
IterationDirection::Forwards => self.get_next_record(),
|
||||
IterationDirection::Backwards => self.get_prev_record(),
|
||||
});
|
||||
self.has_record.set(has_record);
|
||||
self.invalidate_record();
|
||||
self.context = None;
|
||||
self.valid_state = CursorValidState::Valid;
|
||||
return Ok(IOResult::Done(()));
|
||||
}
|
||||
let ctx = self.context.take().unwrap();
|
||||
@@ -5148,7 +5076,13 @@ impl BTreeCursor {
|
||||
};
|
||||
let res = self.seek(seek_key, SeekOp::GE { eq_only: true })?;
|
||||
match res {
|
||||
IOResult::Done(_) => {
|
||||
IOResult::Done(res) => {
|
||||
if let SeekResult::TryAdvance = res {
|
||||
self.valid_state =
|
||||
CursorValidState::RequireAdvance(IterationDirection::Forwards);
|
||||
self.context = Some(ctx);
|
||||
return Ok(IOResult::IO);
|
||||
}
|
||||
self.valid_state = CursorValidState::Valid;
|
||||
Ok(IOResult::Done(()))
|
||||
}
|
||||
@@ -6273,9 +6207,10 @@ fn _insert_into_cell(
|
||||
) -> Result<()> {
|
||||
assert!(
|
||||
cell_idx <= page.cell_count() + page.overflow_cells.len(),
|
||||
"attempting to add cell to an incorrect place cell_idx={} cell_count={}",
|
||||
"attempting to add cell to an incorrect place cell_idx={} cell_count={} page_type={:?}",
|
||||
cell_idx,
|
||||
page.cell_count()
|
||||
page.cell_count(),
|
||||
page.page_type()
|
||||
);
|
||||
let already_has_overflow = !page.overflow_cells.is_empty();
|
||||
let enough_space = if already_has_overflow && !allow_regular_insert_despite_overflow {
|
||||
@@ -6463,21 +6398,25 @@ fn allocate_cell_space(page_ref: &PageContent, amount: u16, usable_space: u16) -
|
||||
/// Fill in the cell payload with the record.
|
||||
/// If the record is too large to fit in the cell, it will spill onto overflow pages.
|
||||
fn fill_cell_payload(
|
||||
page_type: PageType,
|
||||
page_contents: &PageContent,
|
||||
int_key: Option<i64>,
|
||||
cell_payload: &mut Vec<u8>,
|
||||
cell_idx: usize,
|
||||
record: &ImmutableRecord,
|
||||
usable_space: u16,
|
||||
pager: Rc<Pager>,
|
||||
) {
|
||||
assert!(matches!(
|
||||
page_type,
|
||||
PageType::TableLeaf | PageType::IndexLeaf
|
||||
));
|
||||
// TODO: make record raw from start, having to serialize is not good
|
||||
let record_buf = record.get_payload().to_vec();
|
||||
|
||||
let page_type = page_contents.page_type();
|
||||
// fill in header
|
||||
if matches!(page_type, PageType::IndexInterior) {
|
||||
// if a write happened on an index interior page, it is always an overwrite.
|
||||
// we must copy the left child pointer of the replaced cell to the new cell.
|
||||
let left_child_page = page_contents.cell_interior_read_left_child_page(cell_idx);
|
||||
cell_payload.extend_from_slice(&left_child_page.to_be_bytes());
|
||||
}
|
||||
if matches!(page_type, PageType::TableLeaf) {
|
||||
let int_key = int_key.unwrap();
|
||||
write_varint_to_vec(record_buf.len() as u64, cell_payload);
|
||||
@@ -6711,9 +6650,10 @@ mod tests {
|
||||
) -> Vec<u8> {
|
||||
let mut payload: Vec<u8> = Vec::new();
|
||||
fill_cell_payload(
|
||||
page.page_type(),
|
||||
page,
|
||||
Some(id as i64),
|
||||
&mut payload,
|
||||
pos,
|
||||
&record,
|
||||
4096,
|
||||
conn.pager.borrow().clone(),
|
||||
@@ -7269,6 +7209,15 @@ mod tests {
|
||||
.map(|col| Register::Value(Value::Integer(*col)))
|
||||
.collect::<Vec<_>>();
|
||||
let value = ImmutableRecord::from_registers(®s, regs.len());
|
||||
run_until_done(
|
||||
|| {
|
||||
let record = ImmutableRecord::from_registers(®s, regs.len());
|
||||
let key = SeekKey::IndexKey(&record);
|
||||
cursor.seek(key, SeekOp::GE { eq_only: true })
|
||||
},
|
||||
pager.deref(),
|
||||
)
|
||||
.unwrap();
|
||||
run_until_done(
|
||||
|| {
|
||||
cursor.insert(
|
||||
@@ -7857,9 +7806,10 @@ mod tests {
|
||||
let record = ImmutableRecord::from_registers(regs, regs.len());
|
||||
let mut payload: Vec<u8> = Vec::new();
|
||||
fill_cell_payload(
|
||||
page.page_type(),
|
||||
page,
|
||||
Some(i as i64),
|
||||
&mut payload,
|
||||
cell_idx,
|
||||
&record,
|
||||
4096,
|
||||
conn.pager.borrow().clone(),
|
||||
@@ -7930,9 +7880,10 @@ mod tests {
|
||||
let record = ImmutableRecord::from_registers(regs, regs.len());
|
||||
let mut payload: Vec<u8> = Vec::new();
|
||||
fill_cell_payload(
|
||||
page.page_type(),
|
||||
page,
|
||||
Some(i),
|
||||
&mut payload,
|
||||
cell_idx,
|
||||
&record,
|
||||
4096,
|
||||
conn.pager.borrow().clone(),
|
||||
@@ -8294,9 +8245,10 @@ mod tests {
|
||||
let record = ImmutableRecord::from_registers(regs, regs.len());
|
||||
let mut payload: Vec<u8> = Vec::new();
|
||||
fill_cell_payload(
|
||||
page.get().get_contents().page_type(),
|
||||
page.get().get_contents(),
|
||||
Some(0),
|
||||
&mut payload,
|
||||
0,
|
||||
&record,
|
||||
4096,
|
||||
conn.pager.borrow().clone(),
|
||||
@@ -8371,9 +8323,10 @@ mod tests {
|
||||
let record = ImmutableRecord::from_registers(regs, regs.len());
|
||||
let mut payload: Vec<u8> = Vec::new();
|
||||
fill_cell_payload(
|
||||
page.get().get_contents().page_type(),
|
||||
page.get().get_contents(),
|
||||
Some(0),
|
||||
&mut payload,
|
||||
0,
|
||||
&record,
|
||||
4096,
|
||||
conn.pager.borrow().clone(),
|
||||
@@ -8726,7 +8679,7 @@ mod tests {
|
||||
// add a bunch of cells
|
||||
while compute_free_space(page.get_contents(), pager.usable_space() as u16) >= size + 10
|
||||
{
|
||||
insert_cell(i, size, page.get_contents(), pager.clone(), page_type);
|
||||
insert_cell(i, size, page.get_contents(), pager.clone());
|
||||
i += 1;
|
||||
size = (rng.next_u64() % 1024) as u16;
|
||||
}
|
||||
@@ -8783,24 +8736,25 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_cell(
|
||||
i: u64,
|
||||
size: u16,
|
||||
contents: &mut PageContent,
|
||||
pager: Rc<Pager>,
|
||||
page_type: PageType,
|
||||
) {
|
||||
fn insert_cell(cell_idx: u64, size: u16, contents: &mut PageContent, pager: Rc<Pager>) {
|
||||
let mut payload = Vec::new();
|
||||
let regs = &[Register::Value(Value::Blob(vec![0; size as usize]))];
|
||||
let record = ImmutableRecord::from_registers(regs, regs.len());
|
||||
fill_cell_payload(
|
||||
page_type,
|
||||
Some(i as i64),
|
||||
contents,
|
||||
Some(cell_idx as i64),
|
||||
&mut payload,
|
||||
cell_idx as usize,
|
||||
&record,
|
||||
pager.usable_space() as u16,
|
||||
pager.clone(),
|
||||
);
|
||||
insert_into_cell(contents, &payload, i as usize, pager.usable_space() as u16).unwrap();
|
||||
insert_into_cell(
|
||||
contents,
|
||||
&payload,
|
||||
cell_idx as usize,
|
||||
pager.usable_space() as u16,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -333,7 +333,7 @@ fn create_dedupe_index(
|
||||
root_page: 0,
|
||||
ephemeral: true,
|
||||
table_name: String::new(),
|
||||
unique: true,
|
||||
unique: false,
|
||||
has_rowid: false,
|
||||
});
|
||||
let cursor_id = program.alloc_cursor_id(CursorType::BTreeIndex(dedupe_index.clone()));
|
||||
|
||||
@@ -1153,7 +1153,13 @@ fn emit_update_insns(
|
||||
cursor: cursor_id,
|
||||
key_reg: rowid_set_clause_reg.unwrap_or(beg),
|
||||
record_reg,
|
||||
flag: InsertFlags::new().update(true),
|
||||
flag: if has_user_provided_rowid {
|
||||
// The previous Insn::NotExists and Insn::Delete seek to the old rowid,
|
||||
// so to insert a new user-provided rowid, we need to seek to the correct place.
|
||||
InsertFlags::new().require_seek()
|
||||
} else {
|
||||
InsertFlags::new()
|
||||
},
|
||||
table_name: table_ref.identifier.clone(),
|
||||
});
|
||||
|
||||
|
||||
@@ -231,7 +231,8 @@ pub fn translate_insert(
|
||||
cursor: temp_cursor_id,
|
||||
key_reg: rowid_reg,
|
||||
record_reg,
|
||||
flag: InsertFlags::new(),
|
||||
// since we are not doing an Insn::NewRowid or an Insn::NotExists here, we need to seek to ensure the insertion happens in the correct place.
|
||||
flag: InsertFlags::new().require_seek(),
|
||||
table_name: "".to_string(),
|
||||
});
|
||||
|
||||
|
||||
@@ -128,7 +128,8 @@ pub fn emit_result_row_and_limit(
|
||||
cursor: *table_cursor_id,
|
||||
key_reg: result_columns_start_reg + (plan.result_columns.len() - 1), // Rowid reg is the last register
|
||||
record_reg,
|
||||
flag: InsertFlags(0),
|
||||
// since we are not doing an Insn::NewRowid or an Insn::NotExists here, we need to seek to ensure the insertion happens in the correct place.
|
||||
flag: InsertFlags::new().require_seek(),
|
||||
table_name: table.name.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2235,7 +2235,7 @@ macro_rules! return_if_io {
|
||||
};
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum SeekResult {
|
||||
/// Record matching the [SeekOp] found in the B-tree and cursor was positioned to point onto that record
|
||||
Found,
|
||||
|
||||
@@ -13,6 +13,7 @@ use crate::types::{
|
||||
compare_immutable, compare_records_generic, ImmutableRecord, SeekResult, Text, TextSubtype,
|
||||
};
|
||||
use crate::util::normalize_ident;
|
||||
use crate::vdbe::insn::InsertFlags;
|
||||
use crate::vdbe::registers_to_ref_values;
|
||||
use crate::{
|
||||
error::{
|
||||
@@ -4884,8 +4885,15 @@ pub fn op_insert(
|
||||
Register::Aggregate(..) => unreachable!("Cannot insert an aggregate value."),
|
||||
};
|
||||
|
||||
// query planner must emit NewRowId/NotExists/etc op-codes which will properly reposition cursor
|
||||
return_if_io!(cursor.insert(&BTreeKey::new_table_rowid(key, Some(record.as_ref())), true));
|
||||
// In a table insert, if the caller does not pass InsertFlags::REQUIRE_SEEK, they must ensure that a seek has already happened to the correct location.
|
||||
// This typically happens by invoking either Insn::NewRowid or Insn::NotExists, because:
|
||||
// 1. op_new_rowid() seeks to the end of the table, which is the correct insertion position.
|
||||
// 2. op_not_exists() seeks to the position in the table where the target rowid would be inserted.
|
||||
let moved_before = !flag.has(InsertFlags::REQUIRE_SEEK);
|
||||
return_if_io!(cursor.insert(
|
||||
&BTreeKey::new_table_rowid(key, Some(record.as_ref())),
|
||||
moved_before
|
||||
));
|
||||
}
|
||||
|
||||
// Only update last_insert_rowid for regular table inserts, not schema modifications
|
||||
@@ -5192,8 +5200,17 @@ pub enum OpNewRowidState {
|
||||
Start,
|
||||
SeekingToLast,
|
||||
ReadingMaxRowid,
|
||||
GeneratingRandom { attempts: u32 },
|
||||
VerifyingCandidate { attempts: u32, candidate: i64 },
|
||||
GeneratingRandom {
|
||||
attempts: u32,
|
||||
},
|
||||
VerifyingCandidate {
|
||||
attempts: u32,
|
||||
candidate: i64,
|
||||
},
|
||||
/// In case a rowid was generated and not provided by the user, we need to call next() on the cursor
|
||||
/// after generating the rowid. This is because the rowid was generated by seeking to the last row in the
|
||||
/// table, and we need to insert _after_ that row.
|
||||
GoNext,
|
||||
}
|
||||
|
||||
pub fn op_new_rowid(
|
||||
@@ -5250,9 +5267,8 @@ pub fn op_new_rowid(
|
||||
Some(rowid) if rowid < MAX_ROWID => {
|
||||
// Can use sequential
|
||||
state.registers[*rowid_reg] = Register::Value(Value::Integer(rowid + 1));
|
||||
state.op_new_rowid_state = OpNewRowidState::Start;
|
||||
state.pc += 1;
|
||||
return Ok(InsnFunctionStepResult::Step);
|
||||
state.op_new_rowid_state = OpNewRowidState::GoNext;
|
||||
continue;
|
||||
}
|
||||
Some(_) => {
|
||||
// Must use random (rowid == MAX_ROWID)
|
||||
@@ -5262,9 +5278,8 @@ pub fn op_new_rowid(
|
||||
None => {
|
||||
// Empty table
|
||||
state.registers[*rowid_reg] = Register::Value(Value::Integer(1));
|
||||
state.op_new_rowid_state = OpNewRowidState::Start;
|
||||
state.pc += 1;
|
||||
return Ok(InsnFunctionStepResult::Step);
|
||||
state.op_new_rowid_state = OpNewRowidState::GoNext;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5315,6 +5330,16 @@ pub fn op_new_rowid(
|
||||
};
|
||||
}
|
||||
}
|
||||
OpNewRowidState::GoNext => {
|
||||
{
|
||||
let mut cursor = state.get_cursor(*cursor);
|
||||
let cursor = cursor.as_btree_mut();
|
||||
return_if_io!(cursor.next());
|
||||
}
|
||||
state.op_new_rowid_state = OpNewRowidState::Start;
|
||||
state.pc += 1;
|
||||
return Ok(InsnFunctionStepResult::Step);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,6 +109,7 @@ pub struct InsertFlags(pub u8);
|
||||
|
||||
impl InsertFlags {
|
||||
pub const UPDATE: u8 = 0x01; // Flag indicating this is part of an UPDATE statement
|
||||
pub const REQUIRE_SEEK: u8 = 0x02; // Flag indicating that a seek is required to insert the row
|
||||
|
||||
pub fn new() -> Self {
|
||||
InsertFlags(0)
|
||||
@@ -118,12 +119,8 @@ impl InsertFlags {
|
||||
(self.0 & flag) != 0
|
||||
}
|
||||
|
||||
pub fn update(mut self, is_update: bool) -> Self {
|
||||
if is_update {
|
||||
self.0 |= InsertFlags::UPDATE;
|
||||
} else {
|
||||
self.0 &= !InsertFlags::UPDATE;
|
||||
}
|
||||
pub fn require_seek(mut self) -> Self {
|
||||
self.0 |= InsertFlags::REQUIRE_SEEK;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user