Merge 'Add state machine for op_idx_delete + DeleteState simplification' from Pere Diaz Bou

DeleteState had a bit too many unnecessary states so I removed them.
Usually we care about having a different state when I/O is triggered
requiring a state to be stored for later.
Furthermore, there was a bug with op_idx_delete where if balance is
triggered, op_idx_delete wouldn't be re-entrant. So a state machine was
added to prevent that from happening.

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #1421
This commit is contained in:
Pekka Enberg
2025-04-29 21:49:31 +03:00
4 changed files with 100 additions and 91 deletions

View File

@@ -165,21 +165,13 @@ enum DeleteState {
cell_idx: usize,
original_child_pointer: Option<u32>,
},
DropCell {
cell_idx: usize,
},
CheckNeedsBalancing,
StartBalancing {
target_key: DeleteSavepoint,
},
WaitForBalancingToComplete {
target_key: DeleteSavepoint,
},
SeekAfterBalancing {
target_key: DeleteSavepoint,
},
StackRetreat,
Finish,
}
#[derive(Clone)]
@@ -3530,15 +3522,12 @@ impl BTreeCursor {
/// 1. Start -> check if the rowid to be delete is present in the page or not. If not we early return
/// 2. LoadPage -> load the page.
/// 3. FindCell -> find the cell to be deleted in the page.
/// 4. ClearOverflowPages -> clear overflow pages associated with the cell. here if the cell is a leaf page go to DropCell state
/// or else go to InteriorNodeReplacement
/// 4. ClearOverflowPages -> Clear the overflow pages if there are any before dropping the cell, then if we are in a leaf page we just drop the cell in place.
/// if we are in interior page, we need to rotate keys in order to replace current cell (InteriorNodeReplacement).
/// 5. InteriorNodeReplacement -> we copy the left subtree leaf node into the deleted interior node's place.
/// 6. DropCell -> only for leaf nodes. drop the cell.
/// 7. CheckNeedsBalancing -> check if balancing is needed. If yes, move to StartBalancing else move to StackRetreat
/// 8. WaitForBalancingToComplete -> perform balancing
/// 9. SeekAfterBalancing -> adjust the cursor to a node that is closer to the deleted value. go to Finish
/// 10. StackRetreat -> perform stack retreat for cursor positioning. only when balancing is not needed. go to Finish
/// 11. Finish -> Delete operation is done. Return CursorResult(Ok())
/// 6. WaitForBalancingToComplete -> perform balancing
/// 7. SeekAfterBalancing -> adjust the cursor to a node that is closer to the deleted value. go to Finish
/// 8. Finish -> Delete operation is done. Return CursorResult(Ok())
pub fn delete(&mut self) -> Result<CursorResult<()>> {
assert!(self.mv_cursor.is_none());
@@ -3554,10 +3543,13 @@ impl BTreeCursor {
let delete_info = self.state.delete_info().expect("cannot get delete info");
delete_info.state.clone()
};
tracing::debug!("delete state: {:?}", delete_state);
match delete_state {
DeleteState::Start => {
let page = self.stack.top();
page.set_dirty();
self.pager.add_dirty(page.get().id);
if matches!(
page.get_contents().page_type(),
PageType::TableLeaf | PageType::TableInterior
@@ -3646,7 +3638,11 @@ impl BTreeCursor {
original_child_pointer,
};
} else {
delete_info.state = DeleteState::DropCell { cell_idx };
let contents = page.get().contents.as_mut().unwrap();
drop_cell(contents, cell_idx, self.usable_space() as u16)?;
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::CheckNeedsBalancing;
}
}
@@ -3724,33 +3720,9 @@ impl BTreeCursor {
delete_info.state = DeleteState::CheckNeedsBalancing;
}
DeleteState::DropCell { cell_idx } => {
let page = self.stack.top();
return_if_locked!(page);
if !page.is_loaded() {
self.pager.load_page(page.clone())?;
return Ok(CursorResult::IO);
}
page.set_dirty();
self.pager.add_dirty(page.get().id);
let contents = page.get().contents.as_mut().unwrap();
drop_cell(contents, cell_idx, self.usable_space() as u16)?;
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::CheckNeedsBalancing;
}
DeleteState::CheckNeedsBalancing => {
let page = self.stack.top();
return_if_locked!(page);
if !page.is_loaded() {
self.pager.load_page(page.clone())?;
return Ok(CursorResult::IO);
}
return_if_locked_maybe_load!(self.pager, page);
let contents = page.get().contents.as_ref().unwrap();
let free_space = compute_free_space(contents, self.usable_space() as u16);
@@ -3764,24 +3736,20 @@ impl BTreeCursor {
let delete_info = self.state.mut_delete_info().unwrap();
if needs_balancing {
delete_info.state = DeleteState::StartBalancing { target_key };
if delete_info.balance_write_info.is_none() {
let mut write_info = WriteInfo::new();
write_info.state = WriteState::BalanceStart;
delete_info.balance_write_info = Some(write_info);
}
delete_info.state = DeleteState::WaitForBalancingToComplete { target_key }
} else {
delete_info.state = DeleteState::StackRetreat;
self.stack.retreat();
self.state = CursorState::None;
return Ok(CursorResult::Ok(()));
}
}
DeleteState::StartBalancing { target_key } => {
let delete_info = self.state.mut_delete_info().unwrap();
if delete_info.balance_write_info.is_none() {
let mut write_info = WriteInfo::new();
write_info.state = WriteState::BalanceStart;
delete_info.balance_write_info = Some(write_info);
}
delete_info.state = DeleteState::WaitForBalancingToComplete { target_key }
}
DeleteState::WaitForBalancingToComplete { target_key } => {
let delete_info = self.state.mut_delete_info().unwrap();
@@ -3806,6 +3774,7 @@ impl BTreeCursor {
}
CursorResult::IO => {
// Move to seek state
// Save balance progress and return IO
let write_info = match &self.state {
CursorState::Write(wi) => wi.clone(),
@@ -3830,19 +3799,6 @@ impl BTreeCursor {
};
return_if_io!(self.seek(key, SeekOp::EQ));
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::Finish;
delete_info.balance_write_info = None;
}
DeleteState::StackRetreat => {
self.stack.retreat();
let delete_info = self.state.mut_delete_info().unwrap();
delete_info.state = DeleteState::Finish;
delete_info.balance_write_info = None;
}
DeleteState::Finish => {
self.state = CursorState::None;
return Ok(CursorResult::Ok(()));
}

View File

@@ -903,6 +903,26 @@ impl ImmutableRecord {
}
}
impl Display for ImmutableRecord {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for value in &self.values {
match value {
RefValue::Null => write!(f, "NULL")?,
RefValue::Integer(i) => write!(f, "Integer({})", *i)?,
RefValue::Float(flo) => write!(f, "Float({})", *flo)?,
RefValue::Text(text_ref) => write!(f, "Text({})", text_ref.as_str())?,
RefValue::Blob(raw_slice) => {
write!(f, "Blob({})", String::from_utf8_lossy(raw_slice.to_slice()))?
}
}
if value != self.values.last().unwrap() {
write!(f, ", ")?;
}
}
Ok(())
}
}
impl Clone for ImmutableRecord {
fn clone(&self) -> Self {
let mut new_values = Vec::new();

View File

@@ -3,6 +3,7 @@ use crate::numeric::{NullableInteger, Numeric};
use crate::storage::database::FileMemoryStorage;
use crate::storage::page_cache::DumbLruPageCache;
use crate::storage::pager::CreateBTreeFlags;
use crate::types::ImmutableRecord;
use crate::{
error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY},
ext::ExtValue,
@@ -3756,6 +3757,11 @@ pub fn op_delete(
{
let mut cursor = state.get_cursor(*cursor_id);
let cursor = cursor.as_btree_mut();
tracing::debug!(
"op_delete(record={:?}, rowid={:?})",
cursor.record(),
cursor.rowid()?
);
return_if_io!(cursor.delete());
}
let prev_changes = program.n_change.get();
@@ -3764,6 +3770,10 @@ pub fn op_delete(
Ok(InsnFunctionStepResult::Step)
}
pub enum OpIdxDeleteState {
Seeking(ImmutableRecord), // First seek row to delete
Deleting,
}
pub fn op_idx_delete(
program: &Program,
state: &mut ProgramState,
@@ -3779,29 +3789,50 @@ pub fn op_idx_delete(
else {
unreachable!("unexpected Insn {:?}", insn)
};
let record = make_record(&state.registers, start_reg, num_regs);
{
let mut cursor = state.get_cursor(*cursor_id);
let cursor = cursor.as_btree_mut();
return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ));
if cursor.rowid()?.is_none() {
// If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error if no matching
// index entry is found. This happens when running an UPDATE or DELETE statement and the
// index entry to be updated or deleted is not found. For some uses of IdxDelete
// (example: the EXCEPT operator) it does not matter that no matching entry is found.
// For those cases, P5 is zero. Also, do not raise this (self-correcting and non-critical) error if in writable_schema mode.
return Err(LimboError::Corrupt(format!(
"IdxDelete: no matching index entry found for record {:?}",
record
)));
loop {
match &state.op_idx_delete_state {
Some(OpIdxDeleteState::Seeking(record)) => {
{
let mut cursor = state.get_cursor(*cursor_id);
let cursor = cursor.as_btree_mut();
return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ));
tracing::debug!(
"op_idx_delete(seek={}, record={} rowid={:?})",
&record,
cursor.record().as_ref().unwrap(),
cursor.rowid()
);
if cursor.rowid()?.is_none() {
// If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error if no matching
// index entry is found. This happens when running an UPDATE or DELETE statement and the
// index entry to be updated or deleted is not found. For some uses of IdxDelete
// (example: the EXCEPT operator) it does not matter that no matching entry is found.
// For those cases, P5 is zero. Also, do not raise this (self-correcting and non-critical) error if in writable_schema mode.
return Err(LimboError::Corrupt(format!(
"IdxDelete: no matching index entry found for record {:?}",
record
)));
}
}
state.op_idx_delete_state = Some(OpIdxDeleteState::Deleting);
}
Some(OpIdxDeleteState::Deleting) => {
{
let mut cursor = state.get_cursor(*cursor_id);
let cursor = cursor.as_btree_mut();
return_if_io!(cursor.delete());
}
let n_change = program.n_change.get();
program.n_change.set(n_change + 1);
state.pc += 1;
return Ok(InsnFunctionStepResult::Step);
}
None => {
let record = make_record(&state.registers, start_reg, num_regs);
state.op_idx_delete_state = Some(OpIdxDeleteState::Seeking(record));
}
}
return_if_io!(cursor.delete());
}
let n_change = program.n_change.get();
program.n_change.set(n_change + 1);
state.pc += 1;
Ok(InsnFunctionStepResult::Step)
}
pub fn op_idx_insert(

View File

@@ -43,7 +43,7 @@ use crate::CheckpointStatus;
#[cfg(feature = "json")]
use crate::json::JsonCacheCell;
use crate::{Connection, MvStore, Result, TransactionState};
use execute::{InsnFunction, InsnFunctionStepResult};
use execute::{InsnFunction, InsnFunctionStepResult, OpIdxDeleteState};
use rand::{
distributions::{Distribution, Uniform},
@@ -257,6 +257,7 @@ pub struct ProgramState {
halt_state: Option<HaltState>,
#[cfg(feature = "json")]
json_cache: JsonCacheCell,
op_idx_delete_state: Option<OpIdxDeleteState>,
}
impl ProgramState {
@@ -280,6 +281,7 @@ impl ProgramState {
halt_state: None,
#[cfg(feature = "json")]
json_cache: JsonCacheCell::new(),
op_idx_delete_state: None,
}
}