diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 7b2419cee..f9426bd34 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -22,6 +22,13 @@ use crate::{ LimboError, Result, }; +use super::{ + pager::PageRef, + sqlite3_ondisk::{ + write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, DATABASE_HEADER_SIZE, + MINIMUM_CELL_SIZE, + }, +}; #[cfg(debug_assertions)] use std::collections::HashSet; use std::{ @@ -34,13 +41,6 @@ use std::{ sync::Arc, }; -use super::{ - pager::PageRef, - sqlite3_ondisk::{ - write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, DATABASE_HEADER_SIZE, - }, -}; - /// The B-Tree page header is 12 bytes for interior pages and 8 bytes for leaf pages. /// /// +--------+-----------------+-----------------+-----------------+--------+----- ..... ----+ @@ -6364,7 +6364,10 @@ fn compute_free_space(page: &PageContent, usable_space: u16) -> u16 { /// Allocate space for a cell on a page. fn allocate_cell_space(page_ref: &PageContent, amount: u16, usable_space: u16) -> Result { - let amount = amount as usize; + let mut amount = amount as usize; + if amount < MINIMUM_CELL_SIZE { + amount = MINIMUM_CELL_SIZE; + } let (cell_offset, _) = page_ref.cell_pointer_array_offset_and_size(); let gap = cell_offset + 2 * page_ref.cell_count(); diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index f9fcacb77..b07df89df 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -88,6 +88,9 @@ pub const DEFAULT_PAGE_SIZE: u16 = 4096; pub const DATABASE_HEADER_PAGE_ID: usize = 1; +/// The minimum size of a cell in bytes. +pub const MINIMUM_CELL_SIZE: usize = 4; + /// The database header. /// The first 100 bytes of the database file comprise the database file header. /// The database file header is divided into fields as shown by the table below. @@ -668,7 +671,11 @@ impl PageContent { if overflows { to_read + n_payload } else { - len_payload as usize + n_payload + let mut size = len_payload as usize + n_payload; + if size < MINIMUM_CELL_SIZE { + size = MINIMUM_CELL_SIZE; + } + size } } PageType::TableLeaf => { @@ -683,7 +690,11 @@ impl PageContent { if overflows { to_read + n_payload + n_rowid } else { - len_payload as usize + n_payload + n_rowid + let mut size = len_payload as usize + n_payload + n_rowid; + if size < MINIMUM_CELL_SIZE { + size = MINIMUM_CELL_SIZE; + } + size } } }; diff --git a/core/translate/compound_select.rs b/core/translate/compound_select.rs index d7e6f4689..58dcd5372 100644 --- a/core/translate/compound_select.rs +++ b/core/translate/compound_select.rs @@ -150,9 +150,9 @@ fn emit_compound_select( CompoundOperator::Union => { let mut new_dedupe_index = false; let dedupe_index = match right_most.query_destination { - QueryDestination::EphemeralIndex { cursor_id, index } => { - (cursor_id, index.clone()) - } + QueryDestination::EphemeralIndex { + cursor_id, index, .. + } => (cursor_id, index.clone()), _ => { new_dedupe_index = true; create_dedupe_index(program, &right_most, schema)? @@ -161,6 +161,7 @@ fn emit_compound_select( plan.query_destination = QueryDestination::EphemeralIndex { cursor_id: dedupe_index.0, index: dedupe_index.1.clone(), + is_delete: false, }; let compound_select = Plan::CompoundSelect { left, @@ -182,20 +183,18 @@ fn emit_compound_select( right_most.query_destination = QueryDestination::EphemeralIndex { cursor_id: dedupe_index.0, index: dedupe_index.1.clone(), + is_delete: false, }; emit_query(program, &mut right_most, &mut right_most_ctx)?; if new_dedupe_index { - let label_jump_over_dedupe = program.allocate_label(); - read_deduplicated_union_rows( + read_deduplicated_union_or_except_rows( program, dedupe_index.0, dedupe_index.1.as_ref(), limit_ctx, - label_jump_over_dedupe, yield_reg, ); - program.preassign_label_to_next_insn(label_jump_over_dedupe); } } CompoundOperator::Intersect => { @@ -211,6 +210,7 @@ fn emit_compound_select( plan.query_destination = QueryDestination::EphemeralIndex { cursor_id: left_cursor_id, index: left_index.clone(), + is_delete: false, }; let compound_select = Plan::CompoundSelect { left, @@ -234,6 +234,7 @@ fn emit_compound_select( right_most.query_destination = QueryDestination::EphemeralIndex { cursor_id: right_cursor_id, index: right_index, + is_delete: false, }; emit_query(program, &mut right_most, &mut right_most_ctx)?; read_intersect_rows( @@ -246,8 +247,49 @@ fn emit_compound_select( yield_reg, ); } - _ => { - crate::bail_parse_error!("unimplemented compound select operator: {:?}", operator); + CompoundOperator::Except => { + let mut new_index = false; + let (cursor_id, index) = match right_most.query_destination { + QueryDestination::EphemeralIndex { + cursor_id, index, .. + } => (cursor_id, index), + _ => { + new_index = true; + create_dedupe_index(program, &right_most, schema)? + } + }; + plan.query_destination = QueryDestination::EphemeralIndex { + cursor_id, + index: index.clone(), + is_delete: false, + }; + let compound_select = Plan::CompoundSelect { + left, + right_most: plan, + limit, + offset, + order_by, + }; + emit_compound_select( + program, + compound_select, + schema, + syms, + None, + yield_reg, + reg_result_cols_start, + )?; + right_most.query_destination = QueryDestination::EphemeralIndex { + cursor_id, + index: index.clone(), + is_delete: true, + }; + emit_query(program, &mut right_most, &mut right_most_ctx)?; + if new_index { + read_deduplicated_union_or_except_rows( + program, cursor_id, &index, limit_ctx, yield_reg, + ); + } } }, None => { @@ -302,15 +344,16 @@ fn create_dedupe_index( Ok((cursor_id, dedupe_index.clone())) } -/// Emits the bytecode for reading deduplicated rows from the ephemeral index created for UNION operators. -fn read_deduplicated_union_rows( +/// Emits the bytecode for reading deduplicated rows from the ephemeral index created for +/// UNION or EXCEPT operators. +fn read_deduplicated_union_or_except_rows( program: &mut ProgramBuilder, dedupe_cursor_id: usize, dedupe_index: &Index, limit_ctx: Option, - label_limit_reached: BranchOffset, yield_reg: Option, ) { + let label_close = program.allocate_label(); let label_dedupe_next = program.allocate_label(); let label_dedupe_loop_start = program.allocate_label(); let dedupe_cols_start_reg = program.alloc_registers(dedupe_index.columns.len()); @@ -348,7 +391,7 @@ fn read_deduplicated_union_rows( if let Some(limit_ctx) = limit_ctx { program.emit_insn(Insn::DecrJumpZero { reg: limit_ctx.reg_limit, - target_pc: label_limit_reached, + target_pc: label_close, }) } program.preassign_label_to_next_insn(label_dedupe_next); @@ -356,6 +399,7 @@ fn read_deduplicated_union_rows( cursor_id: dedupe_cursor_id, pc_if_next: label_dedupe_loop_start, }); + program.preassign_label_to_next_insn(label_close); program.emit_insn(Insn::Close { cursor_id: dedupe_cursor_id, }); diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index c0f98f101..0b50c40a6 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -565,6 +565,7 @@ fn emit_delete_insns( start_reg, num_regs, cursor_id: index_cursor_id, + raise_error_if_no_matching_entry: true, }); } } @@ -1083,6 +1084,7 @@ fn emit_update_insns( start_reg, num_regs, cursor_id: idx_cursor_id, + raise_error_if_no_matching_entry: true, }); // Insert new index key (filled further above with values from set_clauses) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 462f38442..6347b500f 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -324,6 +324,8 @@ pub enum QueryDestination { cursor_id: CursorID, /// The index that will be used to store the results. index: Arc, + /// Whether this is a delete operation that will remove the index entries + is_delete: bool, }, /// The results of the query are stored in an ephemeral table, /// later used by the parent query. diff --git a/core/translate/result_row.rs b/core/translate/result_row.rs index c079e2eb9..62c6bc96a 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -85,21 +85,31 @@ pub fn emit_result_row_and_limit( QueryDestination::EphemeralIndex { cursor_id: index_cursor_id, index: dedupe_index, + is_delete, } => { - let record_reg = program.alloc_register(); - program.emit_insn(Insn::MakeRecord { - start_reg: result_columns_start_reg, - count: plan.result_columns.len(), - dest_reg: record_reg, - index_name: Some(dedupe_index.name.clone()), - }); - program.emit_insn(Insn::IdxInsert { - cursor_id: *index_cursor_id, - record_reg, - unpacked_start: None, - unpacked_count: None, - flags: IdxInsertFlags::new().no_op_duplicate(), - }); + if *is_delete { + program.emit_insn(Insn::IdxDelete { + start_reg: result_columns_start_reg, + num_regs: plan.result_columns.len(), + cursor_id: *index_cursor_id, + raise_error_if_no_matching_entry: false, + }); + } else { + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: result_columns_start_reg, + count: plan.result_columns.len(), + dest_reg: record_reg, + index_name: Some(dedupe_index.name.clone()), + }); + program.emit_insn(Insn::IdxInsert { + cursor_id: *index_cursor_id, + record_reg, + unpacked_start: None, + unpacked_count: None, + flags: IdxInsertFlags::new().no_op_duplicate(), + }); + } } QueryDestination::EphemeralTable { cursor_id: table_cursor_id, diff --git a/core/translate/select.rs b/core/translate/select.rs index 25f23d993..df968b16a 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -124,15 +124,6 @@ pub fn prepare_select_plan( let mut left = Vec::with_capacity(compounds.len()); for CompoundSelect { select, operator } in compounds { - // TODO: add support for EXCEPT - if operator != ast::CompoundOperator::UnionAll - && operator != ast::CompoundOperator::Union - && operator != ast::CompoundOperator::Intersect - { - crate::bail_parse_error!( - "only UNION ALL, UNION and INTERSECT are supported for compound SELECTs" - ); - } left.push((last, operator)); last = prepare_one_select_plan( schema, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 592070d52..8cbbf486d 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4362,6 +4362,7 @@ pub fn op_idx_delete( cursor_id, start_reg, num_regs, + raise_error_if_no_matching_entry, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -4377,7 +4378,7 @@ pub fn op_idx_delete( ); match &state.op_idx_delete_state { Some(OpIdxDeleteState::Seeking(record)) => { - { + let found = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); let found = return_if_io!( @@ -4389,6 +4390,21 @@ pub fn op_idx_delete( cursor.root_page(), record ); + found + }; + + if !found { + // If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error if no matching index entry is found + // Also, do not raise this (self-correcting and non-critical) error if in writable_schema mode. + if *raise_error_if_no_matching_entry { + return Err(LimboError::Corrupt(format!( + "IdxDelete: no matching index entry found for record {:?}", + record + ))); + } + state.pc += 1; + state.op_idx_delete_state = None; + return Ok(InsnFunctionStepResult::Step); } state.op_idx_delete_state = Some(OpIdxDeleteState::Verifying); } @@ -4399,12 +4415,7 @@ pub fn op_idx_delete( return_if_io!(cursor.rowid()) }; - if rowid.is_none() { - // If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error if no matching - // index entry is found. This happens when running an UPDATE or DELETE statement and the - // index entry to be updated or deleted is not found. For some uses of IdxDelete - // (example: the EXCEPT operator) it does not matter that no matching entry is found. - // For those cases, P5 is zero. Also, do not raise this (self-correcting and non-critical) error if in writable_schema mode. + if rowid.is_none() && *raise_error_if_no_matching_entry { return Err(LimboError::Corrupt(format!( "IdxDelete: no matching index entry found for record {:?}", make_record(&state.registers, start_reg, num_regs) diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 933b82ee2..13efe1322 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1120,13 +1120,14 @@ pub fn insn_to_str( cursor_id, start_reg, num_regs, + raise_error_if_no_matching_entry, } => ( "IdxDelete", *cursor_id as i32, *start_reg as i32, *num_regs as i32, Value::build_text(""), - 0, + *raise_error_if_no_matching_entry as u16, "".to_string(), ), Insn::NewRowid { diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 41ad4e30c..f094f3db0 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -742,10 +742,15 @@ pub enum Insn { cursor_id: CursorID, }, + /// If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error if no matching index entry + /// is found. This happens when running an UPDATE or DELETE statement and the index entry to + /// be updated or deleted is not found. For some uses of IdxDelete (example: the EXCEPT operator) + /// it does not matter that no matching entry is found. For those cases, P5 is zero. IdxDelete { start_reg: usize, num_regs: usize, cursor_id: CursorID, + raise_error_if_no_matching_entry: bool, // P5 }, NewRowid { diff --git a/testing/insert.test b/testing/insert.test index 4f3fef7b1..b94d7c8d2 100755 --- a/testing/insert.test +++ b/testing/insert.test @@ -360,6 +360,17 @@ if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-s INSERT INTO t SELECT * FROM t1 INTERSECT SELECT * FROM t2 INTERSECT SELECT * FROM t3; SELECT * FROM t; } {2|200} + + do_execsql_test_on_specific_db {:memory:} insert_from_select_except { + CREATE TABLE t(a, b); + CREATE TABLE t1(a, b); + CREATE TABLE t2(a, b); + + INSERT INTO t1 VALUES (1, 100), (2, 200); + INSERT INTO t2 VALUES (2, 200), (3, 300); + INSERT INTO t SELECT * FROM t1 EXCEPT SELECT * FROM t2; + SELECT * FROM t; + } {1|100} } do_execsql_test_on_specific_db {:memory:} negative-primary-integer-key { diff --git a/testing/select.test b/testing/select.test index ba16fd672..d3142be3e 100755 --- a/testing/select.test +++ b/testing/select.test @@ -449,4 +449,114 @@ if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-s } {a|a b|b z|z} + + do_execsql_test_on_specific_db {:memory:} select-except-1 { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + INSERT INTO u VALUES('x','x'),('z','y'); + + select * from t EXCEPT select * from u; + } {y|y} + + do_execsql_test_on_specific_db {:memory:} select-except-2 { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + INSERT INTO u VALUES('x','x'),('y','y'); + + select * from t EXCEPT select * from u; + } {} + + do_execsql_test_on_specific_db {:memory:} select-except-3 { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + CREATE TABLE v(x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + INSERT INTO u VALUES('x','x'),('a','y'); + INSERT INTO v VALUES('a','x'),('b','y'); + + select * from t EXCEPT select * from u EXCEPT select * from v; + } {y|y} + + do_execsql_test_on_specific_db {:memory:} select-except-limit { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + INSERT INTO t VALUES('a', 'a'),('x','x'),('y','y'),('z','z'); + INSERT INTO u VALUES('x','x'),('z','y'); + + select * from t EXCEPT select * from u limit 2; + } {a|a + y|y} + + do_execsql_test_on_specific_db {:memory:} select-except-union-all { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + CREATE TABLE v(x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + INSERT INTO u VALUES('x','x'),('z','y'); + INSERT INTO v VALUES('x','x'),('y','y'); + + select * from t EXCEPT select * from u UNION ALL select * from v; + } {y|y + x|x + y|y} + + do_execsql_test_on_specific_db {:memory:} select-union-all-except { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + CREATE TABLE v(x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + INSERT INTO u VALUES('x','x'),('z','y'); + INSERT INTO v VALUES('x','x'),('y','y'); + + select * from t UNION ALL select * from u EXCEPT select * from v; + } {z|y} + + do_execsql_test_on_specific_db {:memory:} select-except-union { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + CREATE TABLE v(x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + INSERT INTO u VALUES('x','x'),('z','y'); + INSERT INTO v VALUES('x','x'),('z','z'); + + select * from t EXCEPT select * from u UNION select * from v; + } {x|x + y|y + z|z} + + do_execsql_test_on_specific_db {:memory:} select-union-except { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + CREATE TABLE v(x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + INSERT INTO u VALUES('x','x'),('z','y'); + INSERT INTO v VALUES('x','x'),('z','z'); + + select * from t UNION select * from u EXCEPT select * from v; + } {y|y + z|y} + + do_execsql_test_on_specific_db {:memory:} select-except-intersect { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + CREATE TABLE v(x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + INSERT INTO u VALUES('x','x'),('z','y'); + INSERT INTO v VALUES('y','y'),('z','z'); + + select * from t EXCEPT select * from u INTERSECT select * from v; + } {y|y} + + do_execsql_test_on_specific_db {:memory:} select-intersect-except { + CREATE TABLE t(x TEXT, y TEXT); + CREATE TABLE u(x TEXT, y TEXT); + CREATE TABLE v(x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + INSERT INTO u VALUES('x','x'),('z','y'); + INSERT INTO v VALUES('x','x'),('z','z'); + + select * from t INTERSECT select * from u EXCEPT select * from v; + } {} } diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 2015f8ae7..ecb478fcc 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -584,7 +584,8 @@ mod tests { )); } - const COMPOUND_OPERATORS: [&str; 3] = [" UNION ALL ", " UNION ", " INTERSECT "]; + const COMPOUND_OPERATORS: [&str; 4] = + [" UNION ALL ", " UNION ", " INTERSECT ", " EXCEPT "]; let mut query = String::new(); for (i, select_statement) in select_statements.iter().enumerate() {