mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-23 17:05:36 +01:00
Merge 'Index search fixes' from Nikita Sivukhin
This PR bundles 2 fixes:
1. Index search must skip NULL values
2. UPDATE must avoid using index which column is used in the SET clause
* This was an optimization to not do full scan in case of `UPDATE t
SET ... WHERE col = ?` but instead of doing this hacks we must properly
load updated row set to the ephemeral index and flush it after update
will be finished instead of modifying BTree inplace
* So, for now we completely remove this optimization and quitely
wait for proper optimization to land
Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>
Closes #3459
This commit is contained in:
@@ -551,90 +551,89 @@ pub fn open_loop(
|
||||
);
|
||||
// Open the loop for the index search.
|
||||
// Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup.
|
||||
if let Search::RowidEq { cmp_expr } = search {
|
||||
let src_reg = program.alloc_register();
|
||||
translate_expr(
|
||||
program,
|
||||
Some(table_references),
|
||||
cmp_expr,
|
||||
src_reg,
|
||||
&t_ctx.resolver,
|
||||
)?;
|
||||
program.emit_insn(Insn::SeekRowid {
|
||||
cursor_id: table_cursor_id
|
||||
.expect("Search::RowidEq requires a table cursor"),
|
||||
src_reg,
|
||||
target_pc: next,
|
||||
});
|
||||
} else {
|
||||
// Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore.
|
||||
if let Search::Seek {
|
||||
index: Some(index), ..
|
||||
} = search
|
||||
{
|
||||
if index.ephemeral {
|
||||
let table_has_rowid = if let Table::BTree(btree) = &table.table {
|
||||
btree.has_rowid
|
||||
} else {
|
||||
false
|
||||
};
|
||||
Some(emit_autoindex(
|
||||
program,
|
||||
index,
|
||||
table_cursor_id
|
||||
.expect("an ephemeral index must have a source table cursor"),
|
||||
index_cursor_id
|
||||
.expect("an ephemeral index must have an index cursor"),
|
||||
table_has_rowid,
|
||||
)?)
|
||||
} else {
|
||||
index_cursor_id
|
||||
match search {
|
||||
Search::RowidEq { cmp_expr } => {
|
||||
let src_reg = program.alloc_register();
|
||||
translate_expr(
|
||||
program,
|
||||
Some(table_references),
|
||||
cmp_expr,
|
||||
src_reg,
|
||||
&t_ctx.resolver,
|
||||
)?;
|
||||
program.emit_insn(Insn::SeekRowid {
|
||||
cursor_id: table_cursor_id
|
||||
.expect("Search::RowidEq requires a table cursor"),
|
||||
src_reg,
|
||||
target_pc: next,
|
||||
});
|
||||
}
|
||||
Search::Seek { index, .. } => {
|
||||
// Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore.
|
||||
if let Some(index) = index {
|
||||
if index.ephemeral {
|
||||
let table_has_rowid = if let Table::BTree(btree) = &table.table {
|
||||
btree.has_rowid
|
||||
} else {
|
||||
false
|
||||
};
|
||||
let _ = emit_autoindex(
|
||||
program,
|
||||
index,
|
||||
table_cursor_id.expect(
|
||||
"an ephemeral index must have a source table cursor",
|
||||
),
|
||||
index_cursor_id
|
||||
.expect("an ephemeral index must have an index cursor"),
|
||||
table_has_rowid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
index_cursor_id
|
||||
};
|
||||
|
||||
let is_index = index_cursor_id.is_some();
|
||||
let seek_cursor_id = temp_cursor_id.unwrap_or_else(|| {
|
||||
index_cursor_id.unwrap_or_else(|| {
|
||||
table_cursor_id
|
||||
.expect("Either ephemeral or index or table cursor must be opened")
|
||||
})
|
||||
});
|
||||
let Search::Seek { seek_def, .. } = search else {
|
||||
unreachable!("Rowid equality point lookup should have been handled above");
|
||||
};
|
||||
let seek_cursor_id = temp_cursor_id.unwrap_or_else(|| {
|
||||
index_cursor_id.unwrap_or_else(|| {
|
||||
table_cursor_id.expect(
|
||||
"Either ephemeral or index or table cursor must be opened",
|
||||
)
|
||||
})
|
||||
});
|
||||
let Search::Seek { seek_def, .. } = search else {
|
||||
unreachable!(
|
||||
"Rowid equality point lookup should have been handled above"
|
||||
);
|
||||
};
|
||||
|
||||
let start_reg = program.alloc_registers(seek_def.key.len());
|
||||
emit_seek(
|
||||
program,
|
||||
table_references,
|
||||
seek_def,
|
||||
t_ctx,
|
||||
seek_cursor_id,
|
||||
start_reg,
|
||||
loop_end,
|
||||
is_index,
|
||||
)?;
|
||||
emit_seek_termination(
|
||||
program,
|
||||
table_references,
|
||||
seek_def,
|
||||
t_ctx,
|
||||
seek_cursor_id,
|
||||
start_reg,
|
||||
loop_start,
|
||||
loop_end,
|
||||
is_index,
|
||||
)?;
|
||||
let start_reg = program.alloc_registers(seek_def.key.len());
|
||||
emit_seek(
|
||||
program,
|
||||
table_references,
|
||||
seek_def,
|
||||
t_ctx,
|
||||
seek_cursor_id,
|
||||
start_reg,
|
||||
loop_end,
|
||||
index.as_ref(),
|
||||
)?;
|
||||
emit_seek_termination(
|
||||
program,
|
||||
table_references,
|
||||
seek_def,
|
||||
t_ctx,
|
||||
seek_cursor_id,
|
||||
start_reg,
|
||||
loop_start,
|
||||
loop_end,
|
||||
index.as_ref(),
|
||||
)?;
|
||||
|
||||
if let Some(index_cursor_id) = index_cursor_id {
|
||||
if let Some(table_cursor_id) = table_cursor_id {
|
||||
// Don't do a btree table seek until it's actually necessary to read from the table.
|
||||
program.emit_insn(Insn::DeferredSeek {
|
||||
index_cursor_id,
|
||||
table_cursor_id,
|
||||
});
|
||||
if let Some(index_cursor_id) = index_cursor_id {
|
||||
if let Some(table_cursor_id) = table_cursor_id {
|
||||
// Don't do a btree table seek until it's actually necessary to read from the table.
|
||||
program.emit_insn(Insn::DeferredSeek {
|
||||
index_cursor_id,
|
||||
table_cursor_id,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1141,23 +1140,53 @@ fn emit_seek(
|
||||
seek_cursor_id: usize,
|
||||
start_reg: usize,
|
||||
loop_end: BranchOffset,
|
||||
is_index: bool,
|
||||
seek_index: Option<&Arc<Index>>,
|
||||
) -> Result<()> {
|
||||
let is_index = seek_index.is_some();
|
||||
let Some(seek) = seek_def.seek.as_ref() else {
|
||||
// If there is no seek key, we start from the first or last row of the index,
|
||||
// depending on the iteration direction.
|
||||
//
|
||||
// Also, if we will encounter NULLs in the index at the beginning of iteration (Forward + Asc OR Backward + Desc)
|
||||
// then, we must explicitly skip them as seek always has some bound condition over indexed column (e.g. c < ?, c >= ?, ...)
|
||||
//
|
||||
// note that table seek has some rules to convert seek key values to the integer affinity + it has some logic to check for explicit NULL searches
|
||||
// so, we emit simple Rewind/Last in case of search over table BTree
|
||||
// (this is safe as table BTree keys are always non-null single integer)
|
||||
match seek_def.iter_dir {
|
||||
IterationDirection::Forwards => {
|
||||
program.emit_insn(Insn::Rewind {
|
||||
cursor_id: seek_cursor_id,
|
||||
pc_if_empty: loop_end,
|
||||
});
|
||||
if seek_index.is_some_and(|index| index.columns[0].order == SortOrder::Asc) {
|
||||
program.emit_null(start_reg, None);
|
||||
program.emit_insn(Insn::SeekGT {
|
||||
is_index,
|
||||
cursor_id: seek_cursor_id,
|
||||
start_reg,
|
||||
num_regs: 1,
|
||||
target_pc: loop_end,
|
||||
});
|
||||
} else {
|
||||
program.emit_insn(Insn::Rewind {
|
||||
cursor_id: seek_cursor_id,
|
||||
pc_if_empty: loop_end,
|
||||
});
|
||||
}
|
||||
}
|
||||
IterationDirection::Backwards => {
|
||||
program.emit_insn(Insn::Last {
|
||||
cursor_id: seek_cursor_id,
|
||||
pc_if_empty: loop_end,
|
||||
});
|
||||
if seek_index.is_some_and(|index| index.columns[0].order == SortOrder::Desc) {
|
||||
program.emit_null(start_reg, None);
|
||||
program.emit_insn(Insn::SeekLT {
|
||||
is_index,
|
||||
cursor_id: seek_cursor_id,
|
||||
start_reg,
|
||||
num_regs: 1,
|
||||
target_pc: loop_end,
|
||||
});
|
||||
} else {
|
||||
program.emit_insn(Insn::Last {
|
||||
cursor_id: seek_cursor_id,
|
||||
pc_if_empty: loop_end,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
@@ -1254,10 +1283,37 @@ fn emit_seek_termination(
|
||||
start_reg: usize,
|
||||
loop_start: BranchOffset,
|
||||
loop_end: BranchOffset,
|
||||
is_index: bool,
|
||||
seek_index: Option<&Arc<Index>>,
|
||||
) -> Result<()> {
|
||||
let is_index = seek_index.is_some();
|
||||
let Some(termination) = seek_def.termination.as_ref() else {
|
||||
program.preassign_label_to_next_insn(loop_start);
|
||||
// If we will encounter NULLs in the index at the end of iteration (Forward + Desc OR Backward + Asc)
|
||||
// then, we must explicitly stop before them as seek always has some bound condition over indexed column (e.g. c < ?, c >= ?, ...)
|
||||
match seek_def.iter_dir {
|
||||
IterationDirection::Forwards => {
|
||||
if seek_index.is_some_and(|index| index.columns[0].order == SortOrder::Desc) {
|
||||
program.emit_null(start_reg, None);
|
||||
program.emit_insn(Insn::IdxGE {
|
||||
cursor_id: seek_cursor_id,
|
||||
start_reg,
|
||||
num_regs: 1,
|
||||
target_pc: loop_end,
|
||||
});
|
||||
}
|
||||
}
|
||||
IterationDirection::Backwards => {
|
||||
if seek_index.is_some_and(|index| index.columns[0].order == SortOrder::Asc) {
|
||||
program.emit_null(start_reg, None);
|
||||
program.emit_insn(Insn::IdxLE {
|
||||
cursor_id: seek_cursor_id,
|
||||
start_reg,
|
||||
num_regs: 1,
|
||||
target_pc: loop_end,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
|
||||
@@ -142,10 +142,6 @@ fn optimize_update_plan(plan: &mut UpdatePlan, schema: &Schema) -> Result<()> {
|
||||
if !plan.indexes_to_update.iter().any(|i| Arc::ptr_eq(index, i)) {
|
||||
return Ok(());
|
||||
}
|
||||
// Fine to use index if we aren't going to be iterating over it, since it returns at most 1 row.
|
||||
if table_ref.op.returns_max_1_row() {
|
||||
return Ok(());
|
||||
}
|
||||
// Otherwise, fall back to a table scan.
|
||||
table_ref.op = Operation::Scan(Scan::BTreeTable {
|
||||
iter_dir: IterationDirection::Forwards,
|
||||
|
||||
@@ -795,19 +795,6 @@ impl Operation {
|
||||
Operation::Search(Search::Seek { index, .. }) => index.as_ref(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn returns_max_1_row(&self) -> bool {
|
||||
match self {
|
||||
Operation::Scan(_) => false,
|
||||
Operation::Search(Search::RowidEq { .. }) => true,
|
||||
Operation::Search(Search::Seek { index, seek_def }) => {
|
||||
let Some(index) = index else {
|
||||
return false;
|
||||
};
|
||||
index.unique && seek_def.seek.as_ref().is_some_and(|seek| seek.op.eq_only())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl JoinedTable {
|
||||
|
||||
@@ -787,3 +787,28 @@ do_execsql_test_on_specific_db {:memory:} rowid-references {
|
||||
AND `oid` = 2
|
||||
AND [oid] = 2;
|
||||
} {2|2|2|2|2|2|2|2|2|2|2|2}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} null-in-search {
|
||||
CREATE TABLE t_x_asc (id INTEGER PRIMARY KEY, x);
|
||||
CREATE INDEX t_x_asc_idx ON t_x_asc(x ASC);
|
||||
CREATE TABLE t_x_desc (id INTEGER PRIMARY KEY, x);
|
||||
CREATE INDEX t_x_desc_idx ON t_x_desc(x DESC);
|
||||
INSERT INTO t_x_asc VALUES (1, NULL), (2, 2), (10, 10);
|
||||
INSERT INTO t_x_desc VALUES (1, NULL), (2, 2), (10, 10);
|
||||
SELECT * FROM t_x_asc WHERE x > 5 ORDER BY x ASC;
|
||||
SELECT * FROM t_x_asc WHERE x > 5 ORDER BY x DESC;
|
||||
SELECT * FROM t_x_asc WHERE x < 5 ORDER BY x ASC;
|
||||
SELECT * FROM t_x_asc WHERE x < 5 ORDER BY x DESC;
|
||||
SELECT * FROM t_x_desc WHERE x > 5 ORDER BY x ASC;
|
||||
SELECT * FROM t_x_desc WHERE x > 5 ORDER BY x DESC;
|
||||
SELECT * FROM t_x_desc WHERE x < 5 ORDER BY x ASC;
|
||||
SELECT * FROM t_x_desc WHERE x < 5 ORDER BY x DESC;
|
||||
} {10|10
|
||||
10|10
|
||||
2|2
|
||||
2|2
|
||||
10|10
|
||||
10|10
|
||||
2|2
|
||||
2|2}
|
||||
|
||||
|
||||
@@ -672,6 +672,15 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
pub fn partial_index_mutation_and_upsert_fuzz() {
|
||||
index_mutation_upsert_fuzz(1.0, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn simple_index_mutation_and_upsert_fuzz() {
|
||||
index_mutation_upsert_fuzz(0.0, 0);
|
||||
}
|
||||
|
||||
fn index_mutation_upsert_fuzz(partial_index_prob: f64, conflict_chain_max_len: u32) {
|
||||
let _ = env_logger::try_init();
|
||||
const OUTER_ITERS: usize = 5;
|
||||
const INNER_ITERS: usize = 500;
|
||||
@@ -723,6 +732,7 @@ mod tests {
|
||||
let functions = ["lower", "upper", "length"];
|
||||
|
||||
let num_pidx = rng.random_range(0..=3);
|
||||
let mut conflict_match_targets = vec!["".to_string(), "(id)".to_string()];
|
||||
let mut idx_ddls: Vec<String> = Vec::new();
|
||||
for i in 0..num_pidx {
|
||||
// Pick 1 or 2 key columns; always include "k" sometimes to get frequent conflicts.
|
||||
@@ -790,13 +800,24 @@ mod tests {
|
||||
parts.join(" AND ")
|
||||
};
|
||||
|
||||
let ddl = format!(
|
||||
"CREATE UNIQUE INDEX idx_p{}_{} ON t({}) WHERE {}",
|
||||
outer,
|
||||
i,
|
||||
key_cols.join(","),
|
||||
pred
|
||||
);
|
||||
let ddl = if rng.random_bool(partial_index_prob) {
|
||||
format!(
|
||||
"CREATE UNIQUE INDEX idx_p{}_{} ON t({}) WHERE {}",
|
||||
outer,
|
||||
i,
|
||||
key_cols.join(","),
|
||||
pred
|
||||
)
|
||||
} else {
|
||||
// ON CONFLICT (...) can use only column set with non-partial UNIQUE constraint
|
||||
conflict_match_targets.push(format!("({})", key_cols.join(",")));
|
||||
format!(
|
||||
"CREATE UNIQUE INDEX idx_p{}_{} ON t({})",
|
||||
outer,
|
||||
i,
|
||||
key_cols.join(","),
|
||||
)
|
||||
};
|
||||
idx_ddls.push(ddl.clone());
|
||||
// Create in both engines
|
||||
println!("{ddl};");
|
||||
@@ -817,9 +838,11 @@ mod tests {
|
||||
vals.push(v);
|
||||
}
|
||||
let ins = format!("INSERT INTO t VALUES ({})", vals.join(", "));
|
||||
println!("{ins}; -- seed");
|
||||
// Execute on both; ignore errors due to partial unique conflicts (keep seeding going)
|
||||
let _ = sqlite.execute(&ins, rusqlite::params![]);
|
||||
let _ = limbo_exec_rows_fallible(&limbo_db, &limbo_conn, &ins);
|
||||
let sqlite_res = sqlite.execute(&ins, rusqlite::params![]);
|
||||
let limbo_res = limbo_exec_rows_fallible(&limbo_db, &limbo_conn, &ins);
|
||||
assert!(sqlite_res.is_ok() == limbo_res.is_ok());
|
||||
}
|
||||
|
||||
for _ in 0..INNER_ITERS {
|
||||
@@ -909,37 +932,43 @@ mod tests {
|
||||
});
|
||||
}
|
||||
}
|
||||
if rng.random_bool(0.3) {
|
||||
// 30% chance ON CONFLICT DO UPDATE SET ...
|
||||
let mut set_list = Vec::new();
|
||||
let num_set = rng.random_range(1..=cols_list.len());
|
||||
let set_cols = cols_list
|
||||
.choose_multiple(&mut rng, num_set)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
for c in set_cols.iter() {
|
||||
let v = if c == "k" {
|
||||
format!("'{}'", K_POOL.choose(&mut rng).unwrap())
|
||||
} else if rng.random_bool(0.2) {
|
||||
"NULL".into()
|
||||
} else {
|
||||
rng.random_range(-5..=15).to_string()
|
||||
};
|
||||
set_list.push(format!("{c} = {v}"));
|
||||
let chain_length = rng.random_range(0..=conflict_chain_max_len);
|
||||
let mut on_conflict = String::new();
|
||||
for _ in 0..chain_length {
|
||||
let idx = rng.random_range(0..conflict_match_targets.len());
|
||||
let target = &conflict_match_targets[idx];
|
||||
if rng.random_bool(0.8) {
|
||||
let mut set_list = Vec::new();
|
||||
let num_set = rng.random_range(1..=cols_list.len());
|
||||
let set_cols = cols_list
|
||||
.choose_multiple(&mut rng, num_set)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
for c in set_cols.iter() {
|
||||
let v = if c == "k" {
|
||||
format!("'{}'", K_POOL.choose(&mut rng).unwrap())
|
||||
} else if rng.random_bool(0.2) {
|
||||
"NULL".into()
|
||||
} else {
|
||||
rng.random_range(-5..=15).to_string()
|
||||
};
|
||||
set_list.push(format!("{c} = {v}"));
|
||||
}
|
||||
on_conflict.push_str(&format!(
|
||||
" ON CONFLICT{} DO UPDATE SET {}",
|
||||
target,
|
||||
set_list.join(", ")
|
||||
));
|
||||
} else {
|
||||
on_conflict.push_str(&format!(" ON CONFLICT{target} DO NOTHING"));
|
||||
}
|
||||
format!(
|
||||
"INSERT INTO t({}) VALUES({}) ON CONFLICT DO UPDATE SET {}",
|
||||
cols_list.join(","),
|
||||
vals_list.join(","),
|
||||
set_list.join(", ")
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"INSERT INTO t({}) VALUES({}) ON CONFLICT DO NOTHING",
|
||||
cols_list.join(","),
|
||||
vals_list.join(",")
|
||||
)
|
||||
}
|
||||
format!(
|
||||
"INSERT INTO t({}) VALUES({}) {}",
|
||||
cols_list.join(","),
|
||||
vals_list.join(","),
|
||||
on_conflict
|
||||
)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::common::{self, maybe_setup_tracing};
|
||||
use crate::common::{self, limbo_exec_rows, maybe_setup_tracing};
|
||||
use crate::common::{compare_string, do_flush, TempDatabase};
|
||||
use log::debug;
|
||||
use std::io::{Read, Seek, Write};
|
||||
@@ -790,3 +790,52 @@ fn test_insert_with_column_names() -> anyhow::Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn delete_search_op_ignore_nulls() {
|
||||
let limbo = TempDatabase::new_empty(true);
|
||||
let conn = limbo.db.connect().unwrap();
|
||||
for sql in [
|
||||
"CREATE TABLE t (id INTEGER PRIMARY KEY AUTOINCREMENT, c INT);",
|
||||
"CREATE UNIQUE INDEX t_idx ON t(c);",
|
||||
"INSERT INTO t VALUES (NULL, NULL)",
|
||||
"DELETE FROM t WHERE c < -1;",
|
||||
] {
|
||||
conn.execute(sql).unwrap();
|
||||
}
|
||||
assert_eq!(
|
||||
vec![vec![
|
||||
rusqlite::types::Value::Integer(1),
|
||||
rusqlite::types::Value::Null
|
||||
]],
|
||||
limbo_exec_rows(&limbo, &conn, "SELECT * FROM t ORDER BY id")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn delete_eq_correct() {
|
||||
let limbo = TempDatabase::new_empty(true);
|
||||
let conn = limbo.db.connect().unwrap();
|
||||
for sql in [
|
||||
"CREATE TABLE t (id INTEGER PRIMARY KEY AUTOINCREMENT, c INT);",
|
||||
"CREATE UNIQUE INDEX t_idx ON t(c);",
|
||||
"INSERT INTO t VALUES (null, -1);",
|
||||
"INSERT INTO t VALUES (null, -2);",
|
||||
"UPDATE t SET c = NULL WHERE c = -1;",
|
||||
] {
|
||||
conn.execute(sql).unwrap();
|
||||
}
|
||||
assert_eq!(
|
||||
vec![
|
||||
vec![
|
||||
rusqlite::types::Value::Integer(1),
|
||||
rusqlite::types::Value::Null
|
||||
],
|
||||
vec![
|
||||
rusqlite::types::Value::Integer(2),
|
||||
rusqlite::types::Value::Integer(-2),
|
||||
]
|
||||
],
|
||||
limbo_exec_rows(&limbo, &conn, "SELECT * FROM t ORDER BY id")
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user