From 5af10e6ccb4d936535ad5386efba05333a90fa82 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 17:57:42 +0400 Subject: [PATCH 01/23] add IndexMethod specific VM instructions --- core/vdbe/insn.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index c949d804f..b9f9cc622 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -879,6 +879,24 @@ pub enum Insn { flags: CreateBTreeFlags, }, + /// Create custom index method (calls [crate::index_method::IndexMethodCursor::create] under the hood) + IndexMethodCreate { + db: usize, + cursor_id: CursorID, + }, + /// Destroy custom index method (calls [crate::index_method::IndexMethodCursor::destroy] under the hood) + IndexMethodDestroy { + db: usize, + cursor_id: CursorID, + }, + /// Query custom index method (call [crate::index_method::IndexMethodCursor::query_start] under the hood) + IndexMethodQuery { + db: usize, + cursor_id: CursorID, + start_reg: usize, + count_reg: usize, + }, + /// Deletes an entire database table or index whose root page in the database file is given by P1. Destroy { /// The root page of the table/index to destroy @@ -1318,6 +1336,9 @@ impl InsnVariants { InsnVariants::OpenWrite => execute::op_open_write, InsnVariants::Copy => execute::op_copy, InsnVariants::CreateBtree => execute::op_create_btree, + InsnVariants::IndexMethodCreate => execute::op_index_method_create, + InsnVariants::IndexMethodDestroy => execute::op_index_method_destroy, + InsnVariants::IndexMethodQuery => execute::op_index_method_query, InsnVariants::Destroy => execute::op_destroy, InsnVariants::ResetSorter => execute::op_reset_sorter, InsnVariants::DropTable => execute::op_drop_table, From b994e2cbd86afc02f85995cdf3a4355a299a2417 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 17:59:14 +0400 Subject: [PATCH 02/23] add new Cursor type --- core/vdbe/builder.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index c3bff8423..ba8f1d200 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -7,6 +7,7 @@ use tracing::{instrument, Level}; use turso_parser::ast::{self, TableInternalId}; use crate::{ + index_method::IndexMethodAttachment, numeric::Numeric, parameters::Parameters, schema::{BTreeTable, Index, PseudoCursorType, Schema, Table}, @@ -134,6 +135,7 @@ pub struct ProgramBuilder { pub enum CursorType { BTreeTable(Arc), BTreeIndex(Arc), + IndexMethod(Arc), Pseudo(PseudoCursorType), Sorter, VirtualTable(Arc), @@ -332,6 +334,20 @@ impl ProgramBuilder { } } + /// allocate proper cursor for the given index (either [CursorType::BTreeIndex] or [CursorType::IndexMethod]) + pub fn alloc_cursor_index( + &mut self, + key: Option, + index: &Arc, + ) -> crate::Result { + let module = index.index_method.as_ref(); + if module.is_some_and(|m| !m.definition().backing_btree) { + let module = module.unwrap().clone(); + return Ok(self._alloc_cursor_id(key, CursorType::IndexMethod(module))); + } + Ok(self._alloc_cursor_id(key, CursorType::BTreeIndex(index.clone()))) + } + pub fn alloc_cursor_id(&mut self, cursor_type: CursorType) -> usize { self._alloc_cursor_id(None, cursor_type) } From 37de39e5d14c58e3937f8522a63d957eaab07b86 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:06:35 +0400 Subject: [PATCH 03/23] integrate IndexMethod to the insert/delete flow --- core/translate/index.rs | 136 ++++++++++++++++++++++++++++++++++------ 1 file changed, 117 insertions(+), 19 deletions(-) diff --git a/core/translate/index.rs b/core/translate/index.rs index 889827d3e..bacbf0668 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -172,7 +172,7 @@ pub fn translate_create_index( let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(sqlite_table.clone())); let table_ref = program.table_reference_counter.next(); - let index_cursor_id = program.alloc_cursor_id(CursorType::BTreeIndex(idx.clone())); + let index_cursor_id = program.alloc_cursor_index(None, &idx)?; let table_cursor_id = program.alloc_cursor_id_keyed( CursorKey::table(table_ref), CursorType::BTreeTable(tbl.clone()), @@ -201,11 +201,20 @@ pub fn translate_create_index( // Create a new B-Tree and store the root page index in a register let root_page_reg = program.alloc_register(); - program.emit_insn(Insn::CreateBtree { - db: 0, - root: root_page_reg, - flags: CreateBTreeFlags::new_index(), - }); + if !idx.is_backing_btree_index() { + program.emit_insn(Insn::IndexMethodCreate { + db: 0, + cursor_id: index_cursor_id, + }); + // index method sqlite_schema row always has root_page equals to zero in the schema (same as virtual tables) + program.emit_int(0, root_page_reg); + } else { + program.emit_insn(Insn::CreateBtree { + db: 0, + root: root_page_reg, + flags: CreateBTreeFlags::new_index(), + }); + } // open the sqlite schema table for writing and create a new entry for the index program.emit_insn(Insn::OpenWrite { @@ -226,7 +235,92 @@ pub fn translate_create_index( Some(sql), )?; - if index_method.is_none() { + if index_method + .as_ref() + .is_some_and(|m| !m.definition().backing_btree) + { + // open the table we are creating the index on for reading + program.emit_insn(Insn::OpenRead { + cursor_id: table_cursor_id, + root_page: tbl.root_page, + db: 0, + }); + + // Open the index btree we created for writing to insert the + // newly sorted index records. + program.emit_insn(Insn::OpenWrite { + cursor_id: index_cursor_id, + root_page: RegisterOrLiteral::Register(root_page_reg), + db: 0, + }); + + let loop_start_label = program.allocate_label(); + let loop_end_label = program.allocate_label(); + program.emit_insn(Insn::Rewind { + cursor_id: table_cursor_id, + pc_if_empty: loop_end_label, + }); + program.preassign_label_to_next_insn(loop_start_label); + + // Loop start: + // Collect index values into start_reg..rowid_reg + // emit MakeRecord (index key + rowid) into record_reg. + // + // Then insert the record into the sorter + let mut skip_row_label = None; + if let Some(where_clause) = where_clause { + let label = program.allocate_label(); + translate_condition_expr( + &mut program, + &table_references, + &where_clause, + ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_false: label, + jump_target_when_true: BranchOffset::Placeholder, + jump_target_when_null: label, + }, + resolver, + )?; + skip_row_label = Some(label); + } + + let start_reg = program.alloc_registers(columns.len() + 1); + for (i, col) in columns.iter().enumerate() { + program.emit_column_or_rowid(table_cursor_id, col.pos_in_table, start_reg + i); + } + let rowid_reg = start_reg + columns.len(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: columns.len() + 1, + dest_reg: record_reg, + index_name: Some(idx_name.clone()), + affinity_str: None, + }); + + // insert new index record + program.emit_insn(Insn::IdxInsert { + cursor_id: index_cursor_id, + record_reg, + unpacked_start: Some(start_reg), + unpacked_count: Some((columns.len() + 1) as u16), + flags: IdxInsertFlags::new().use_seek(false), + }); + + if let Some(skip_row_label) = skip_row_label { + program.resolve_label(skip_row_label, program.offset()); + } + program.emit_insn(Insn::Next { + cursor_id: table_cursor_id, + pc_if_next: loop_start_label, + }); + program.preassign_label_to_next_insn(loop_end_label); + } else if index_method.is_none() { // determine the order of the columns in the index for the sorter let order = idx.columns.iter().map(|c| c.order).collect(); // open the sorter and the pseudo table @@ -642,20 +736,24 @@ pub fn translate_drop_index( p5: 0, }); - // Destroy index btree - program.emit_insn(Insn::Destroy { - root: maybe_index.unwrap().root_page, - former_root_reg: 0, - is_temp: 0, - }); - - // Remove from the Schema any mention of the index - if let Some(idx) = maybe_index { - program.emit_insn(Insn::DropIndex { - index: idx.clone(), - db: 0, + let index = maybe_index.unwrap(); + if !index.is_backing_btree_index() { + let cursor_id = program.alloc_cursor_index(None, index)?; + program.emit_insn(Insn::IndexMethodDestroy { db: 0, cursor_id }); + } else { + // Destroy index btree + program.emit_insn(Insn::Destroy { + root: index.root_page, + former_root_reg: 0, + is_temp: 0, }); } + // Remove from the Schema any mention of the index + program.emit_insn(Insn::DropIndex { + index: index.clone(), + db: 0, + }); + Ok(program) } From e9b1ca12b6061589e4c5801faedb11053ba1ee0c Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:10:20 +0400 Subject: [PATCH 04/23] add new access operation through IndexMethod --- core/translate/plan.rs | 46 +++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 653763d07..a8c5cac52 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,7 +1,5 @@ -use std::{cmp::Ordering, sync::Arc}; -use turso_parser::ast::{ - self, FrameBound, FrameClause, FrameExclude, FrameMode, SortOrder, SubqueryType, -}; +use std::{cmp::Ordering, collections::HashMap, sync::Arc}; +use turso_parser::ast::{self, FrameBound, FrameClause, FrameExclude, FrameMode, SortOrder}; use crate::{ function::AggFunc, @@ -38,7 +36,13 @@ impl ResultSetColumn { } match &self.expr { ast::Expr::Column { table, column, .. } => { - let (_, table_ref) = tables.find_table_by_internal_id(*table).unwrap(); + let joined_table_ref = tables.find_joined_table_by_internal_id(*table).unwrap(); + if let Operation::IndexMethodQuery(module) = &joined_table_ref.op { + if module.covered_columns.contains_key(column) { + return None; + } + } + let table_ref = &joined_table_ref.table; table_ref.get_column_at(*column).unwrap().name.as_deref() } ast::Expr::RowId { table, .. } => { @@ -851,6 +855,8 @@ pub enum Operation { // This operation is used to search for a row in a table using an index // (i.e. a primary key or a secondary index) Search(Search), + // Access through custom index method query + IndexMethodQuery(IndexMethodQuery), } impl Operation { @@ -872,9 +878,10 @@ impl Operation { pub fn index(&self) -> Option<&Arc> { match self { Operation::Scan(Scan::BTreeTable { index, .. }) => index.as_ref(), + Operation::Search(Search::Seek { index, .. }) => index.as_ref(), + Operation::IndexMethodQuery(IndexMethodQuery { index, .. }) => Some(index), Operation::Scan(_) => None, Operation::Search(Search::RowidEq { .. }) => None, - Operation::Search(Search::Seek { index, .. }) => index.as_ref(), } } } @@ -1000,12 +1007,14 @@ impl JoinedTable { ) }; - let index_cursor_id = index.map(|index| { - program.alloc_cursor_id_keyed( - CursorKey::index(self.internal_id, index.clone()), - CursorType::BTreeIndex(index.clone()), - ) - }); + let index_cursor_id = index + .map(|index| { + program.alloc_cursor_index( + Some(CursorKey::index(self.internal_id, index.clone())), + index, + ) + }) + .transpose()?; Ok((table_cursor_id, index_cursor_id)) } Table::Virtual(virtual_table) => { @@ -1221,6 +1230,19 @@ pub enum Search { }, } +#[allow(clippy::large_enum_variant)] +#[derive(Clone, Debug)] +pub struct IndexMethodQuery { + /// index method to use + pub index: Arc, + /// idx of the pattern from [crate::index_method::IndexMethodAttachment::definition] which planner chose to use for the access + pub pattern_idx: usize, + /// captured arguments for the pattern chosen by the planner + pub arguments: Vec, + /// mapping from index of [ast::Expr::Column] to the column index of IndexMethod response + pub covered_columns: HashMap, +} + #[derive(Debug, Clone, PartialEq)] pub struct Aggregate { pub func: AggFunc, From 8dd2644c07f2b35d612344b29ac24c59b517265d Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:15:03 +0400 Subject: [PATCH 05/23] add support for new cursor type in existing op codes and also implement new opcodes in the VM --- core/vdbe/execute.rs | 209 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 195 insertions(+), 14 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 7672a142e..4fccdeb19 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -83,7 +83,7 @@ use super::{ sorter::Sorter, }; use regex::{Regex, RegexBuilder}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; #[cfg(feature = "json")] use crate::{ @@ -1045,6 +1045,20 @@ pub fn op_open_read( let pager = program.get_pager_from_database_index(db); + if let (_, CursorType::IndexMethod(module)) = &program.cursor_ref[*cursor_id] { + if state.cursors[*cursor_id].is_none() { + let cursor = module.init()?; + let cursor_ref = &mut state.cursors[*cursor_id]; + *cursor_ref = Some(Cursor::IndexMethod(cursor)); + } + + let cursor = state.cursors[*cursor_id].as_mut().unwrap(); + let cursor = cursor.as_index_method_mut(); + return_if_io!(cursor.open_read(&program.connection)); + state.pc += 1; + return Ok(InsnFunctionStepResult::Step); + } + let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); if program.connection.get_mv_tx_id().is_none() { assert!(*root_page >= 0, ""); @@ -1137,6 +1151,9 @@ pub fn op_open_read( CursorType::Sorter => { panic!("OpenRead on sorter cursor"); } + CursorType::IndexMethod(..) => { + unreachable!("IndexMethod handled above") + } CursorType::VirtualTable(_) => { panic!("OpenRead on virtual table cursor, use Insn:VOpen instead"); } @@ -1536,8 +1553,11 @@ pub fn op_column( } => { let Some(rowid) = ({ let index_cursor = state.get_cursor(index_cursor_id); - let index_cursor = index_cursor.as_btree_mut(); - return_if_io!(index_cursor.rowid()) + match index_cursor { + Cursor::BTree(cursor) => return_if_io!(cursor.rowid()), + Cursor::IndexMethod(cursor) => return_if_io!(cursor.query_rowid()), + _ => panic!("unexpected cursor type"), + } }) else { state.registers[*dest] = Register::Value(Value::Null); break 'outer; @@ -1858,6 +1878,12 @@ pub fn op_column( }; state.registers[*dest] = Register::Value(value); } + CursorType::IndexMethod(..) => { + let cursor = state.cursors[*cursor_id].as_mut().unwrap(); + let cursor = cursor.as_index_method_mut(); + let value = return_if_io!(cursor.query_column(*column)); + state.registers[*dest] = Register::Value(value); + } CursorType::VirtualTable(_) => { panic!("Insn:Column on virtual table cursor, use Insn:VColumn instead"); } @@ -2038,6 +2064,11 @@ pub fn op_next( let has_more = return_if_io!(mv_cursor.next()); !has_more } + Cursor::IndexMethod(_) => { + let cursor = cursor.as_index_method_mut(); + let has_more = return_if_io!(cursor.query_next()); + !has_more + } _ => panic!("Next on non-btree/materialized-view cursor"), } }; @@ -2773,15 +2804,22 @@ pub fn op_row_id( } => { let rowid = { let index_cursor = state.get_cursor(index_cursor_id); - let index_cursor = index_cursor.as_btree_mut(); - let record = return_if_io!(index_cursor.record()); - let record = record.as_ref().unwrap(); - let mut record_cursor_ref = index_cursor.record_cursor_mut(); - let record_cursor = record_cursor_ref.deref_mut(); - let rowid = record.last_value(record_cursor).unwrap(); - match rowid { - Ok(ValueRef::Integer(rowid)) => rowid, - _ => unreachable!(), + match index_cursor { + Cursor::BTree(index_cursor) => { + let record = return_if_io!(index_cursor.record()); + let record = record.as_ref().unwrap(); + let mut record_cursor_ref = index_cursor.record_cursor_mut(); + let record_cursor = record_cursor_ref.deref_mut(); + let rowid = record.last_value(record_cursor).unwrap(); + match rowid { + Ok(ValueRef::Integer(rowid)) => rowid, + _ => unreachable!(), + } + } + Cursor::IndexMethod(index_cursor) => { + return_if_io!(index_cursor.query_rowid()).unwrap() + } + _ => panic!("unexpected cursor type"), } }; state.op_row_id_state = OpRowIdState::Seek { @@ -2827,6 +2865,14 @@ pub fn op_row_id( } else { state.registers[*dest] = Register::Value(Value::Null); } + } else if let Some(Cursor::IndexMethod(cursor)) = + cursors.get_mut(*cursor_id).unwrap() + { + if let Some(rowid) = return_if_io!(cursor.query_rowid()) { + state.registers[*dest] = Register::Value(Value::Integer(rowid)); + } else { + state.registers[*dest] = Register::Value(Value::Null); + } } else { return Err(LimboError::InternalError( "RowId: cursor is not a table, virtual, or materialized view cursor" @@ -2853,8 +2899,12 @@ pub fn op_idx_row_id( load_insn!(IdxRowId { cursor_id, dest }, insn); let cursors = &mut state.cursors; let cursor = cursors.get_mut(*cursor_id).unwrap().as_mut().unwrap(); - let cursor = cursor.as_btree_mut(); - let rowid = return_if_io!(cursor.rowid()); + + let rowid = match cursor { + Cursor::BTree(cursor) => return_if_io!(cursor.rowid()), + Cursor::IndexMethod(cursor) => return_if_io!(cursor.query_rowid()), + _ => panic!("unexpected cursor type"), + }; state.registers[*dest] = match rowid { Some(rowid) => Register::Value(Value::Integer(rowid)), None => Register::Value(Value::Null), @@ -6314,6 +6364,12 @@ pub fn op_idx_delete( insn ); + if let Some(Cursor::IndexMethod(cursor)) = &mut state.cursors[*cursor_id] { + return_if_io!(cursor.delete(&state.registers[*start_reg..*start_reg + *num_regs])); + state.pc += 1; + return Ok(InsnFunctionStepResult::Step); + } + loop { #[cfg(debug_assertions)] tracing::debug!( @@ -6420,11 +6476,29 @@ pub fn op_idx_insert( cursor_id, record_reg, flags, + unpacked_start, + unpacked_count, .. }, *insn ); + if let Some(Cursor::IndexMethod(cursor)) = &mut state.cursors[cursor_id] { + let Some(start) = unpacked_start else { + return Err(LimboError::InternalError( + "IndexMethod must receive unpacked values".to_string(), + )); + }; + let Some(count) = unpacked_count else { + return Err(LimboError::InternalError( + "IndexMethod must receive unpacked values".to_string(), + )); + }; + return_if_io!(cursor.insert(&state.registers[start..start + count as usize])); + state.pc += 1; + return Ok(InsnFunctionStepResult::Step); + } + let record_to_insert = match &state.registers[record_reg] { Register::Record(ref r) => r, o => { @@ -6929,6 +7003,20 @@ pub fn op_open_write( } let pager = program.get_pager_from_database_index(db); + if let (_, CursorType::IndexMethod(module)) = &program.cursor_ref[*cursor_id] { + if state.cursors[*cursor_id].is_none() { + let cursor = module.init()?; + let cursor_ref = &mut state.cursors[*cursor_id]; + *cursor_ref = Some(Cursor::IndexMethod(cursor)); + } + + let cursor = state.cursors[*cursor_id].as_mut().unwrap(); + let cursor = cursor.as_index_method_mut(); + return_if_io!(cursor.open_write(&program.connection)); + state.pc += 1; + return Ok(InsnFunctionStepResult::Step); + } + let root_page = match root_page { RegisterOrLiteral::Literal(lit) => *lit, RegisterOrLiteral::Register(reg) => match &state.registers[*reg].get_value() { @@ -7087,6 +7175,96 @@ pub fn op_create_btree( Ok(InsnFunctionStepResult::Step) } +pub fn op_index_method_create( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Arc, + mv_store: Option<&Arc>, +) -> Result { + load_insn!(IndexMethodCreate { db, cursor_id }, insn); + assert_eq!(*db, 0); + if program.connection.is_readonly(*db) { + return Err(LimboError::ReadOnly); + } + if let Some(mv_store) = mv_store { + todo!("MVCC is not supported yet"); + } + if let (_, CursorType::IndexMethod(module)) = &program.cursor_ref[*cursor_id] { + if state.cursors[*cursor_id].is_none() { + let cursor = module.init()?; + let cursor_ref = &mut state.cursors[*cursor_id]; + *cursor_ref = Some(Cursor::IndexMethod(cursor)); + } + } + let cursor = state.cursors[*cursor_id].as_mut().unwrap(); + let cursor = cursor.as_index_method_mut(); + return_if_io!(cursor.create(&program.connection)); + + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + +pub fn op_index_method_destroy( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Arc, + mv_store: Option<&Arc>, +) -> Result { + load_insn!(IndexMethodDestroy { db, cursor_id }, insn); + assert_eq!(*db, 0); + if program.connection.is_readonly(*db) { + return Err(LimboError::ReadOnly); + } + if let Some(mv_store) = mv_store { + todo!("MVCC is not supported yet"); + } + if let (_, CursorType::IndexMethod(module)) = &program.cursor_ref[*cursor_id] { + if state.cursors[*cursor_id].is_none() { + let cursor = module.init()?; + let cursor_ref = &mut state.cursors[*cursor_id]; + *cursor_ref = Some(Cursor::IndexMethod(cursor)); + } + } + let cursor = state.cursors[*cursor_id].as_mut().unwrap(); + let cursor = cursor.as_index_method_mut(); + return_if_io!(cursor.destroy(&program.connection)); + + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + +pub fn op_index_method_query( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Arc, + mv_store: Option<&Arc>, +) -> Result { + load_insn!( + IndexMethodQuery { + db, + cursor_id, + start_reg, + count_reg, + }, + insn + ); + assert_eq!(*db, 0); + if program.connection.is_readonly(*db) { + return Err(LimboError::ReadOnly); + } + if let Some(mv_store) = mv_store { + todo!("MVCC is not supported yet"); + } + let cursor = state.cursors[*cursor_id].as_mut().unwrap(); + let cursor = cursor.as_index_method_mut(); + return_if_io!(cursor.query_start(&state.registers[*start_reg..*start_reg + *count_reg])); + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub enum OpDestroyState { CreateCursor, DestroyBtree(Arc>), @@ -7830,6 +8008,9 @@ pub fn op_open_ephemeral( CursorType::VirtualTable(_) => { panic!("OpenEphemeral on virtual table cursor, use Insn::VOpen instead"); } + CursorType::IndexMethod(..) => { + panic!("OpenEphemeral on index method cursor") + } CursorType::MaterializedView(_, _) => { panic!("OpenEphemeral on materialized view cursor"); } From 35b96ae8d8348aa700d220c1343aea2885d75eba Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:17:49 +0400 Subject: [PATCH 06/23] fix few places which needs to be hooked into new types --- cli/app.rs | 2 +- core/translate/display.rs | 27 +++++++++++++++++++++++++++ core/translate/subquery.rs | 4 ++++ core/types.rs | 10 ++++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/cli/app.rs b/cli/app.rs index 9ae6b220e..a31cafc73 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -865,7 +865,7 @@ impl Limbo { ) -> usize { let indent_count = match prev_insn { "Rewind" | "Last" | "SorterSort" | "SeekGE" | "SeekGT" | "SeekLE" - | "SeekLT" | "BeginSubrtn" => indent_count + 1, + | "SeekLT" | "BeginSubrtn" | "IndexMethodQuery" => indent_count + 1, _ => indent_count, }; diff --git a/core/translate/display.rs b/core/translate/display.rs index 5939407dc..2b590d69b 100644 --- a/core/translate/display.rs +++ b/core/translate/display.rs @@ -119,6 +119,15 @@ impl Display for SelectPlan { )?; } }, + Operation::IndexMethodQuery(query) => { + let index_method = query.index.index_method.as_ref().unwrap(); + writeln!( + f, + "{}QUERY INDEX METHOD {}", + indent, + index_method.definition().method_name + )?; + } } } Ok(()) @@ -161,6 +170,15 @@ impl Display for DeletePlan { )?; } }, + Operation::IndexMethodQuery(query) => { + let module = query.index.index_method.as_ref().unwrap(); + writeln!( + f, + "{}QUERY MODULE {}", + indent, + module.definition().method_name + )?; + } } } Ok(()) @@ -215,6 +233,15 @@ impl fmt::Display for UpdatePlan { )?; } }, + Operation::IndexMethodQuery(query) => { + let module = query.index.index_method.as_ref().unwrap(); + writeln!( + f, + "{}QUERY MODULE {}", + indent, + module.definition().method_name + )?; + } } } if !self.order_by.is_empty() { diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 38dd7b4b0..a2bc5e874 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -390,6 +390,10 @@ pub fn emit_from_clause_subqueries( ) } }, + Operation::IndexMethodQuery(query) => { + let index_method = query.index.index_method.as_ref().unwrap(); + format!("QUERY INDEX METHOD {}", index_method.definition().method_name) + } } ); diff --git a/core/types.rs b/core/types.rs index a7259f8b1..617d68a2e 100644 --- a/core/types.rs +++ b/core/types.rs @@ -5,6 +5,7 @@ use turso_parser::ast::SortOrder; use crate::error::LimboError; use crate::ext::{ExtValue, ExtValueType}; +use crate::index_method::IndexMethodCursor; use crate::numeric::format_float; use crate::pseudo::PseudoCursor; use crate::schema::Index; @@ -2271,6 +2272,7 @@ impl Record { pub enum Cursor { BTree(Box), + IndexMethod(Box), Pseudo(PseudoCursor), Sorter(Sorter), Virtual(VirtualTableCursor), @@ -2281,6 +2283,7 @@ impl Debug for Cursor { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::BTree(..) => f.debug_tuple("BTree").finish(), + Self::IndexMethod(..) => f.debug_tuple("IndexMethod").finish(), Self::Pseudo(..) => f.debug_tuple("Pseudo").finish(), Self::Sorter(..) => f.debug_tuple("Sorter").finish(), Self::Virtual(..) => f.debug_tuple("Virtual").finish(), @@ -2344,6 +2347,13 @@ impl Cursor { _ => panic!("Cursor is not a materialized view cursor"), } } + + pub fn as_index_method_mut(&mut self) -> &mut dyn IndexMethodCursor { + match self { + Self::IndexMethod(cursor) => cursor.as_mut(), + _ => panic!("Cursor is not an IndexMethod cursor"), + } + } } #[derive(Debug)] From d65b7eddc06424fa806955be63fb73e1b64e1a48 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:18:48 +0400 Subject: [PATCH 07/23] add helper for simple binding of values in the AST --- core/util.rs | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 1 deletion(-) diff --git a/core/util.rs b/core/util.rs index 8f0b5d9aa..745eb7a0b 100644 --- a/core/util.rs +++ b/core/util.rs @@ -2,7 +2,9 @@ use crate::incremental::view::IncrementalView; use crate::numeric::StrToF64; use crate::translate::emitter::TransactionMode; -use crate::translate::expr::WalkControl; +use crate::translate::expr::{walk_expr_mut, WalkControl}; +use crate::translate::plan::JoinedTable; +use crate::translate::planner::parse_row_id; use crate::types::IOResult; use crate::{ schema::{self, BTreeTable, Column, Schema, Table, Type, DBSP_TABLE_PREFIX}, @@ -318,6 +320,154 @@ pub fn check_literal_equivalency(lhs: &Literal, rhs: &Literal) -> bool { } } +/// bind AST identifiers to either Column or Rowid if possible +pub fn simple_bind_expr( + schema: &Schema, + joined_table: &JoinedTable, + result_columns: &[ast::ResultColumn], + expr: &mut ast::Expr, +) -> Result<()> { + let internal_id = joined_table.internal_id; + walk_expr_mut(expr, &mut |expr: &mut ast::Expr| -> Result { + #[allow(clippy::single_match)] + match expr { + Expr::Id(id) => { + let normalized_id = normalize_ident(id.as_str()); + + for result_column in result_columns.iter() { + if let ast::ResultColumn::Expr(result, Some(ast::As::As(alias))) = result_column + { + if alias.as_str().eq_ignore_ascii_case(&normalized_id) { + *expr = *result.clone(); + return Ok(WalkControl::Continue); + } + } + } + let col_idx = joined_table.columns().iter().position(|c| { + c.name + .as_ref() + .is_some_and(|name| name.eq_ignore_ascii_case(&normalized_id)) + }); + if let Some(col_idx) = col_idx { + let col = joined_table.table.columns().get(col_idx).unwrap(); + *expr = ast::Expr::Column { + database: None, + table: internal_id, + column: col_idx, + is_rowid_alias: col.is_rowid_alias, + }; + } else { + // only if we haven't found a match, check for explicit rowid reference + let is_btree_table = matches!(joined_table.table, Table::BTree(_)); + if is_btree_table { + if let Some(rowid) = parse_row_id(&normalized_id, internal_id, || false)? { + *expr = rowid; + } + } + } + } + _ => {} + } + Ok(WalkControl::Continue) + }); + Ok(()) +} + +pub fn try_substitute_parameters( + pattern: &Expr, + parameters: &HashMap, +) -> Option> { + match pattern { + Expr::FunctionCall { + name, + distinctness, + args, + order_by, + filter_over, + } => { + let mut substituted = Vec::new(); + for arg in args { + substituted.push(try_substitute_parameters(arg, parameters)?); + } + Some(Box::new(Expr::FunctionCall { + args: substituted, + distinctness: *distinctness, + name: name.clone(), + order_by: order_by.clone(), + filter_over: filter_over.clone(), + })) + } + Expr::Variable(var) => { + let Ok(var) = var.parse::() else { + return None; + }; + Some(Box::new(parameters.get(&var)?.clone())) + } + _ => Some(Box::new(pattern.clone())), + } +} + +pub fn try_capture_parameters(pattern: &Expr, query: &Expr) -> Option> { + let mut captured = HashMap::new(); + match (pattern, query) { + ( + Expr::FunctionCall { + name: name1, + distinctness: distinct1, + args: args1, + order_by: order1, + filter_over: filter1, + }, + Expr::FunctionCall { + name: name2, + distinctness: distinct2, + args: args2, + order_by: order2, + filter_over: filter2, + }, + ) => { + if !name1.as_str().eq_ignore_ascii_case(name2.as_str()) { + return None; + } + if distinct1.is_some() || distinct2.is_some() { + return None; + } + if !order1.is_empty() || !order2.is_empty() { + return None; + } + if filter1.filter_clause.is_some() || filter1.over_clause.is_some() { + return None; + } + if filter2.filter_clause.is_some() || filter2.over_clause.is_some() { + return None; + } + for (arg1, arg2) in args1.iter().zip(args2.iter()) { + let result = try_capture_parameters(arg1, arg2)?; + captured.extend(result); + } + Some(captured) + } + (Expr::Variable(var), expr) => { + let Ok(var) = var.parse::() else { + return None; + }; + captured.insert(var, expr.clone()); + Some(captured) + } + ( + Expr::Id(_) | Expr::Name(_) | Expr::Column { .. }, + Expr::Id(_) | Expr::Name(_) | Expr::Column { .. }, + ) => { + if pattern == query { + Some(captured) + } else { + None + } + } + (_, _) => None, + } +} + /// This function is used to determine whether two expressions are logically /// equivalent in the context of queries, even if their representations /// differ. e.g.: `SUM(x)` and `sum(x)`, `x + y` and `y + x` From d9ea3be4b84c7b3c5d47f67e2d6ebf80530e70f3 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:19:14 +0400 Subject: [PATCH 08/23] forbid usage of IndexMethod in insert/delete loops --- core/translate/emitter.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index db84007f0..3c95f028a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -670,6 +670,9 @@ fn emit_delete_insns( index: Some(index), .. } => program.resolve_cursor_id(&CursorKey::index(internal_id, index.clone())), }, + Operation::IndexMethodQuery(_) => { + panic!("access through IndexMethod is not supported for delete statements") + } }; let main_table_cursor_id = program.resolve_cursor_id(&CursorKey::table(internal_id)); @@ -1087,6 +1090,9 @@ fn emit_update_insns( false, ), }, + Operation::IndexMethodQuery(_) => { + panic!("access through IndexMethod is not supported for update operations") + } }; let beg = program.alloc_registers( From 61c9279a57cfa271e2d45173a3b682388d3a6b4f Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:20:37 +0400 Subject: [PATCH 09/23] properly translate column which was covered by index method --- core/translate/expr.rs | 52 +++++++++++++++++++++++++++------------- core/translate/insert.rs | 28 ++++++++++------------ 2 files changed, 49 insertions(+), 31 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 929a62a4e..f97425a8b 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -12,7 +12,7 @@ use crate::function::{Func, FuncCtx, MathFuncArity, ScalarFunc, VectorFunc}; use crate::functions::datetime; use crate::schema::{affinity, Affinity, Table, Type}; use crate::translate::optimizer::TakeOwnership; -use crate::translate::plan::ResultSetColumn; +use crate::translate::plan::{Operation, ResultSetColumn}; use crate::translate::planner::parse_row_id; use crate::util::{exprs_are_equivalent, normalize_ident, parse_numeric_literal}; use crate::vdbe::builder::CursorKey; @@ -2098,19 +2098,25 @@ pub fn translate_expr( column, is_rowid_alias, } => { - let (index, use_covering_index) = { + let (index, index_method, use_covering_index) = { if let Some(table_reference) = referenced_tables .unwrap() .find_joined_table_by_internal_id(*table_ref_id) { ( table_reference.op.index(), + if let Operation::IndexMethodQuery(index_method) = &table_reference.op { + Some(index_method) + } else { + None + }, table_reference.utilizes_covering_index(), ) } else { - (None, false) + (None, None, false) } }; + let use_index_method = index_method.and_then(|m| m.covered_columns.get(column)); let (is_from_outer_query_scope, table) = referenced_tables .unwrap() @@ -2122,11 +2128,13 @@ pub fn translate_expr( ) }); - let Some(table_column) = table.get_column_at(*column) else { - crate::bail_parse_error!("column index out of bounds"); - }; - // Counter intuitive but a column always needs to have a collation - program.set_collation(Some((table_column.collation.unwrap_or_default(), false))); + if use_index_method.is_none() { + let Some(table_column) = table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + // Counter intuitive but a column always needs to have a collation + program.set_collation(Some((table_column.collation.unwrap_or_default(), false))); + } // If we are reading a column from a table, we find the cursor that corresponds to // the table and read the column from the cursor. @@ -2161,7 +2169,17 @@ pub fn translate_expr( (table_cursor_id, index_cursor_id) }; - if *is_rowid_alias { + let index_cursor_id = index.map(|index| { + program.resolve_cursor_id(&CursorKey::index(*table_ref_id, index.clone())) + }); + if let Some(custom_module_column) = use_index_method { + program.emit_column_or_rowid( + index_cursor_id.unwrap(), + *custom_module_column, + target_register, + ); + } else { + if *is_rowid_alias { if let Some(index_cursor_id) = index_cursor_id { program.emit_insn(Insn::IdxRowId { cursor_id: index_cursor_id, @@ -2202,12 +2220,13 @@ pub fn translate_expr( *column }; - program.emit_column_or_rowid(read_cursor, column, target_register); + program.emit_column_or_rowid(read_cursor, column, target_register); + } + let Some(column) = table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + maybe_apply_affinity(column.ty, target_register, program); } - let Some(column) = table.get_column_at(*column) else { - crate::bail_parse_error!("column index out of bounds"); - }; - maybe_apply_affinity(column.ty, target_register, program); Ok(target_register) } Table::FromClauseSubquery(from_clause_subquery) => { @@ -3596,7 +3615,7 @@ pub fn bind_and_rewrite_expr<'a>( connection: &'a Arc, param_state: &mut ParamState, binding_behavior: BindingBehavior, -) -> Result { +) -> Result<()> { walk_expr_mut( top_level_expr, &mut |expr: &mut ast::Expr| -> Result { @@ -3894,7 +3913,8 @@ pub fn bind_and_rewrite_expr<'a>( } Ok(WalkControl::Continue) }, - ) + )?; + Ok(()) } /// Recursively walks a mutable expression, applying a function to each sub-expression. diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 9209bdd2a..d6ff96096 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -139,26 +139,24 @@ impl<'a> InsertEmitCtx<'a> { cdc_table: Option<(usize, Arc)>, num_values: usize, temp_table_ctx: Option, - ) -> Self { + ) -> Result { // allocate cursor id's for each btree index cursor we'll need to populate the indexes - let idx_cursors = resolver - .schema - .get_indices(table.name.as_str()) - .map(|idx| { - ( - &idx.name, - idx.root_page, - program.alloc_cursor_id(CursorType::BTreeIndex(idx.clone())), - ) - }) - .collect::>(); + let indices = resolver.schema.get_indices(table.name.as_str()); + let mut idx_cursors = Vec::new(); + for idx in indices { + idx_cursors.push(( + &idx.name, + idx.root_page, + program.alloc_cursor_index(None, idx)?, + )); + } let halt_label = program.allocate_label(); let loop_start_label = program.allocate_label(); let row_done_label = program.allocate_label(); let stmt_epilogue = program.allocate_label(); let key_ready_for_uniqueness_check_label = program.allocate_label(); let key_generation_label = program.allocate_label(); - Self { + Ok(Self { table, idx_cursors, temp_table_ctx, @@ -176,7 +174,7 @@ impl<'a> InsertEmitCtx<'a> { key_ready_for_uniqueness_check_label, key_generation_label, autoincrement_meta: None, - } + }) } } @@ -263,7 +261,7 @@ pub fn translate_insert( cdc_table, values.len(), None, - ); + )?; program = init_source_emission( program, From 212bcfe08ffbcaa7ca60c6433927ad38ac78142b Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:21:33 +0400 Subject: [PATCH 10/23] integrate IndexMethod into select main loop --- core/translate/main_loop.rs | 74 +++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index bf0c1514b..3f6a78a76 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -257,10 +257,10 @@ pub fn init_loop( { continue; } - let cursor_id = program.alloc_cursor_id_keyed( - CursorKey::index(table.internal_id, index.clone()), - CursorType::BTreeIndex(index.clone()), - ); + let cursor_id = program.alloc_cursor_index( + Some(CursorKey::index(table.internal_id, index.clone())), + index, + )?; program.emit_insn(Insn::OpenWrite { cursor_id, root_page: index.root_page.into(), @@ -350,10 +350,10 @@ pub fn init_loop( { continue; } - let cursor_id = program.alloc_cursor_id_keyed( - CursorKey::index(table.internal_id, index.clone()), - CursorType::BTreeIndex(index.clone()), - ); + let cursor_id = program.alloc_cursor_index( + Some(CursorKey::index(table.internal_id, index.clone())), + index, + )?; program.emit_insn(Insn::OpenWrite { cursor_id, root_page: index.root_page.into(), @@ -397,6 +397,24 @@ pub fn init_loop( } } } + Operation::IndexMethodQuery(_) => match mode { + OperationMode::SELECT => { + if let Some(table_cursor_id) = table_cursor_id { + program.emit_insn(Insn::OpenRead { + cursor_id: table_cursor_id, + root_page: table.table.get_root_page(), + db: table.database_id, + }); + } + let index_cursor_id = index_cursor_id.unwrap(); + program.emit_insn(Insn::OpenRead { + cursor_id: index_cursor_id, + root_page: table.op.index().unwrap().root_page, + db: table.database_id, + }); + } + _ => panic!("only SELECT is supported for index method"), + }, } } @@ -685,6 +703,38 @@ pub fn open_loop( } } } + Operation::IndexMethodQuery(query) => { + let start_reg = program.alloc_registers(query.arguments.len() + 1); + program.emit_int(query.pattern_idx as i64, start_reg); + for i in 0..query.arguments.len() { + translate_expr( + program, + Some(table_references), + &query.arguments[i], + start_reg + 1 + i, + &t_ctx.resolver, + )?; + } + program.emit_insn(Insn::IndexMethodQuery { + db: 0, + cursor_id: index_cursor_id.expect("IndexMethod requires a index cursor"), + start_reg, + count_reg: query.arguments.len() + 1, + }); + program.emit_insn(Insn::Next { + cursor_id: index_cursor_id.expect("IndexMethod requires a index cursor"), + pc_if_next: loop_end, + }); + program.preassign_label_to_next_insn(loop_start); + if let Some(table_cursor_id) = table_cursor_id { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } + } + } } for subquery in subqueries.iter_mut().filter(|s| !s.has_been_evaluated()) { @@ -1169,6 +1219,14 @@ pub fn close_loop( } program.preassign_label_to_next_insn(loop_labels.loop_end); } + Operation::IndexMethodQuery(_) => { + program.resolve_label(loop_labels.next, program.offset()); + program.emit_insn(Insn::Next { + cursor_id: index_cursor_id.unwrap(), + pc_if_next: loop_labels.loop_start, + }); + program.preassign_label_to_next_insn(loop_labels.loop_end); + } } // Handle OUTER JOIN logic. The reason this comes after the "loop end" mark is that we may need to still jump back From 56796151bcbd4c8981bc22652fa16d293887d0e3 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:22:03 +0400 Subject: [PATCH 11/23] support necessary helpers --- core/translate/select.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/translate/select.rs b/core/translate/select.rs index 0c9ebf2de..7c9d29ea5 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -645,6 +645,7 @@ fn count_plan_required_cursors(plan: &SelectPlan) -> usize { Search::RowidEq { .. } => 1, Search::Seek { index, .. } => 1 + index.is_some() as usize, } + Operation::IndexMethodQuery(_) => 1, } + if let Table::FromClauseSubquery(from_clause_subquery) = &t.table { count_plan_required_cursors(&from_clause_subquery.plan) } else { @@ -664,6 +665,7 @@ fn estimate_num_instructions(select: &SelectPlan) -> usize { .map(|t| match &t.op { Operation::Scan { .. } => 10, Operation::Search(_) => 15, + Operation::IndexMethodQuery(_) => 15, } + if let Table::FromClauseSubquery(from_clause_subquery) = &t.table { 10 + estimate_num_instructions(&from_clause_subquery.plan) } else { @@ -687,6 +689,7 @@ fn estimate_num_labels(select: &SelectPlan) -> usize { .map(|t| match &t.op { Operation::Scan { .. } => 3, Operation::Search(_) => 3, + Operation::IndexMethodQuery(_) => 3, } + if let Table::FromClauseSubquery(from_clause_subquery) = &t.table { 3 + estimate_num_labels(&from_clause_subquery.plan) } else { From d6972a9cf36c841e241b250cbf4b604907276c4a Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:22:35 +0400 Subject: [PATCH 12/23] fix explain --- core/vdbe/explain.rs | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 4f09e2ea0..ccaca542d 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -19,11 +19,12 @@ pub fn insn_to_row( let get_table_or_index_name = |cursor_id: usize| { let cursor_type = &program.cursor_ref[cursor_id].1; match cursor_type { - CursorType::BTreeTable(table) => &table.name, - CursorType::BTreeIndex(index) => &index.name, + CursorType::BTreeTable(table) => table.name.as_str(), + CursorType::BTreeIndex(index) => index.name.as_str(), + CursorType::IndexMethod(descriptor) => descriptor.definition().index_name, CursorType::Pseudo(_) => "pseudo", - CursorType::VirtualTable(virtual_table) => &virtual_table.name, - CursorType::MaterializedView(table, _) => &table.name, + CursorType::VirtualTable(virtual_table) => virtual_table.name.as_str(), + CursorType::MaterializedView(table, _) => table.name.as_str(), CursorType::Sorter => "sorter", } }; @@ -551,6 +552,7 @@ pub fn insn_to_row( } CursorType::Pseudo(_) => None, CursorType::Sorter => None, + CursorType::IndexMethod(..) => None, CursorType::VirtualTable(v) => v.columns.get(*column).unwrap().name.as_ref(), }; ( @@ -1283,6 +1285,33 @@ pub fn insn_to_row( 0, format!("r[{}]=root iDb={} flags={}", root, db, flags.get_flags()), ), + Insn::IndexMethodCreate { db, cursor_id } => ( + "IndexMethodCreate", + *db as i32, + *cursor_id as i32, + 0, + Value::build_text(""), + 0, + "".to_string() + ), + Insn::IndexMethodDestroy { db, cursor_id } => ( + "IndexMethodDestroy", + *db as i32, + *cursor_id as i32, + 0, + Value::build_text(""), + 0, + "".to_string() + ), + Insn::IndexMethodQuery { db, cursor_id, start_reg, .. } => ( + "IndexMethodQuery", + *db as i32, + *cursor_id as i32, + *start_reg as i32, + Value::build_text(""), + 0, + "".to_string() + ), Insn::Destroy { root, former_root_reg, From 180713d32a983d16685ff368d7035cbb38aceb67 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:23:15 +0400 Subject: [PATCH 13/23] plug IndexMethod into optimizer --- core/translate/expr.rs | 72 +++++------ core/translate/optimizer/mod.rs | 217 +++++++++++++++++++++++++++++++- 2 files changed, 252 insertions(+), 37 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index f97425a8b..9f8295668 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -2179,46 +2179,46 @@ pub fn translate_expr( target_register, ); } else { - if *is_rowid_alias { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxRowId { - cursor_id: index_cursor_id, - dest: target_register, - }); - } else if let Some(table_cursor_id) = table_cursor_id { - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: target_register, - }); + if *is_rowid_alias { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxRowId { + cursor_id: index_cursor_id, + dest: target_register, + }); + } else if let Some(table_cursor_id) = table_cursor_id { + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: target_register, + }); + } else { + unreachable!("Either index or table cursor must be opened"); + } } else { - unreachable!("Either index or table cursor must be opened"); - } - } else { - let read_from_index = if is_from_outer_query_scope { - index_cursor_id.is_some() - } else { - use_covering_index - }; - let read_cursor = if read_from_index { - index_cursor_id.expect("index cursor should be opened") - } else { - table_cursor_id.expect("table cursor should be opened") - }; - let column = if read_from_index { - let index = program.resolve_index_for_cursor_id( - index_cursor_id.expect("index cursor should be opened"), - ); - index - .column_table_pos_to_index_pos(*column) - .unwrap_or_else(|| { - panic!( + let read_from_index = if is_from_outer_query_scope { + index_cursor_id.is_some() + } else { + use_covering_index + }; + let read_cursor = if read_from_index { + index_cursor_id.expect("index cursor should be opened") + } else { + table_cursor_id.expect("table cursor should be opened") + }; + let column = if read_from_index { + let index = program.resolve_index_for_cursor_id( + index_cursor_id.expect("index cursor should be opened"), + ); + index + .column_table_pos_to_index_pos(*column) + .unwrap_or_else(|| { + panic!( "index {} does not contain column number {} of table {}", index.name, column, table_ref_id ) - }) - } else { - *column - }; + }) + } else { + *column + }; program.emit_column_or_rowid(read_cursor, column, target_register); } diff --git a/core/translate/optimizer/mod.rs b/core/translate/optimizer/mod.rs index 4d6b42b69..5c18f4425 100644 --- a/core/translate/optimizer/mod.rs +++ b/core/translate/optimizer/mod.rs @@ -23,11 +23,14 @@ use crate::{ constraints::{RangeConstraintRef, SeekRangeConstraint, TableConstraints}, }, plan::{ - ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, QueryDestination, + ColumnUsedMask, IndexMethodQuery, NonFromClauseSubquery, OuterQueryReference, QueryDestination, ResultSetColumn, Scan, SeekKeyComponent, }, }, types::SeekOp, + util::{ + exprs_are_equivalent, simple_bind_expr, try_capture_parameters, try_substitute_parameters, + }, vdbe::builder::{CursorKey, CursorType, ProgramBuilder}, LimboError, Result, }; @@ -84,12 +87,15 @@ pub fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<() let best_join_order = optimize_table_access( schema, + &mut plan.result_columns, &mut plan.table_references, &schema.indexes, &mut plan.where_clause, &mut plan.order_by, &mut plan.group_by, &plan.non_from_clause_subqueries, + &mut plan.limit, + &mut plan.offset, )?; if let Some(best_join_order) = best_join_order { @@ -110,12 +116,15 @@ fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> { let _ = optimize_table_access( schema, + &mut plan.result_columns, &mut plan.table_references, &schema.indexes, &mut plan.where_clause, &mut plan.order_by, &mut None, &[], + &mut plan.limit, + &mut plan.offset, )?; Ok(()) @@ -135,12 +144,15 @@ fn optimize_update_plan( } let _ = optimize_table_access( schema, + &mut [], &mut plan.table_references, &schema.indexes, &mut plan.where_clause, &mut plan.order_by, &mut None, &[], + &mut plan.limit, + &mut plan.offset, )?; let table_ref = &mut plan.table_references.joined_tables_mut()[0]; @@ -322,6 +334,187 @@ fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { Ok(()) } +#[allow(clippy::too_many_arguments)] +fn optimize_table_access_with_custom_modules( + schema: &Schema, + result_columns: &mut [ResultSetColumn], + table_references: &mut TableReferences, + available_indexes: &HashMap>>, + where_query: &mut [WhereTerm], + order_by: &mut Vec<(Box, SortOrder)>, + group_by: &mut Option, + limit: &mut Option>, + offset: &mut Option>, +) -> Result { + let tables = table_references.joined_tables_mut(); + assert_eq!(tables.len(), 1); + + // group by is not supported for now + if group_by.is_some() { + return Ok(false); + } + + let table = &mut tables[0]; + let Some(indexes) = available_indexes.get(table.table.get_name()) else { + return Ok(false); + }; + for index in indexes { + let Some(module) = &index.index_method else { + continue; + }; + if index.is_backing_btree_index() { + continue; + } + let definition = module.definition(); + 'pattern: for (pattern_idx, pattern) in definition.patterns.iter().enumerate() { + let mut pattern = pattern.clone(); + assert!(pattern.with.is_none()); + assert!(pattern.body.compounds.is_empty()); + let ast::OneSelect::Select { + columns, + from: Some(ast::FromClause { select, joins }), + distinctness: None, + ref mut where_clause, + group_by: None, + window_clause, + } = &mut pattern.body.select + else { + panic!("unexpected select pattern body"); + }; + assert!(window_clause.is_empty()); + assert!(joins.is_empty()); + let ast::SelectTable::Table(name, _, _) = select.as_ref() else { + panic!("unexpected from clause"); + }; + + for column in columns.iter_mut() { + if let ast::ResultColumn::Expr(e, _) = column { + simple_bind_expr(schema, table, &[], e)?; + } + } + for column in pattern.order_by.iter_mut() { + simple_bind_expr(schema, table, columns, &mut column.expr)?; + } + if let Some(pattern_where) = where_clause { + simple_bind_expr(schema, table, columns, pattern_where)?; + } + + if name.name.as_str() != table.table.get_name() { + continue; + } + if order_by.len() != pattern.order_by.len() { + continue; + } + + let mut where_query_covered = None; + let mut parameters = HashMap::new(); + + for (pattern_column, (query_column, query_order)) in + pattern.order_by.iter().zip(order_by.iter()) + { + if *query_order != pattern_column.order.unwrap_or(SortOrder::Asc) { + continue 'pattern; + } + let Some(captured) = try_capture_parameters(&pattern_column.expr, query_column) + else { + continue 'pattern; + }; + parameters.extend(captured); + } + match (pattern.limit.as_ref().map(|x| &x.expr), &limit) { + (None, Some(_)) | (Some(_), None) => continue, + (Some(pattern_limit), Some(query_limit)) => { + let Some(captured) = try_capture_parameters(pattern_limit, query_limit) else { + continue 'pattern; + }; + parameters.extend(captured); + } + (None, None) => {} + } + match ( + pattern.limit.as_ref().and_then(|x| x.offset.as_ref()), + &offset, + ) { + (None, Some(_)) | (Some(_), None) => continue, + (Some(pattern_off), Some(query_off)) => { + let Some(captured) = try_capture_parameters(pattern_off, query_off) else { + continue 'pattern; + }; + parameters.extend(captured); + } + (None, None) => {} + } + if let Some(pattern_where) = where_clause { + for (i, query_where) in where_query.iter().enumerate() { + let captured = try_capture_parameters(pattern_where, &query_where.expr); + let Some(captured) = captured else { + continue; + }; + parameters.extend(captured); + where_query_covered = Some(i); + break; + } + } + + if where_clause.is_some() && where_query_covered.is_none() { + continue; + } + + let where_covered_completely = + where_query.is_empty() || where_query_covered.is_some() && where_query.len() == 1; + + if !where_covered_completely + && (!order_by.is_empty() || limit.is_some() || offset.is_some()) + { + continue; + } + + if let Some(where_covered) = where_query_covered { + where_query[where_covered].consumed = true; + } + + // todo: fix this + let mut covered_column_id = 1_000_000; + let mut covered_columns = HashMap::new(); + for (patter_column_id, pattern_column) in columns.iter().enumerate() { + let ast::ResultColumn::Expr(pattern, _) = pattern_column else { + continue; + }; + let Some(pattern) = try_substitute_parameters(pattern, ¶meters) else { + continue; + }; + for query_column in result_columns.iter_mut() { + if !exprs_are_equivalent(&query_column.expr, &pattern) { + continue; + } + query_column.expr = ast::Expr::Column { + database: None, + table: table.internal_id, + column: covered_column_id, + is_rowid_alias: false, + }; + covered_columns.insert(covered_column_id, patter_column_id); + covered_column_id += 1; + } + } + let _ = order_by.drain(..); + let _ = limit.take(); + let _ = offset.take(); + let mut arguments = parameters.iter().collect::>(); + arguments.sort_by_key(|(&i, _)| i); + + table.op = Operation::IndexMethodQuery(IndexMethodQuery { + index: index.clone(), + pattern_idx, + covered_columns, + arguments: arguments.iter().map(|(_, e)| (*e).clone()).collect(), + }); + return Ok(true); + } + } + Ok(false) +} + /// Optimize the join order and index selection for a query. /// /// This function does the following: @@ -333,14 +526,18 @@ fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { /// - Removes sorting operations if the selected join order and access methods satisfy the [crate::translate::optimizer::order::OrderTarget]. /// /// Returns the join order if it was optimized, or None if the default join order was considered best. +#[allow(clippy::too_many_arguments)] fn optimize_table_access( schema: &Schema, + result_columns: &mut [ResultSetColumn], table_references: &mut TableReferences, available_indexes: &HashMap>>, where_clause: &mut [WhereTerm], order_by: &mut Vec<(Box, SortOrder)>, group_by: &mut Option, subqueries: &[NonFromClauseSubquery], + limit: &mut Option>, + offset: &mut Option>, ) -> Result>> { if table_references.joined_tables().len() > TableReferences::MAX_JOINED_TABLES { crate::bail_parse_error!( @@ -348,6 +545,24 @@ fn optimize_table_access( TableReferences::MAX_JOINED_TABLES ); } + + if table_references.joined_tables().len() == 1 { + let optimized = optimize_table_access_with_custom_modules( + schema, + result_columns, + table_references, + available_indexes, + where_clause, + order_by, + group_by, + limit, + offset, + )?; + if optimized { + return Ok(None); + } + } + let access_methods_arena = RefCell::new(Vec::new()); let maybe_order_target = compute_order_target(order_by, group_by.as_mut()); let constraints_per_table = constraints_from_where_clause( From 6206294584e68833b40fcd3aaa2a76d2119151ac Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 18:24:49 +0400 Subject: [PATCH 14/23] fix clippy --- core/vdbe/execute.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 4fccdeb19..7e5858639 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -83,7 +83,7 @@ use super::{ sorter::Sorter, }; use regex::{Regex, RegexBuilder}; -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap}; #[cfg(feature = "json")] use crate::{ From 67c1855ba8bad9ae9439d6a9411c600bbc50d2ec Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 22:17:41 +0400 Subject: [PATCH 15/23] fix bug --- core/translate/index.rs | 4 ++-- core/vdbe/execute.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/translate/index.rs b/core/translate/index.rs index bacbf0668..f4e40d7ae 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -201,7 +201,7 @@ pub fn translate_create_index( // Create a new B-Tree and store the root page index in a register let root_page_reg = program.alloc_register(); - if !idx.is_backing_btree_index() { + if idx.index_method.is_some() && !idx.is_backing_btree_index() { program.emit_insn(Insn::IndexMethodCreate { db: 0, cursor_id: index_cursor_id, @@ -737,7 +737,7 @@ pub fn translate_drop_index( }); let index = maybe_index.unwrap(); - if !index.is_backing_btree_index() { + if index.index_method.is_some() && !index.is_backing_btree_index() { let cursor_id = program.alloc_cursor_index(None, index)?; program.emit_insn(Insn::IndexMethodDestroy { db: 0, cursor_id }); } else { diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 7e5858639..8050e6f2b 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -83,7 +83,7 @@ use super::{ sorter::Sorter, }; use regex::{Regex, RegexBuilder}; -use std::collections::{HashMap}; +use std::collections::HashMap; #[cfg(feature = "json")] use crate::{ From e42ce245343e9b5aa74c1aee266e4240fc01f945 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 22:22:57 +0400 Subject: [PATCH 16/23] fix fmt --- core/translate/subquery.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index a2bc5e874..84b2ef939 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -392,7 +392,10 @@ pub fn emit_from_clause_subqueries( }, Operation::IndexMethodQuery(query) => { let index_method = query.index.index_method.as_ref().unwrap(); - format!("QUERY INDEX METHOD {}", index_method.definition().method_name) + format!( + "QUERY INDEX METHOD {}", + index_method.definition().method_name + ) } } ); From 8acbe3de663b64190f5200a80a7906a876d4b07d Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 23:07:37 +0400 Subject: [PATCH 17/23] make query_start method to return bool - if result will have some rows or not --- core/index_method/mod.rs | 4 +++- core/index_method/toy_vector_sparse_ivf.rs | 21 ++++++++------------- core/translate/main_loop.rs | 5 +---- core/vdbe/builder.rs | 3 +++ core/vdbe/execute.rs | 9 +++++++-- core/vdbe/insn.rs | 1 + 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/core/index_method/mod.rs b/core/index_method/mod.rs index 38b47084a..8f3b5097d 100644 --- a/core/index_method/mod.rs +++ b/core/index_method/mod.rs @@ -87,7 +87,9 @@ pub trait IndexMethodCursor { /// For example, for 2 patterns ["SELECT * FROM {table} LIMIT ?", "SELECT * FROM {table} WHERE x = ?"], query_start(...) call can have following arguments: /// - [Integer(0), Integer(10)] - pattern "SELECT * FROM {table} LIMIT ?" was chosen with LIMIT parameter equals to 10 /// - [Integer(1), Text("turso")] - pattern "SELECT * FROM {table} WHERE x = ?" was chosen with equality comparison equals to "turso" - fn query_start(&mut self, values: &[Register]) -> Result>; + /// + /// Returns false if query will produce no rows (similar to VFilter/Rewind op codes) + fn query_start(&mut self, values: &[Register]) -> Result>; /// Moves cursor to the next response row /// Returns false if query exhausted all rows diff --git a/core/index_method/toy_vector_sparse_ivf.rs b/core/index_method/toy_vector_sparse_ivf.rs index 87fa18121..2e169ddc6 100644 --- a/core/index_method/toy_vector_sparse_ivf.rs +++ b/core/index_method/toy_vector_sparse_ivf.rs @@ -151,7 +151,6 @@ pub struct VectorSparseInvertedIndexMethodCursor { delete_state: VectorSparseInvertedIndexDeleteState, search_state: VectorSparseInvertedIndexSearchState, search_result: VecDeque<(i64, f64)>, - search_row: Option<(i64, f64)>, } impl IndexMethod for VectorSparseInvertedIndexMethod { @@ -199,7 +198,6 @@ impl VectorSparseInvertedIndexMethodCursor { scratch_cursor: None, main_btree: None, search_result: VecDeque::new(), - search_row: None, create_state: VectorSparseInvertedIndexCreateState::Init, insert_state: VectorSparseInvertedIndexInsertState::Init, delete_state: VectorSparseInvertedIndexDeleteState::Init, @@ -459,7 +457,7 @@ impl IndexMethodCursor for VectorSparseInvertedIndexMethodCursor { } } - fn query_start(&mut self, values: &[Register]) -> Result> { + fn query_start(&mut self, values: &[Register]) -> Result> { let Some(scratch) = &mut self.scratch_cursor else { return Err(LimboError::InternalError( "cursor must be opened".to_string(), @@ -521,7 +519,7 @@ impl IndexMethodCursor for VectorSparseInvertedIndexMethodCursor { continue; } let position = p[*idx]; - let key = ImmutableRecord::from_values(&[Value::Integer(position as i64)], 2); + let key = ImmutableRecord::from_values(&[Value::Integer(position as i64)], 1); self.search_state = VectorSparseInvertedIndexSearchState::Seek { collected: collected.take(), positions: positions.take(), @@ -551,7 +549,7 @@ impl IndexMethodCursor for VectorSparseInvertedIndexMethodCursor { limit: *limit, }; } - SeekResult::TryAdvance => { + SeekResult::TryAdvance | SeekResult::NotFound => { self.search_state = VectorSparseInvertedIndexSearchState::Next { collected: collected.take(), positions: positions.take(), @@ -560,9 +558,6 @@ impl IndexMethodCursor for VectorSparseInvertedIndexMethodCursor { limit: *limit, }; } - SeekResult::NotFound => { - return Err(LimboError::Corrupt("inverted index corrupted".to_string())) - } } } VectorSparseInvertedIndexSearchState::Read { @@ -637,7 +632,7 @@ impl IndexMethodCursor for VectorSparseInvertedIndexMethodCursor { let Some(rowid) = rowids.as_ref().unwrap().last() else { let distances = distances.take().unwrap(); self.search_result = distances.iter().map(|(d, i)| (*i, d.0)).collect(); - return Ok(IOResult::Done(())); + return Ok(IOResult::Done(!self.search_result.is_empty())); }; let result = return_if_io!( main.seek(SeekKey::TableRowId(*rowid), SeekOp::GE { eq_only: true }) @@ -709,17 +704,17 @@ impl IndexMethodCursor for VectorSparseInvertedIndexMethodCursor { } fn query_rowid(&mut self) -> Result>> { - let result = self.search_row.as_ref().unwrap(); + let result = self.search_result.front().unwrap(); Ok(IOResult::Done(Some(result.0))) } fn query_column(&mut self, _: usize) -> Result> { - let result = self.search_row.as_ref().unwrap(); + let result = self.search_result.front().unwrap(); Ok(IOResult::Done(Value::Float(result.1))) } fn query_next(&mut self) -> Result> { - self.search_row = self.search_result.pop_front(); - Ok(IOResult::Done(self.search_row.is_some())) + let _ = self.search_result.pop_front(); + Ok(IOResult::Done(!self.search_result.is_empty())) } } diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 3f6a78a76..8cdedf6f8 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -720,10 +720,7 @@ pub fn open_loop( cursor_id: index_cursor_id.expect("IndexMethod requires a index cursor"), start_reg, count_reg: query.arguments.len() + 1, - }); - program.emit_insn(Insn::Next { - cursor_id: index_cursor_id.expect("IndexMethod requires a index cursor"), - pc_if_next: loop_end, + pc_if_empty: loop_end, }); program.preassign_label_to_next_insn(loop_start); if let Some(table_cursor_id) = table_cursor_id { diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index ba8f1d200..7ca227198 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -819,6 +819,9 @@ impl ProgramBuilder { Insn::IdxLT { target_pc, .. } => { resolve(target_pc, "IdxLT"); } + Insn::IndexMethodQuery { pc_if_empty, .. } => { + resolve(pc_if_empty, "IndexMethodQuery"); + } Insn::IsNull { reg: _, target_pc } => { resolve(target_pc, "IsNull"); } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 8050e6f2b..b2386ffa3 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -7248,6 +7248,7 @@ pub fn op_index_method_query( cursor_id, start_reg, count_reg, + pc_if_empty, }, insn ); @@ -7260,8 +7261,12 @@ pub fn op_index_method_query( } let cursor = state.cursors[*cursor_id].as_mut().unwrap(); let cursor = cursor.as_index_method_mut(); - return_if_io!(cursor.query_start(&state.registers[*start_reg..*start_reg + *count_reg])); - state.pc += 1; + let has_rows = return_if_io!(cursor.query_start(&state.registers[*start_reg..*start_reg + *count_reg])); + if !has_rows { + state.pc = pc_if_empty.as_offset_int(); + } else { + state.pc += 1; + } Ok(InsnFunctionStepResult::Step) } diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index b9f9cc622..98693d718 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -895,6 +895,7 @@ pub enum Insn { cursor_id: CursorID, start_reg: usize, count_reg: usize, + pc_if_empty: BranchOffset, }, /// Deletes an entire database table or index whose root page in the database file is given by P1. From 18989185d46ec9aaa0da34cc2463c02167065037 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 23:07:57 +0400 Subject: [PATCH 18/23] add simple fuzz test --- tests/integration/index_method/mod.rs | 72 +++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tests/integration/index_method/mod.rs b/tests/integration/index_method/mod.rs index a74ca4d1f..015ddb5eb 100644 --- a/tests/integration/index_method/mod.rs +++ b/tests/integration/index_method/mod.rs @@ -1,5 +1,8 @@ use std::collections::HashMap; +use core_tester::common::rng_from_time_or_env; +use rand::RngCore; +use rand_chacha::ChaCha8Rng; use turso_core::{ index_method::{ toy_vector_sparse_ivf::VectorSparseInvertedIndexMethod, IndexMethod, @@ -253,3 +256,72 @@ fn test_vector_sparse_ivf_update() { ); assert!(!run(&tmp_db, || reader.query_next()).unwrap()); } + +#[test] +fn test_vector_sparse_ivf_fuzz() { + let _ = env_logger::try_init(); + let simple_db = + TempDatabase::new_with_rusqlite("CREATE TABLE t(key TEXT PRIMARY KEY, embedding)"); + let index_db = + TempDatabase::new_with_rusqlite("CREATE TABLE t(key TEXT PRIMARY KEY, embedding)"); + let simple_conn = simple_db.connect_limbo(); + let index_conn = index_db.connect_limbo(); + index_conn + .execute("CREATE INDEX t_idx ON t USING toy_vector_sparse_ivf (embedding)") + .unwrap(); + + let vector = |dim: usize, rng: &mut ChaCha8Rng| { + let mut values = Vec::with_capacity(dim); + for _ in 0..dim { + if rng.next_u32() % 10 == 0 { + values.push((rng.next_u32() as f32 / (u32::MAX as f32)).to_string()); + } else { + values.push("0".to_string()) + } + } + format!("[{}]", values.join(", ")) + }; + + let (mut rng, seed) = rng_from_time_or_env(); + tracing::info!("seed: {}", seed); + let mut keys = Vec::new(); + for _ in 0..10000 { + let choice = rng.next_u32() % 4; + if choice == 0 { + let key = rng.next_u64().to_string(); + let v = vector(128, &mut rng); + let sql = format!("INSERT INTO t VALUES ('{}', vector32_sparse('{}'))", key, v); + tracing::info!("{}", sql); + simple_conn.execute(&sql).unwrap(); + index_conn.execute(sql).unwrap(); + keys.push(key); + } else if choice == 1 && !keys.is_empty() { + // let idx = rng.next_u32() as usize % keys.len(); + // let key = &keys[idx]; + // let v = vector(128, &mut rng); + // let sql = format!( + // "UPDATE t SET embedding = vector32_sparse('{}') WHERE key = '{}'", + // v, key + // ); + // tracing::info!("{}", sql); + // simple_conn.execute(&sql).unwrap(); + // index_conn.execute(&sql).unwrap(); + } else if choice == 2 && !keys.is_empty() { + // let idx = rng.next_u32() as usize % keys.len(); + // let key = &keys[idx]; + // let sql = format!("DELETE FROM t WHERE key = '{}'", key); + // tracing::info!("{}", sql); + // simple_conn.execute(&sql).unwrap(); + // index_conn.execute(&sql).unwrap(); + // keys.remove(idx); + } else { + let v = vector(128, &mut rng); + let k = rng.next_u32() % 20; + let sql = format!("SELECT key, vector_distance_jaccard(embedding, vector32_sparse('{}')) as d FROM t ORDER BY d LIMIT {}", v, k); + tracing::info!("{}", sql); + let simple_rows = limbo_exec_rows(&simple_db, &simple_conn, &sql); + let index_rows = limbo_exec_rows(&index_db, &index_conn, &sql); + tracing::info!("simple: {:?}, index_rows: {:?}", simple_rows, index_rows); + } + } +} From 6f62621b5e931ce958bb75cb7a5383289d94052c Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 23:37:01 +0400 Subject: [PATCH 19/23] adjust test more --- tests/integration/index_method/mod.rs | 136 +++++++++++++++----------- 1 file changed, 77 insertions(+), 59 deletions(-) diff --git a/tests/integration/index_method/mod.rs b/tests/integration/index_method/mod.rs index 015ddb5eb..565c8a328 100644 --- a/tests/integration/index_method/mod.rs +++ b/tests/integration/index_method/mod.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use core_tester::common::rng_from_time_or_env; -use rand::RngCore; +use rand::{RngCore, SeedableRng}; use rand_chacha::ChaCha8Rng; use turso_core::{ index_method::{ @@ -260,68 +260,86 @@ fn test_vector_sparse_ivf_update() { #[test] fn test_vector_sparse_ivf_fuzz() { let _ = env_logger::try_init(); - let simple_db = - TempDatabase::new_with_rusqlite("CREATE TABLE t(key TEXT PRIMARY KEY, embedding)"); - let index_db = - TempDatabase::new_with_rusqlite("CREATE TABLE t(key TEXT PRIMARY KEY, embedding)"); - let simple_conn = simple_db.connect_limbo(); - let index_conn = index_db.connect_limbo(); - index_conn - .execute("CREATE INDEX t_idx ON t USING toy_vector_sparse_ivf (embedding)") - .unwrap(); - let vector = |dim: usize, rng: &mut ChaCha8Rng| { - let mut values = Vec::with_capacity(dim); - for _ in 0..dim { - if rng.next_u32() % 10 == 0 { - values.push((rng.next_u32() as f32 / (u32::MAX as f32)).to_string()); - } else { - values.push("0".to_string()) - } - } - format!("[{}]", values.join(", ")) - }; + const DIMS: usize = 40; + const MOD: u32 = 5; let (mut rng, seed) = rng_from_time_or_env(); - tracing::info!("seed: {}", seed); let mut keys = Vec::new(); - for _ in 0..10000 { - let choice = rng.next_u32() % 4; - if choice == 0 { - let key = rng.next_u64().to_string(); - let v = vector(128, &mut rng); - let sql = format!("INSERT INTO t VALUES ('{}', vector32_sparse('{}'))", key, v); - tracing::info!("{}", sql); - simple_conn.execute(&sql).unwrap(); - index_conn.execute(sql).unwrap(); - keys.push(key); - } else if choice == 1 && !keys.is_empty() { - // let idx = rng.next_u32() as usize % keys.len(); - // let key = &keys[idx]; - // let v = vector(128, &mut rng); - // let sql = format!( - // "UPDATE t SET embedding = vector32_sparse('{}') WHERE key = '{}'", - // v, key - // ); - // tracing::info!("{}", sql); - // simple_conn.execute(&sql).unwrap(); - // index_conn.execute(&sql).unwrap(); - } else if choice == 2 && !keys.is_empty() { - // let idx = rng.next_u32() as usize % keys.len(); - // let key = &keys[idx]; - // let sql = format!("DELETE FROM t WHERE key = '{}'", key); - // tracing::info!("{}", sql); - // simple_conn.execute(&sql).unwrap(); - // index_conn.execute(&sql).unwrap(); - // keys.remove(idx); - } else { - let v = vector(128, &mut rng); - let k = rng.next_u32() % 20; - let sql = format!("SELECT key, vector_distance_jaccard(embedding, vector32_sparse('{}')) as d FROM t ORDER BY d LIMIT {}", v, k); - tracing::info!("{}", sql); - let simple_rows = limbo_exec_rows(&simple_db, &simple_conn, &sql); - let index_rows = limbo_exec_rows(&index_db, &index_conn, &sql); - tracing::info!("simple: {:?}, index_rows: {:?}", simple_rows, index_rows); + for _ in 0..10 { + let seed = rng.next_u64(); + tracing::info!("======== seed: {} ========", seed); + + let mut rng = ChaCha8Rng::seed_from_u64(seed); + let simple_db = + TempDatabase::new_with_rusqlite("CREATE TABLE t(key TEXT PRIMARY KEY, embedding)"); + let index_db = + TempDatabase::new_with_rusqlite("CREATE TABLE t(key TEXT PRIMARY KEY, embedding)"); + let simple_conn = simple_db.connect_limbo(); + let index_conn = index_db.connect_limbo(); + index_conn + .execute("CREATE INDEX t_idx ON t USING toy_vector_sparse_ivf (embedding)") + .unwrap(); + let vector = |rng: &mut ChaCha8Rng| { + let mut values = Vec::with_capacity(DIMS); + for _ in 0..DIMS { + if rng.next_u32() % MOD == 0 { + values.push((rng.next_u32() as f32 / (u32::MAX as f32)).to_string()); + } else { + values.push("0".to_string()) + } + } + format!("[{}]", values.join(", ")) + }; + + for _ in 0..200 { + let choice = rng.next_u32() % 4; + if choice == 0 { + let key = rng.next_u64().to_string(); + let v = vector(&mut rng); + let sql = format!("INSERT INTO t VALUES ('{}', vector32_sparse('{}'))", key, v); + tracing::info!("{}", sql); + simple_conn.execute(&sql).unwrap(); + index_conn.execute(sql).unwrap(); + keys.push(key); + } else if choice == 1 && !keys.is_empty() { + let idx = rng.next_u32() as usize % keys.len(); + let key = &keys[idx]; + let v = vector(&mut rng); + let sql = format!( + "UPDATE t SET embedding = vector32_sparse('{}') WHERE key = '{}'", + v, key + ); + tracing::info!("{}", sql); + simple_conn.execute(&sql).unwrap(); + index_conn.execute(&sql).unwrap(); + } else if choice == 2 && !keys.is_empty() { + let idx = rng.next_u32() as usize % keys.len(); + let key = &keys[idx]; + let sql = format!("DELETE FROM t WHERE key = '{}'", key); + tracing::info!("{}", sql); + simple_conn.execute(&sql).unwrap(); + index_conn.execute(&sql).unwrap(); + keys.remove(idx); + } else { + let v = vector(&mut rng); + let k = rng.next_u32() % 20 + 1; + let sql = format!("SELECT key, vector_distance_jaccard(embedding, vector32_sparse('{}')) as d FROM t ORDER BY d LIMIT {}", v, k); + tracing::info!("{}", sql); + let simple_rows = limbo_exec_rows(&simple_db, &simple_conn, &sql); + let index_rows = limbo_exec_rows(&index_db, &index_conn, &sql); + assert!(index_rows.len() <= simple_rows.len()); + for i in 0..index_rows.len() { + assert_eq!(index_rows[i], simple_rows[i]); + } + for i in index_rows.len()..simple_rows.len() { + match &simple_rows[i][1] { + rusqlite::types::Value::Real(r) => assert_eq!(*r, 1.0), + _ => panic!("unexpected simple row value"), + } + } + tracing::info!("simple: {:?}, index_rows: {:?}", simple_rows, index_rows); + } } } } From 8ea733f9171d3001fe3f2f1c988ef9b8ea39f691 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 23:37:20 +0400 Subject: [PATCH 20/23] fix bug with cursor allocation --- core/index_method/toy_vector_sparse_ivf.rs | 6 +++++- core/translate/emitter.rs | 2 +- core/vdbe/builder.rs | 1 + core/vdbe/execute.rs | 4 +++- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/core/index_method/toy_vector_sparse_ivf.rs b/core/index_method/toy_vector_sparse_ivf.rs index 2e169ddc6..51b53dc03 100644 --- a/core/index_method/toy_vector_sparse_ivf.rs +++ b/core/index_method/toy_vector_sparse_ivf.rs @@ -43,6 +43,7 @@ pub enum VectorSparseInvertedIndexCreateState { Run { stmt: Box }, } +#[derive(Debug)] pub enum VectorSparseInvertedIndexInsertState { Init, Prepare { @@ -64,6 +65,7 @@ pub enum VectorSparseInvertedIndexInsertState { }, } +#[derive(Debug)] pub enum VectorSparseInvertedIndexDeleteState { Init, Prepare { @@ -286,6 +288,7 @@ impl IndexMethodCursor for VectorSparseInvertedIndexMethodCursor { )); }; loop { + tracing::debug!("insert_state: {:?}", self.insert_state); match &mut self.insert_state { VectorSparseInvertedIndexInsertState::Init => { let Some(vector) = values[0].get_value().to_blob() else { @@ -375,6 +378,7 @@ impl IndexMethodCursor for VectorSparseInvertedIndexMethodCursor { )); }; loop { + tracing::debug!("delete_state: {:?}", self.delete_state); match &mut self.delete_state { VectorSparseInvertedIndexDeleteState::Init => { let Some(vector) = values[0].get_value().to_blob() else { @@ -469,7 +473,7 @@ impl IndexMethodCursor for VectorSparseInvertedIndexMethodCursor { )); }; loop { - tracing::debug!("state: {:?}", self.search_state); + tracing::debug!("query_state: {:?}", self.search_state); match &mut self.search_state { VectorSparseInvertedIndexSearchState::Init => { let Some(vector) = values[1].get_value().to_blob() else { diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 3c95f028a..66c0bda59 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -983,7 +983,7 @@ fn emit_program_for_update( )) { cursor } else { - let cursor = program.alloc_cursor_id(CursorType::BTreeIndex(index.clone())); + let cursor = program.alloc_cursor_index(None, &index)?; program.emit_insn(Insn::OpenWrite { cursor_id: cursor, root_page: RegisterOrLiteral::Literal(index.root_page), diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 7ca227198..72cfe9b78 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -340,6 +340,7 @@ impl ProgramBuilder { key: Option, index: &Arc, ) -> crate::Result { + tracing::debug!("alloc cursor: {:?} {:?}", key, index.index_method.is_some()); let module = index.index_method.as_ref(); if module.is_some_and(|m| !m.definition().backing_btree) { let module = module.unwrap().clone(); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index b2386ffa3..5b283d8d5 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -6364,6 +6364,7 @@ pub fn op_idx_delete( insn ); + tracing::info!("idx_delete cursor: {:?}", program.cursor_ref[*cursor_id]); if let Some(Cursor::IndexMethod(cursor)) = &mut state.cursors[*cursor_id] { return_if_io!(cursor.delete(&state.registers[*start_reg..*start_reg + *num_regs])); state.pc += 1; @@ -7261,7 +7262,8 @@ pub fn op_index_method_query( } let cursor = state.cursors[*cursor_id].as_mut().unwrap(); let cursor = cursor.as_index_method_mut(); - let has_rows = return_if_io!(cursor.query_start(&state.registers[*start_reg..*start_reg + *count_reg])); + let has_rows = + return_if_io!(cursor.query_start(&state.registers[*start_reg..*start_reg + *count_reg])); if !has_rows { state.pc = pc_if_empty.as_offset_int(); } else { From e7cab016d4d0e5c62039bfb8d5382198720c59e3 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Mon, 27 Oct 2025 23:43:56 +0400 Subject: [PATCH 21/23] fix tests --- tests/integration/index_method/mod.rs | 41 +++++++++++++-------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/tests/integration/index_method/mod.rs b/tests/integration/index_method/mod.rs index 565c8a328..dd5e32634 100644 --- a/tests/integration/index_method/mod.rs +++ b/tests/integration/index_method/mod.rs @@ -157,10 +157,9 @@ fn test_vector_sparse_ivf_insert_query() { Register::Value(sparse_vector(vector)), Register::Value(Value::Integer(5)), ]; - run(&tmp_db, || cursor.query_start(&values)).unwrap(); + assert!(run(&tmp_db, || cursor.query_start(&values)).unwrap()); - for (rowid, dist) in results { - assert!(run(&tmp_db, || cursor.query_next()).unwrap()); + for (i, (rowid, dist)) in results.iter().enumerate() { assert_eq!( *rowid, run(&tmp_db, || cursor.query_rowid()).unwrap().unwrap() @@ -169,9 +168,11 @@ fn test_vector_sparse_ivf_insert_query() { *dist, run(&tmp_db, || cursor.query_column(0)).unwrap().as_float() ); + assert_eq!( + i + 1 < results.len(), + run(&tmp_db, || cursor.query_next()).unwrap() + ); } - - assert!(!run(&tmp_db, || cursor.query_next()).unwrap()); } } @@ -234,8 +235,7 @@ fn test_vector_sparse_ivf_update() { let mut reader = attached.init().unwrap(); run(&tmp_db, || reader.open_read(&conn)).unwrap(); - run(&tmp_db, || reader.query_start(&query_values)).unwrap(); - assert!(!run(&tmp_db, || reader.query_next()).unwrap()); + assert!(!run(&tmp_db, || reader.query_start(&query_values)).unwrap()); limbo_exec_rows( &tmp_db, @@ -247,8 +247,7 @@ fn test_vector_sparse_ivf_update() { let mut reader = attached.init().unwrap(); run(&tmp_db, || reader.open_read(&conn)).unwrap(); - run(&tmp_db, || reader.query_start(&query_values)).unwrap(); - assert!(run(&tmp_db, || reader.query_next()).unwrap()); + assert!(run(&tmp_db, || reader.query_start(&query_values)).unwrap()); assert_eq!(1, run(&tmp_db, || reader.query_rowid()).unwrap().unwrap()); assert_eq!( 0.0, @@ -264,7 +263,7 @@ fn test_vector_sparse_ivf_fuzz() { const DIMS: usize = 40; const MOD: u32 = 5; - let (mut rng, seed) = rng_from_time_or_env(); + let (mut rng, _) = rng_from_time_or_env(); let mut keys = Vec::new(); for _ in 0..10 { let seed = rng.next_u64(); @@ -297,7 +296,7 @@ fn test_vector_sparse_ivf_fuzz() { if choice == 0 { let key = rng.next_u64().to_string(); let v = vector(&mut rng); - let sql = format!("INSERT INTO t VALUES ('{}', vector32_sparse('{}'))", key, v); + let sql = format!("INSERT INTO t VALUES ('{key}', vector32_sparse('{v}'))"); tracing::info!("{}", sql); simple_conn.execute(&sql).unwrap(); index_conn.execute(sql).unwrap(); @@ -306,17 +305,15 @@ fn test_vector_sparse_ivf_fuzz() { let idx = rng.next_u32() as usize % keys.len(); let key = &keys[idx]; let v = vector(&mut rng); - let sql = format!( - "UPDATE t SET embedding = vector32_sparse('{}') WHERE key = '{}'", - v, key - ); + let sql = + format!("UPDATE t SET embedding = vector32_sparse('{v}') WHERE key = '{key}'",); tracing::info!("{}", sql); simple_conn.execute(&sql).unwrap(); index_conn.execute(&sql).unwrap(); } else if choice == 2 && !keys.is_empty() { let idx = rng.next_u32() as usize % keys.len(); let key = &keys[idx]; - let sql = format!("DELETE FROM t WHERE key = '{}'", key); + let sql = format!("DELETE FROM t WHERE key = '{key}'"); tracing::info!("{}", sql); simple_conn.execute(&sql).unwrap(); index_conn.execute(&sql).unwrap(); @@ -324,17 +321,17 @@ fn test_vector_sparse_ivf_fuzz() { } else { let v = vector(&mut rng); let k = rng.next_u32() % 20 + 1; - let sql = format!("SELECT key, vector_distance_jaccard(embedding, vector32_sparse('{}')) as d FROM t ORDER BY d LIMIT {}", v, k); + let sql = format!("SELECT key, vector_distance_jaccard(embedding, vector32_sparse('{v}')) as d FROM t ORDER BY d LIMIT {k}"); tracing::info!("{}", sql); let simple_rows = limbo_exec_rows(&simple_db, &simple_conn, &sql); let index_rows = limbo_exec_rows(&index_db, &index_conn, &sql); assert!(index_rows.len() <= simple_rows.len()); - for i in 0..index_rows.len() { - assert_eq!(index_rows[i], simple_rows[i]); + for (a, b) in index_rows.iter().zip(simple_rows.iter()) { + assert_eq!(a, b); } - for i in index_rows.len()..simple_rows.len() { - match &simple_rows[i][1] { - rusqlite::types::Value::Real(r) => assert_eq!(*r, 1.0), + for row in simple_rows.iter().skip(index_rows.len()) { + match row[1] { + rusqlite::types::Value::Real(r) => assert_eq!(r, 1.0), _ => panic!("unexpected simple row value"), } } From bec295f2c098296fafa56d066fae074623fe273b Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 28 Oct 2025 00:44:23 +0400 Subject: [PATCH 22/23] fix clippy --- core/translate/emitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 66c0bda59..10dd99131 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -983,7 +983,7 @@ fn emit_program_for_update( )) { cursor } else { - let cursor = program.alloc_cursor_index(None, &index)?; + let cursor = program.alloc_cursor_index(None, index)?; program.emit_insn(Insn::OpenWrite { cursor_id: cursor, root_page: RegisterOrLiteral::Literal(index.root_page), From 0da3b4bfd316598cdf88792ebac6d15a464e3505 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 28 Oct 2025 11:26:02 +0400 Subject: [PATCH 23/23] fix after rebase --- core/translate/expr.rs | 5 +---- core/translate/optimizer/mod.rs | 4 ++-- core/translate/plan.rs | 4 +++- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 9f8295668..c0065dd3b 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -2157,7 +2157,7 @@ pub fn translate_expr( ) } } else { - let table_cursor_id = if use_covering_index { + let table_cursor_id = if use_covering_index || use_index_method.is_some() { None } else { Some(program.resolve_cursor_id(&CursorKey::table(*table_ref_id))) @@ -2169,9 +2169,6 @@ pub fn translate_expr( (table_cursor_id, index_cursor_id) }; - let index_cursor_id = index.map(|index| { - program.resolve_cursor_id(&CursorKey::index(*table_ref_id, index.clone())) - }); if let Some(custom_module_column) = use_index_method { program.emit_column_or_rowid( index_cursor_id.unwrap(), diff --git a/core/translate/optimizer/mod.rs b/core/translate/optimizer/mod.rs index 5c18f4425..862aa3790 100644 --- a/core/translate/optimizer/mod.rs +++ b/core/translate/optimizer/mod.rs @@ -23,8 +23,8 @@ use crate::{ constraints::{RangeConstraintRef, SeekRangeConstraint, TableConstraints}, }, plan::{ - ColumnUsedMask, IndexMethodQuery, NonFromClauseSubquery, OuterQueryReference, QueryDestination, - ResultSetColumn, Scan, SeekKeyComponent, + ColumnUsedMask, IndexMethodQuery, NonFromClauseSubquery, OuterQueryReference, + QueryDestination, ResultSetColumn, Scan, SeekKeyComponent, }, }, types::SeekOp, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index a8c5cac52..32daaa8b2 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,5 +1,7 @@ use std::{cmp::Ordering, collections::HashMap, sync::Arc}; -use turso_parser::ast::{self, FrameBound, FrameClause, FrameExclude, FrameMode, SortOrder}; +use turso_parser::ast::{ + self, FrameBound, FrameClause, FrameExclude, FrameMode, SortOrder, SubqueryType, +}; use crate::{ function::AggFunc,