From 04cb09be2a059cfe609b0ea4d49abf58d59d1c01 Mon Sep 17 00:00:00 2001 From: meteorgan Date: Mon, 14 Apr 2025 20:57:54 +0800 Subject: [PATCH 01/16] Bump julian_day_converter to 0.4.5 --- Cargo.lock | 4 ++-- core/Cargo.toml | 2 +- testing/scalar-functions-datetime.test | 4 ++++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 410bf6e34..bbd4f15ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1540,9 +1540,9 @@ dependencies = [ [[package]] name = "julian_day_converter" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aa5652b85ab018289638c6b924db618da9edd2ddfff7fa0ec38a8b51a9192d3" +checksum = "f2987f71b89b85c812c8484cbf0c5d7912589e77bfdc66fd3e52f760e7859f16" dependencies = [ "chrono", ] diff --git a/core/Cargo.toml b/core/Cargo.toml index a790a0ca3..eb5d092b0 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -53,7 +53,7 @@ regex-syntax = { version = "0.8.5", default-features = false, features = [ "unicode", ] } chrono = { version = "0.4.38", default-features = false, features = ["clock"] } -julian_day_converter = "0.4.4" +julian_day_converter = "0.4.5" rand = "0.8.5" libm = "0.2" limbo_macros = { workspace = true } diff --git a/testing/scalar-functions-datetime.test b/testing/scalar-functions-datetime.test index 3c2f7b771..33caf52c2 100755 --- a/testing/scalar-functions-datetime.test +++ b/testing/scalar-functions-datetime.test @@ -597,6 +597,10 @@ foreach i $FMT { do_execsql_test strftime-invalid-$i "SELECT strftime('$i','2025-01-23T13:14:30.567');" {} } +do_execsql_test strftime-julianday { + SELECT strftime('%Y-%m-%d %H:%M:%fZ', 2459717.08070103); +} {"2022-05-17 13:56:12.569Z"} + # Tests for the TIMEDIFF function From 1c827524735d53da95318707574f6a359652c63c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20K=C3=B6sters?= Date: Mon, 21 Apr 2025 13:14:12 +0200 Subject: [PATCH 02/16] feat: Statement::columns function for Rust bindings --- 
bindings/rust/src/lib.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs index 61e6271c9..8c57e7909 100644 --- a/bindings/rust/src/lib.rs +++ b/bindings/rust/src/lib.rs @@ -190,6 +190,39 @@ impl Statement { } } } + + pub fn columns(&self) -> Vec { + let stmt = self.inner.lock().unwrap(); + + let n = stmt.num_columns(); + + let mut cols = Vec::with_capacity(n); + + for i in 0..n { + let name = stmt.get_column_name(i).into_owned(); + cols.push(Column { + name, + decl_type: None, // TODO + }); + } + + cols + } +} + +pub struct Column { + name: String, + decl_type: Option, +} + +impl Column { + pub fn name(&self) -> &str { + &self.name + } + + pub fn decl_type(&self) -> Option<&str> { + self.decl_type.as_deref() + } } pub trait IntoValue { From d0da7307be40e0ff915cb5e9abea3dfc088c78d6 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 20:33:57 +0300 Subject: [PATCH 03/16] Index: add new field ephemeral: bool --- core/schema.rs | 3 +++ core/translate/index.rs | 1 + 2 files changed, 4 insertions(+) diff --git a/core/schema.rs b/core/schema.rs index 0a5a8d80f..dd09671ab 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -692,6 +692,7 @@ pub struct Index { pub root_page: usize, pub columns: Vec, pub unique: bool, + pub ephemeral: bool, } #[allow(dead_code)] @@ -741,6 +742,7 @@ impl Index { root_page, columns: index_columns, unique, + ephemeral: false, }) } _ => todo!("Expected create index statement"), @@ -783,6 +785,7 @@ impl Index { root_page, columns: index_columns, unique: true, // Primary key indexes are always unique + ephemeral: false, }) } diff --git a/core/translate/index.rs b/core/translate/index.rs index de79aed23..55222e40f 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -62,6 +62,7 @@ pub fn translate_create_index( }) .collect(), unique: unique_if_not_exists.0, + ephemeral: false, }); // Allocate the necessary cursors: From 
09ad6d8f0149020fe1a5c81832e021ad095782a5 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 16 Apr 2025 14:23:13 +0300 Subject: [PATCH 04/16] vdbe: resolve labels for Insn::Once --- core/vdbe/builder.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 648044d1d..05fdc4938 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -363,6 +363,12 @@ impl ProgramBuilder { Insn::Next { pc_if_next, .. } => { resolve(pc_if_next, "Next"); } + Insn::Once { + target_pc_when_reentered, + .. + } => { + resolve(target_pc_when_reentered, "Once"); + } Insn::Prev { pc_if_prev, .. } => { resolve(pc_if_prev, "Prev"); } From c1b2dfc32b17fcd75f756b16a909dd4581b3c213 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:20:25 +0300 Subject: [PATCH 05/16] TableReference: add method column_is_used() --- core/translate/plan.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 07a8de392..46d4d38da 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -590,6 +590,10 @@ impl TableReference { }; self.index_is_covering(index.as_ref()) } + + pub fn column_is_used(&self, index: usize) -> bool { + self.col_used_mask.get(index) + } } /// A definition of a rowid/index search. 
From af21f60887bc3fce20930c7c152def5c0b619450 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 16 Apr 2025 14:22:54 +0300 Subject: [PATCH 06/16] translate/main_loop: create autoindex when index.ephemeral=true --- core/translate/main_loop.rs | 135 ++++++++++++++++++++++++++++++------ core/translate/plan.rs | 5 +- 2 files changed, 119 insertions(+), 21 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 7354eb4a1..c56680446 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,14 +1,16 @@ use limbo_ext::VTabKind; use limbo_sqlite3_parser::ast; +use std::sync::Arc; + use crate::{ - schema::Table, + schema::{Index, Table}, translate::result_row::emit_select_result, types::SeekOp, vdbe::{ builder::ProgramBuilder, - insn::{CmpInsFlags, Insn}, - BranchOffset, + insn::{CmpInsFlags, IdxInsertFlags, Insn}, + BranchOffset, CursorID, }, Result, }; @@ -156,23 +158,26 @@ pub fn init_loop( index: Some(index), .. } = search { - match mode { - OperationMode::SELECT => { - program.emit_insn(Insn::OpenRead { - cursor_id: index_cursor_id - .expect("index cursor is always opened in Seek with index"), - root_page: index.root_page, - }); - } - OperationMode::UPDATE | OperationMode::DELETE => { - program.emit_insn(Insn::OpenWrite { - cursor_id: index_cursor_id - .expect("index cursor is always opened in Seek with index"), - root_page: index.root_page.into(), - }); - } - _ => { - unimplemented!() + // Ephemeral index cursor are opened ad-hoc when needed. 
+ if !index.ephemeral { + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenRead { + cursor_id: index_cursor_id + .expect("index cursor is always opened in Seek with index"), + root_page: index.root_page, + }); + } + OperationMode::UPDATE | OperationMode::DELETE => { + program.emit_insn(Insn::OpenWrite { + cursor_id: index_cursor_id + .expect("index cursor is always opened in Seek with index"), + root_page: index.root_page.into(), + }); + } + _ => { + unimplemented!() + } } } } @@ -437,6 +442,32 @@ pub fn open_loop( }); } else { // Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore. + if let Search::Seek { + index: Some(index), .. + } = search + { + if index.ephemeral { + let table_has_rowid = if let Table::BTree(btree) = &table.table { + btree.has_rowid + } else { + false + }; + Some(emit_autoindex( + program, + &index, + table_cursor_id + .expect("an ephemeral index must have a source table cursor"), + index_cursor_id + .expect("an ephemeral index must have an index cursor"), + table_has_rowid, + )?) + } else { + index_cursor_id + } + } else { + index_cursor_id + }; + let is_index = index_cursor_id.is_some(); let seek_cursor_id = index_cursor_id.unwrap_or_else(|| { table_cursor_id.expect("Either index or table cursor must be opened") @@ -1125,3 +1156,67 @@ fn emit_seek_termination( Ok(()) } + +/// Open an ephemeral index cursor and build an automatic index on a table. +/// This is used as a last-resort to avoid a nested full table scan +/// Returns the cursor id of the ephemeral index cursor. 
+fn emit_autoindex( + program: &mut ProgramBuilder, + index: &Arc, + table_cursor_id: CursorID, + index_cursor_id: CursorID, + table_has_rowid: bool, +) -> Result { + assert!(index.ephemeral, "Index {} is not ephemeral", index.name); + let label_ephemeral_build_end = program.allocate_label(); + // Since this typically happens in an inner loop, we only build it once. + program.emit_insn(Insn::Once { + target_pc_when_reentered: label_ephemeral_build_end, + }); + program.emit_insn(Insn::OpenAutoindex { + cursor_id: index_cursor_id, + }); + // Rewind source table + program.emit_insn(Insn::Rewind { + cursor_id: table_cursor_id, + pc_if_empty: label_ephemeral_build_end, + }); + let offset_ephemeral_build_loop_start = program.offset(); + // Emit all columns from source table that are needed in the ephemeral index. + // Also reserve a register for the rowid if the source table has rowids. + let num_regs_to_reserve = index.columns.len() + table_has_rowid as usize; + let ephemeral_cols_start_reg = program.alloc_registers(num_regs_to_reserve); + for (i, col) in index.columns.iter().enumerate() { + let reg = ephemeral_cols_start_reg + i; + program.emit_insn(Insn::Column { + cursor_id: table_cursor_id, + column: col.pos_in_table, + dest: reg, + }); + } + if table_has_rowid { + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: ephemeral_cols_start_reg + index.columns.len(), + }); + } + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: ephemeral_cols_start_reg, + count: num_regs_to_reserve, + dest_reg: record_reg, + }); + program.emit_insn(Insn::IdxInsert { + cursor_id: index_cursor_id, + record_reg, + unpacked_start: Some(ephemeral_cols_start_reg), + unpacked_count: Some(num_regs_to_reserve as u16), + flags: IdxInsertFlags::new().use_seek(false), + }); + program.emit_insn(Insn::Next { + cursor_id: table_cursor_id, + pc_if_next: offset_ephemeral_build_loop_start, + }); + 
program.resolve_label(label_ephemeral_build_end, program.offset()); + Ok(index_cursor_id) +} diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 46d4d38da..51bc3f7c6 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -509,7 +509,10 @@ impl TableReference { match &self.table { Table::BTree(btree) => { let use_covering_index = self.utilizes_covering_index(); - let table_cursor_id = if use_covering_index && mode == OperationMode::SELECT { + let index_is_ephemeral = index.map_or(false, |index| index.ephemeral); + let table_not_required = + OperationMode::SELECT == mode && use_covering_index && !index_is_ephemeral; + let table_cursor_id = if table_not_required { None } else { Some(program.alloc_cursor_id( From a50fa03d247af77c6f19d885f585939f126b8412 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:21:34 +0300 Subject: [PATCH 07/16] optimizer: allow calling try_extract_index... without any persistent indexes --- core/translate/optimizer.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index fe764ee50..8e796c175 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -355,15 +355,18 @@ fn use_indexes( // but we just don't do that yet. continue; } + let placeholder = vec![]; + let mut usable_indexes_ref = &placeholder; if let Some(indexes) = available_indexes.get(table_name) { - if let Some(search) = try_extract_index_search_from_where_clause( - where_clause, - table_index, - table_reference, - indexes, - )? { - table_reference.op = Operation::Search(search); - } + usable_indexes_ref = indexes; + } + if let Some(search) = try_extract_index_search_from_where_clause( + where_clause, + table_index, + table_reference, + usable_indexes_ref, + )? 
{ + table_reference.op = Operation::Search(search); } } } @@ -730,10 +733,6 @@ pub fn try_extract_index_search_from_where_clause( if where_clause.is_empty() { return Ok(None); } - // If there are no indexes, we can't extract a search - if table_indexes.is_empty() { - return Ok(None); - } let iter_dir = if let Operation::Scan { iter_dir, .. } = &table_reference.op { *iter_dir From 6924424f11bc5c5f02c3da816e49c96d5cd32e66 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:22:40 +0300 Subject: [PATCH 08/16] optimizer: add highly unintelligent heuristics-based cost estimation --- core/translate/optimizer.rs | 160 ++++++++++++++++++++++++++++-------- 1 file changed, 125 insertions(+), 35 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 8e796c175..872758257 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -713,14 +713,80 @@ fn opposite_cmp_op(op: ast::Operator) -> ast::Operator { } /// Struct used for scoring index scans -/// Currently we just score by the number of index columns that can be utilized -/// in the scan, i.e. no statistics are used. +/// Currently we just estimate cost in a really dumb way, +/// i.e. no statistics are used. struct IndexScore { index: Option>, - score: usize, + cost: f64, constraints: Vec, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct IndexInfo { + unique: bool, + column_count: usize, +} + +const ESTIMATED_HARDCODED_ROWS_PER_TABLE: f64 = 1000.0; + +/// Unbelievably dumb cost estimate for rows scanned by an index scan. 
+fn dumb_cost_estimator( + index_info: Option, + constraints: &[IndexConstraint], + is_inner_loop: bool, + is_ephemeral: bool, +) -> f64 { + // assume that the outer table always does a full table scan :) + // this discourages building ephemeral indexes on the outer table + // (since a scan reads TABLE_ROWS rows, so an ephemeral index on the outer table would both read TABLE_ROWS rows to build the index and then seek the index) + // but encourages building it on the inner table because it's only built once but the inner loop is run as many times as the outer loop has iterations. + let loop_multiplier = if is_inner_loop { + ESTIMATED_HARDCODED_ROWS_PER_TABLE + } else { + 1.0 + }; + + // If we are building an ephemeral index, we assume we will scan the entire source table to build it. + // Non-ephemeral indexes don't need to be built. + let cost_to_build_index = is_ephemeral as usize as f64 * ESTIMATED_HARDCODED_ROWS_PER_TABLE; + + let Some(index_info) = index_info else { + return cost_to_build_index + ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier; + }; + + let final_constraint_is_range = constraints + .last() + .map_or(false, |c| c.operator != ast::Operator::Equals); + let equalities_count = constraints + .iter() + .take(if final_constraint_is_range { + constraints.len() - 1 + } else { + constraints.len() + }) + .count() as f64; + + let selectivity = match ( + index_info.unique, + index_info.column_count as f64, + equalities_count, + ) { + // no equalities: let's assume range query selectivity is 0.4. 
if final constraint is not range and there are no equalities, it means full table scan incoming + (_, _, 0.0) => { + if final_constraint_is_range { + 0.4 + } else { + 1.0 + } + } + // on an unique index if we have equalities across all index columns, assume very high selectivity + (true, index_cols, eq_count) if eq_count == index_cols => 0.01 * eq_count, + // some equalities: let's assume each equality has a selectivity of 0.1 and range query selectivity is 0.4 + (_, _, eq_count) => (eq_count * 0.1) * if final_constraint_is_range { 0.4 } else { 1.0 }, + }; + cost_to_build_index + selectivity * ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier +} + /// Try to extract an index search from the WHERE clause /// Returns an optional [Search] struct if an index search can be extracted, otherwise returns None. pub fn try_extract_index_search_from_where_clause( @@ -747,10 +813,11 @@ pub fn try_extract_index_search_from_where_clause( // 3. constrain the index columns in the order that they appear in the index // - e.g. if the index is on (a,b,c) then we can use all of "a = 1 AND b = 2 AND c = 3" to constrain the index scan, // - but if the where clause is "a = 1 and c = 3" then we can only use "a = 1". + let cost_of_full_table_scan = dumb_cost_estimator(None, &[], table_index != 0, false); let mut constraints_cur = vec![]; let mut best_index = IndexScore { index: None, - score: 0, + cost: cost_of_full_table_scan, constraints: vec![], }; @@ -759,10 +826,18 @@ pub fn try_extract_index_search_from_where_clause( find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?; // naive scoring since we don't have statistics: prefer the index where we can use the most columns // e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c. 
- let score = constraints_cur.len(); - if score > best_index.score { + let cost = dumb_cost_estimator( + Some(IndexInfo { + unique: index.unique, + column_count: index.columns.len(), + }), + &constraints_cur, + table_index != 0, + false, + ); + if cost < best_index.cost { best_index.index = Some(Arc::clone(index)); - best_index.score = score; + best_index.cost = cost; best_index.constraints.clear(); best_index.constraints.append(&mut constraints_cur); } @@ -873,6 +948,45 @@ fn get_column_position_in_index( Ok(index.column_table_pos_to_index_pos(*column)) } +fn is_potential_index_constraint(term: &WhereTerm, table_index: usize) -> bool { + // Skip terms that cannot be evaluated at this table's loop level + if !term.should_eval_at_loop(table_index) { + return false; + } + // Skip terms that are not binary comparisons + let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else { + return false; + }; + // Only consider index scans for binary ops that are comparisons + if !matches!( + *operator, + ast::Operator::Equals + | ast::Operator::Greater + | ast::Operator::GreaterEquals + | ast::Operator::Less + | ast::Operator::LessEquals + ) { + return false; + } + + // If both lhs and rhs refer to columns from this table, we can't use this constraint + // because we can't use the index to satisfy the condition. + // Examples: + // - WHERE t.x > t.y + // - WHERE t.x + 1 > t.y - 5 + // - WHERE t.x = (t.x) + let Ok(eval_at_left) = determine_where_to_eval_expr(&lhs) else { + return false; + }; + let Ok(eval_at_right) = determine_where_to_eval_expr(&rhs) else { + return false; + }; + if eval_at_left == EvalAt::Loop(table_index) && eval_at_right == EvalAt::Loop(table_index) { + return false; + } + true +} + /// Find all [IndexConstraint]s for a given WHERE clause /// Constraints are appended as long as they constrain the index in column order. /// E.g. for index (a,b,c) to be fully used, there must be a [WhereTerm] for each of a, b, and c. 
@@ -886,37 +1000,13 @@ fn find_index_constraints( for position_in_index in 0..index.columns.len() { let mut found = false; for (position_in_where_clause, term) in where_clause.iter().enumerate() { - // Skip terms that cannot be evaluated at this table's loop level - if !term.should_eval_at_loop(table_index) { - continue; - } - // Skip terms that are not binary comparisons - let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else { - continue; - }; - // Only consider index scans for binary ops that are comparisons - if !matches!( - *operator, - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals - | ast::Operator::Less - | ast::Operator::LessEquals - ) { + if !is_potential_index_constraint(term, table_index) { continue; } - // If both lhs and rhs refer to columns from this table, we can't use this constraint - // because we can't use the index to satisfy the condition. - // Examples: - // - WHERE t.x > t.y - // - WHERE t.x + 1 > t.y - 5 - // - WHERE t.x = (t.x) - if determine_where_to_eval_expr(&lhs)? == EvalAt::Loop(table_index) - && determine_where_to_eval_expr(&rhs)? == EvalAt::Loop(table_index) - { - continue; - } + let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else { + panic!("expected binary expression"); + }; // Check if lhs is a column that is in the i'th position of the index if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? 
{ From 3b44b269a31eb00be79a9fe74fd9daf2a5bf58ae Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:22:57 +0300 Subject: [PATCH 09/16] optimizer: try to build ephemeral index to avoid nested table scan --- core/translate/optimizer.rs | 157 +++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 872758257..41e34418e 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,9 +1,9 @@ -use std::{collections::HashMap, sync::Arc}; +use std::{cmp::Ordering, collections::HashMap, sync::Arc}; use limbo_sqlite3_parser::ast::{self, Expr, SortOrder}; use crate::{ - schema::{Index, Schema}, + schema::{Index, IndexColumn, Schema}, translate::plan::TerminationKey, types::SeekOp, util::exprs_are_equivalent, @@ -843,6 +843,25 @@ pub fn try_extract_index_search_from_where_clause( } } + // We haven't found a persistent btree index that is any better than a full table scan; + // let's see if building an ephemeral index would be better. + if best_index.index.is_none() { + let (ephemeral_cost, constraints_with_col_idx, mut constraints_without_col_idx) = + ephemeral_index_estimate_cost(where_clause, table_reference, table_index); + if ephemeral_cost < best_index.cost { + // ephemeral index makes sense, so let's build it now. 
+ // ephemeral columns are: columns from the table_reference, constraints first, then the rest + let ephemeral_index = + ephemeral_index_build(table_reference, table_index, &constraints_with_col_idx); + best_index.index = Some(Arc::new(ephemeral_index)); + best_index.cost = ephemeral_cost; + best_index.constraints.clear(); + best_index + .constraints + .append(&mut constraints_without_col_idx); + } + } + if best_index.index.is_none() { return Ok(None); } @@ -869,6 +888,140 @@ pub fn try_extract_index_search_from_where_clause( })); } +fn ephemeral_index_estimate_cost( + where_clause: &mut Vec, + table_reference: &TableReference, + table_index: usize, +) -> (f64, Vec<(usize, IndexConstraint)>, Vec) { + let mut constraints_with_col_idx: Vec<(usize, IndexConstraint)> = where_clause + .iter() + .enumerate() + .filter(|(_, term)| is_potential_index_constraint(term, table_index)) + .filter_map(|(i, term)| { + let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else { + panic!("expected binary expression"); + }; + if let ast::Expr::Column { table, column, .. } = lhs.as_ref() { + if *table == table_index { + return Some(( + *column, + IndexConstraint { + position_in_where_clause: (i, BinaryExprSide::Rhs), + operator: *operator, + index_column_sort_order: SortOrder::Asc, + }, + )); + } + } + if let ast::Expr::Column { table, column, .. 
} = rhs.as_ref() { + if *table == table_index { + return Some(( + *column, + IndexConstraint { + position_in_where_clause: (i, BinaryExprSide::Lhs), + operator: opposite_cmp_op(*operator), + index_column_sort_order: SortOrder::Asc, + }, + )); + } + } + None + }) + .collect(); + // sort equalities first + constraints_with_col_idx.sort_by(|a, _| { + if a.1.operator == ast::Operator::Equals { + Ordering::Less + } else { + Ordering::Equal + } + }); + // drop everything after the first inequality + constraints_with_col_idx.truncate( + constraints_with_col_idx + .iter() + .position(|c| c.1.operator != ast::Operator::Equals) + .unwrap_or(constraints_with_col_idx.len()), + ); + + let ephemeral_column_count = table_reference + .columns() + .iter() + .enumerate() + .filter(|(i, _)| table_reference.column_is_used(*i)) + .count(); + + let constraints_without_col_idx = constraints_with_col_idx + .iter() + .cloned() + .map(|(_, c)| c) + .collect::>(); + let ephemeral_cost = dumb_cost_estimator( + Some(IndexInfo { + unique: false, + column_count: ephemeral_column_count, + }), + &constraints_without_col_idx, + table_index != 0, + true, + ); + ( + ephemeral_cost, + constraints_with_col_idx, + constraints_without_col_idx, + ) +} + +fn ephemeral_index_build( + table_reference: &TableReference, + table_index: usize, + index_constraints: &[(usize, IndexConstraint)], +) -> Index { + let mut ephemeral_columns: Vec = table_reference + .columns() + .iter() + .enumerate() + .map(|(i, c)| IndexColumn { + name: c.name.clone().unwrap(), + order: SortOrder::Asc, + pos_in_table: i, + }) + // only include columns that are used in the query + .filter(|c| table_reference.column_is_used(c.pos_in_table)) + .collect(); + // sort so that constraints first, then rest in whatever order they were in in the table + ephemeral_columns.sort_by(|a, b| { + let a_constraint = index_constraints + .iter() + .enumerate() + .find(|(_, c)| c.0 == a.pos_in_table); + let b_constraint = index_constraints + .iter() + 
.enumerate() + .find(|(_, c)| c.0 == b.pos_in_table); + match (a_constraint, b_constraint) { + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (Some((a_idx, _)), Some((b_idx, _))) => a_idx.cmp(&b_idx), + (None, None) => Ordering::Equal, + } + }); + let ephemeral_index = Index { + name: format!( + "ephemeral_{}_{}", + table_reference.table.get_name(), + table_index + ), + columns: ephemeral_columns, + unique: false, + ephemeral: true, + table_name: table_reference.table.get_name().to_string(), + root_page: 0, + }; + + ephemeral_index +} + #[derive(Debug, Clone)] /// A representation of an expression in a [WhereTerm] that can potentially be used as part of an index seek key. /// For example, if there is an index on table T(x,y) and another index on table U(z), and the where clause is "WHERE x > 10 AND 20 = z", From f256fb46fd5eeda8459f91d40ec3cde10e757458 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:46:11 +0300 Subject: [PATCH 10/16] remove print spam from index insert --- core/vdbe/execute.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index de871f54c..0869491d6 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3766,7 +3766,6 @@ pub fn op_idx_insert( pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - dbg!("op_idx_insert_"); if let Insn::IdxInsert { cursor_id, record_reg, @@ -3807,7 +3806,6 @@ pub fn op_idx_insert( } }; - dbg!(moved_before); // Start insertion of row. This might trigger a balance procedure which will take care of moving to different pages, // therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to the following opcode // because it could trigger a movement to child page after a balance root which will leave the current page as the root page. 
From 1928dcfa1073619c3844f9bb3207cf7300c8148c Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Mon, 21 Apr 2025 23:05:01 -0300 Subject: [PATCH 11/16] Correct docs regarding between --- COMPAT.md | 2 +- core/translate/expr.rs | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index ced9fbb6d..799411193 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -200,7 +200,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | (NOT) MATCH | No | | | IS (NOT) | Yes | | | IS (NOT) DISTINCT FROM | Yes | | -| (NOT) BETWEEN ... AND ... | No | | +| (NOT) BETWEEN ... AND ... | Yes | Expression is rewritten in the optimizer | | (NOT) IN (subquery) | No | | | (NOT) EXISTS (subquery) | No | | | CASE WHEN THEN ELSE END | Yes | | diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 6c9072ab9..53deb7e0f 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -186,7 +186,9 @@ pub fn translate_condition_expr( resolver: &Resolver, ) -> Result<()> { match expr { - ast::Expr::Between { .. } => todo!(), + ast::Expr::Between { .. } => { + unreachable!("expression should have been rewritten in optmizer") + } ast::Expr::Binary(lhs, ast::Operator::And, rhs) => { // In a binary AND, never jump to the parent 'jump_target_when_true' label on the first condition, because // the second condition MUST also be true. Instead we instruct the child expression to jump to a local @@ -492,7 +494,9 @@ pub fn translate_expr( return Ok(target_register); } match expr { - ast::Expr::Between { .. } => todo!(), + ast::Expr::Between { .. 
} => { + unreachable!("expression should have been rewritten in optmizer") + } ast::Expr::Binary(e1, op, e2) => { // Check if both sides of the expression are equivalent and reuse the same register if so if exprs_are_equivalent(e1, e2) { From 094fd0e21143d9cb50a49fe844f021407394e462 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 22 Apr 2025 09:46:16 +0300 Subject: [PATCH 12/16] Add TPC-H instructions to PERF.md --- PERF.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/PERF.md b/PERF.md index 40edcf7ea..54a55f2ac 100644 --- a/PERF.md +++ b/PERF.md @@ -32,3 +32,28 @@ make clickbench This will build Limbo in release mode, create a database, and run the benchmarks with a small subset of the Clickbench dataset. It will run the queries for both Limbo and SQLite, and print the results. + +## TPC-H + +1. Clone the Tarantool TPC-H benchmarking tool: + +```shell +git clone git@github.com:tarantool/tpch.git +``` + +2. Patch the benchmark runner script: + +```patch +diff --git a/bench_queries.sh b/bench_queries.sh +index 6b894f9..c808e9a 100755 +--- a/bench_queries.sh ++++ b/bench_queries.sh +@@ -4,7 +4,7 @@ function check_q { + local query=queries/$*.sql + ( + echo $query +- time ( sqlite3 TPC-H.db < $query > /dev/null ) ++ time ( ../../limbo/target/release/limbo -m list TPC-H.db < $query > /dev/null ) + ) + } +``` From 68d8b86bb7d2365237c04c4310a3157af73d43fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20K=C3=B6sters?= Date: Mon, 21 Apr 2025 13:15:04 +0200 Subject: [PATCH 13/16] fix: get name of rowid column --- bindings/java/rs_src/limbo_statement.rs | 2 +- core/lib.rs | 2 +- core/translate/plan.rs | 19 ++++++++++++++++--- tests/integration/common.rs | 10 +++++----- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/bindings/java/rs_src/limbo_statement.rs b/bindings/java/rs_src/limbo_statement.rs index b28ff55b1..c49469cd6 100644 --- a/bindings/java/rs_src/limbo_statement.rs +++ b/bindings/java/rs_src/limbo_statement.rs 
@@ -138,7 +138,7 @@ pub extern "system" fn Java_tech_turso_core_LimboStatement_columns<'local>( for i in 0..num_columns { let column_name = stmt.stmt.get_column_name(i); - let str = env.new_string(column_name.as_str()).unwrap(); + let str = env.new_string(column_name.into_owned()).unwrap(); env.set_object_array_element(&obj_arr, i as i32, str) .unwrap(); } diff --git a/core/lib.rs b/core/lib.rs index e130306f7..67d168640 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -591,7 +591,7 @@ impl Statement { self.program.result_columns.len() } - pub fn get_column_name(&self, idx: usize) -> Cow { + pub fn get_column_name(&self, idx: usize) -> Cow { let column = &self.program.result_columns[idx]; match column.name(&self.program.table_references) { Some(name) => Cow::Borrowed(name), diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 07a8de392..48ce4c854 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -34,13 +34,26 @@ pub struct ResultSetColumn { } impl ResultSetColumn { - pub fn name<'a>(&'a self, tables: &'a [TableReference]) -> Option<&'a String> { + pub fn name<'a>(&'a self, tables: &'a [TableReference]) -> Option<&'a str> { if let Some(alias) = &self.alias { return Some(alias); } match &self.expr { ast::Expr::Column { table, column, .. } => { - tables[*table].columns()[*column].name.as_ref() + tables[*table].columns()[*column].name.as_deref() + } + ast::Expr::RowId { table, .. } => { + // If there is a rowid alias column, use its name + if let Table::BTree(table) = &tables[*table].table { + if let Some(rowid_alias_column) = table.get_rowid_alias_column() { + if let Some(name) = &rowid_alias_column.1.name { + return Some(name); + } + } + } + + // If there is no rowid alias, use "rowid". 
+ Some("rowid") } _ => None, } @@ -465,7 +478,7 @@ impl TableReference { plan.result_columns .iter() .map(|rc| Column { - name: rc.name(&plan.table_references).map(String::clone), + name: rc.name(&plan.table_references).map(String::from), ty: Type::Text, // FIXME: infer proper type ty_str: "TEXT".to_string(), is_rowid_alias: false, diff --git a/tests/integration/common.rs b/tests/integration/common.rs index a034b36ae..2c668a12f 100644 --- a/tests/integration/common.rs +++ b/tests/integration/common.rs @@ -120,16 +120,16 @@ mod tests { let columns = stmt.num_columns(); assert_eq!(columns, 3); - assert_eq!(stmt.get_column_name(0), "foo".into()); - assert_eq!(stmt.get_column_name(1), "bar".into()); - assert_eq!(stmt.get_column_name(2), "baz".into()); + assert_eq!(stmt.get_column_name(0), "foo"); + assert_eq!(stmt.get_column_name(1), "bar"); + assert_eq!(stmt.get_column_name(2), "baz"); let stmt = conn.prepare("select foo, bar from test;")?; let columns = stmt.num_columns(); assert_eq!(columns, 2); - assert_eq!(stmt.get_column_name(0), "foo".into()); - assert_eq!(stmt.get_column_name(1), "bar".into()); + assert_eq!(stmt.get_column_name(0), "foo"); + assert_eq!(stmt.get_column_name(1), "bar"); let stmt = conn.prepare("delete from test;")?; let columns = stmt.num_columns(); From c2cf4756ef8601de234170fc44f620ca0861640e Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 22 Apr 2025 12:10:02 +0300 Subject: [PATCH 14/16] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index cc72d1133..1bf0cd1e7 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@

+ PyPI PyPI
From 936365a44e15a7658a13761d1b42c0ae798752a3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 22 Apr 2025 12:11:23 +0300 Subject: [PATCH 15/16] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1bf0cd1e7..255843d80 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,9 @@

- PyPI - PyPI + PyPI + PyPI + PyPI
From fc5099e2efd7fd4537d12394078e9364a6b6f5db Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 22 Apr 2025 12:04:47 +0300 Subject: [PATCH 16/16] antithesis: Enable RUST_BACKTRACE for workload --- Dockerfile.antithesis | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 1f4f3ba10..6305c12f0 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -71,4 +71,5 @@ COPY --from=builder /app/target/release/limbo_stress /bin/limbo_stress COPY stress/docker-entrypoint.sh /bin RUN chmod +x /bin/docker-entrypoint.sh ENTRYPOINT ["/bin/docker-entrypoint.sh"] +ENV RUST_BACKTRACE=1 CMD ["/bin/limbo_stress"]