diff --git a/COMPAT.md b/COMPAT.md
index ced9fbb6d..799411193 100644
--- a/COMPAT.md
+++ b/COMPAT.md
@@ -200,7 +200,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html).
| (NOT) MATCH | No | |
| IS (NOT) | Yes | |
| IS (NOT) DISTINCT FROM | Yes | |
-| (NOT) BETWEEN ... AND ... | No | |
+| (NOT) BETWEEN ... AND ... | Yes | Expression is rewritten in the optimizer |
| (NOT) IN (subquery) | No | |
| (NOT) EXISTS (subquery) | No | |
| CASE WHEN THEN ELSE END | Yes | |
diff --git a/Cargo.lock b/Cargo.lock
index 810b9983e..0b9a8bd75 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1540,9 +1540,9 @@ dependencies = [
[[package]]
name = "julian_day_converter"
-version = "0.4.4"
+version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1aa5652b85ab018289638c6b924db618da9edd2ddfff7fa0ec38a8b51a9192d3"
+checksum = "f2987f71b89b85c812c8484cbf0c5d7912589e77bfdc66fd3e52f760e7859f16"
dependencies = [
"chrono",
]
diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis
index 1f4f3ba10..6305c12f0 100644
--- a/Dockerfile.antithesis
+++ b/Dockerfile.antithesis
@@ -71,4 +71,5 @@ COPY --from=builder /app/target/release/limbo_stress /bin/limbo_stress
COPY stress/docker-entrypoint.sh /bin
RUN chmod +x /bin/docker-entrypoint.sh
ENTRYPOINT ["/bin/docker-entrypoint.sh"]
+ENV RUST_BACKTRACE=1
CMD ["/bin/limbo_stress"]
diff --git a/PERF.md b/PERF.md
index fb25045fb..0eda689e5 100644
--- a/PERF.md
+++ b/PERF.md
@@ -34,6 +34,7 @@ This will build Limbo in release mode, create a database, and run the benchmarks
It will run the queries for both Limbo and SQLite, and print the results.
+
## Comparing VFS's/IO Back-ends (io_uring | syscall)
```shell
@@ -42,3 +43,30 @@ make bench-vfs SQL="select * from users;" N=500
The naive script will build and run limbo in release mode and execute the given SQL (against a copy of the `testing/testing.db` file)
`N` times with each `vfs`. This is not meant to be a definitive or thorough performance benchmark but serves to compare the two.
+
+
+## TPC-H
+
+1. Clone the Tarantool TPC-H benchmarking tool:
+
+```shell
+git clone git@github.com:tarantool/tpch.git
+```
+
+2. Patch the benchmark runner script:
+
+```patch
+diff --git a/bench_queries.sh b/bench_queries.sh
+index 6b894f9..c808e9a 100755
+--- a/bench_queries.sh
++++ b/bench_queries.sh
+@@ -4,7 +4,7 @@ function check_q {
+ local query=queries/$*.sql
+ (
+ echo $query
+- time ( sqlite3 TPC-H.db < $query > /dev/null )
++ time ( ../../limbo/target/release/limbo -m list TPC-H.db < $query > /dev/null )
+ )
+ }
+```
+
diff --git a/README.md b/README.md
index cc72d1133..255843d80 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,9 @@
-
+
+
+
diff --git a/bindings/java/rs_src/limbo_statement.rs b/bindings/java/rs_src/limbo_statement.rs
index b28ff55b1..c49469cd6 100644
--- a/bindings/java/rs_src/limbo_statement.rs
+++ b/bindings/java/rs_src/limbo_statement.rs
@@ -138,7 +138,7 @@ pub extern "system" fn Java_tech_turso_core_LimboStatement_columns<'local>(
for i in 0..num_columns {
let column_name = stmt.stmt.get_column_name(i);
- let str = env.new_string(column_name.as_str()).unwrap();
+ let str = env.new_string(column_name.into_owned()).unwrap();
env.set_object_array_element(&obj_arr, i as i32, str)
.unwrap();
}
diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs
index 61e6271c9..8c57e7909 100644
--- a/bindings/rust/src/lib.rs
+++ b/bindings/rust/src/lib.rs
@@ -190,6 +190,39 @@ impl Statement {
}
}
}
+
+    /// Returns one [`Column`] descriptor per column in this statement's result set.
+    ///
+    /// The declared type is not populated yet (always `None`).
+    ///
+    /// # Panics
+    /// Panics if `self.inner.lock()` returns an error.
+    pub fn columns(&self) -> Vec<Column> {
+        let stmt = self.inner.lock().unwrap();
+
+        (0..stmt.num_columns())
+            .map(|i| Column {
+                name: stmt.get_column_name(i).into_owned(),
+                decl_type: None, // TODO
+            })
+            .collect()
+    }
+}
+
+/// Metadata describing a single column of a statement's result set.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Column {
+    /// Name of the result column.
+    name: String,
+    /// Declared type of the column, when known.
+    decl_type: Option<String>,
+}
+
+impl Column {
+    /// Returns the column name.
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Returns the declared type of the column, or `None` if it is not known.
+    pub fn decl_type(&self) -> Option<&str> {
+        self.decl_type.as_deref()
+    }
}
pub trait IntoValue {
diff --git a/core/Cargo.toml b/core/Cargo.toml
index a790a0ca3..eb5d092b0 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -53,7 +53,7 @@ regex-syntax = { version = "0.8.5", default-features = false, features = [
"unicode",
] }
chrono = { version = "0.4.38", default-features = false, features = ["clock"] }
-julian_day_converter = "0.4.4"
+julian_day_converter = "0.4.5"
rand = "0.8.5"
libm = "0.2"
limbo_macros = { workspace = true }
diff --git a/core/lib.rs b/core/lib.rs
index e130306f7..67d168640 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -591,7 +591,7 @@ impl Statement {
self.program.result_columns.len()
}
- pub fn get_column_name(&self, idx: usize) -> Cow {
+ pub fn get_column_name(&self, idx: usize) -> Cow {
let column = &self.program.result_columns[idx];
match column.name(&self.program.table_references) {
Some(name) => Cow::Borrowed(name),
diff --git a/core/schema.rs b/core/schema.rs
index 0a5a8d80f..dd09671ab 100644
--- a/core/schema.rs
+++ b/core/schema.rs
@@ -692,6 +692,7 @@ pub struct Index {
pub root_page: usize,
pub columns: Vec,
pub unique: bool,
+ pub ephemeral: bool,
}
#[allow(dead_code)]
@@ -741,6 +742,7 @@ impl Index {
root_page,
columns: index_columns,
unique,
+ ephemeral: false,
})
}
_ => todo!("Expected create index statement"),
@@ -783,6 +785,7 @@ impl Index {
root_page,
columns: index_columns,
unique: true, // Primary key indexes are always unique
+ ephemeral: false,
})
}
diff --git a/core/translate/expr.rs b/core/translate/expr.rs
index 6c9072ab9..53deb7e0f 100644
--- a/core/translate/expr.rs
+++ b/core/translate/expr.rs
@@ -186,7 +186,9 @@ pub fn translate_condition_expr(
resolver: &Resolver,
) -> Result<()> {
match expr {
- ast::Expr::Between { .. } => todo!(),
+ ast::Expr::Between { .. } => {
+ unreachable!("expression should have been rewritten in optmizer")
+ }
ast::Expr::Binary(lhs, ast::Operator::And, rhs) => {
// In a binary AND, never jump to the parent 'jump_target_when_true' label on the first condition, because
// the second condition MUST also be true. Instead we instruct the child expression to jump to a local
@@ -492,7 +494,9 @@ pub fn translate_expr(
return Ok(target_register);
}
match expr {
- ast::Expr::Between { .. } => todo!(),
+ ast::Expr::Between { .. } => {
+ unreachable!("expression should have been rewritten in optmizer")
+ }
ast::Expr::Binary(e1, op, e2) => {
// Check if both sides of the expression are equivalent and reuse the same register if so
if exprs_are_equivalent(e1, e2) {
diff --git a/core/translate/index.rs b/core/translate/index.rs
index de79aed23..55222e40f 100644
--- a/core/translate/index.rs
+++ b/core/translate/index.rs
@@ -62,6 +62,7 @@ pub fn translate_create_index(
})
.collect(),
unique: unique_if_not_exists.0,
+ ephemeral: false,
});
// Allocate the necessary cursors:
diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs
index 7354eb4a1..c56680446 100644
--- a/core/translate/main_loop.rs
+++ b/core/translate/main_loop.rs
@@ -1,14 +1,16 @@
use limbo_ext::VTabKind;
use limbo_sqlite3_parser::ast;
+use std::sync::Arc;
+
use crate::{
- schema::Table,
+ schema::{Index, Table},
translate::result_row::emit_select_result,
types::SeekOp,
vdbe::{
builder::ProgramBuilder,
- insn::{CmpInsFlags, Insn},
- BranchOffset,
+ insn::{CmpInsFlags, IdxInsertFlags, Insn},
+ BranchOffset, CursorID,
},
Result,
};
@@ -156,23 +158,26 @@ pub fn init_loop(
index: Some(index), ..
} = search
{
- match mode {
- OperationMode::SELECT => {
- program.emit_insn(Insn::OpenRead {
- cursor_id: index_cursor_id
- .expect("index cursor is always opened in Seek with index"),
- root_page: index.root_page,
- });
- }
- OperationMode::UPDATE | OperationMode::DELETE => {
- program.emit_insn(Insn::OpenWrite {
- cursor_id: index_cursor_id
- .expect("index cursor is always opened in Seek with index"),
- root_page: index.root_page.into(),
- });
- }
- _ => {
- unimplemented!()
+            // Ephemeral index cursors are opened ad hoc when needed.
+ if !index.ephemeral {
+ match mode {
+ OperationMode::SELECT => {
+ program.emit_insn(Insn::OpenRead {
+ cursor_id: index_cursor_id
+ .expect("index cursor is always opened in Seek with index"),
+ root_page: index.root_page,
+ });
+ }
+ OperationMode::UPDATE | OperationMode::DELETE => {
+ program.emit_insn(Insn::OpenWrite {
+ cursor_id: index_cursor_id
+ .expect("index cursor is always opened in Seek with index"),
+ root_page: index.root_page.into(),
+ });
+ }
+ _ => {
+ unimplemented!()
+ }
}
}
}
@@ -437,6 +442,32 @@ pub fn open_loop(
});
} else {
// Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore.
+                    // For an ephemeral index, emit the code that builds it and seek on the
+                    // cursor returned by `emit_autoindex`; otherwise keep using the cursor
+                    // that was already opened. The result must be bound, otherwise the
+                    // value of this expression is silently dropped.
+                    let index_cursor_id = if let Search::Seek {
+                        index: Some(index), ..
+                    } = search
+                    {
+                        if index.ephemeral {
+                            let table_has_rowid = if let Table::BTree(btree) = &table.table {
+                                btree.has_rowid
+                            } else {
+                                false
+                            };
+                            Some(emit_autoindex(
+                                program,
+                                &index,
+                                table_cursor_id
+                                    .expect("an ephemeral index must have a source table cursor"),
+                                index_cursor_id
+                                    .expect("an ephemeral index must have an index cursor"),
+                                table_has_rowid,
+                            )?)
+                        } else {
+                            index_cursor_id
+                        }
+                    } else {
+                        index_cursor_id
+                    };
+
let is_index = index_cursor_id.is_some();
let seek_cursor_id = index_cursor_id.unwrap_or_else(|| {
table_cursor_id.expect("Either index or table cursor must be opened")
@@ -1125,3 +1156,67 @@ fn emit_seek_termination(
Ok(())
}
+
+/// Emits the bytecode that opens an ephemeral index cursor and builds an automatic
+/// index on a table.
+///
+/// This is used as a last resort to avoid a nested full table scan. The whole build
+/// sequence sits behind an `Insn::Once`, whose re-entry target is the end of the
+/// build, so the index is only built the first time this code is reached.
+///
+/// Every index entry is a record made of the columns in `index.columns` (read from
+/// the source table cursor), followed by the source row's rowid when
+/// `table_has_rowid` is true.
+///
+/// Returns the cursor id of the ephemeral index cursor (i.e. `index_cursor_id`).
+///
+/// # Panics
+/// Panics if `index` is not flagged as ephemeral.
+fn emit_autoindex(
+    program: &mut ProgramBuilder,
+    index: &Arc<Index>,
+    table_cursor_id: CursorID,
+    index_cursor_id: CursorID,
+    table_has_rowid: bool,
+) -> Result<CursorID> {
+    assert!(index.ephemeral, "Index {} is not ephemeral", index.name);
+    let label_ephemeral_build_end = program.allocate_label();
+    // Since this typically happens in an inner loop, we only build it once.
+    program.emit_insn(Insn::Once {
+        target_pc_when_reentered: label_ephemeral_build_end,
+    });
+    program.emit_insn(Insn::OpenAutoindex {
+        cursor_id: index_cursor_id,
+    });
+    // Rewind the source table; an empty table jumps straight past the build loop.
+    program.emit_insn(Insn::Rewind {
+        cursor_id: table_cursor_id,
+        pc_if_empty: label_ephemeral_build_end,
+    });
+    let offset_ephemeral_build_loop_start = program.offset();
+    // Reserve one register per index column, plus one for the rowid if the
+    // source table has rowids.
+    let index_column_count = index.columns.len();
+    let num_regs_to_reserve = index_column_count + usize::from(table_has_rowid);
+    let ephemeral_cols_start_reg = program.alloc_registers(num_regs_to_reserve);
+    // Read every column needed by the ephemeral index from the source table.
+    for (i, col) in index.columns.iter().enumerate() {
+        program.emit_insn(Insn::Column {
+            cursor_id: table_cursor_id,
+            column: col.pos_in_table,
+            dest: ephemeral_cols_start_reg + i,
+        });
+    }
+    if table_has_rowid {
+        // The rowid goes into the last reserved register, right after the columns.
+        program.emit_insn(Insn::RowId {
+            cursor_id: table_cursor_id,
+            dest: ephemeral_cols_start_reg + index_column_count,
+        });
+    }
+    let record_reg = program.alloc_register();
+    program.emit_insn(Insn::MakeRecord {
+        start_reg: ephemeral_cols_start_reg,
+        count: num_regs_to_reserve,
+        dest_reg: record_reg,
+    });
+    program.emit_insn(Insn::IdxInsert {
+        cursor_id: index_cursor_id,
+        record_reg,
+        unpacked_start: Some(ephemeral_cols_start_reg),
+        // NOTE(review): `as u16` truncates silently if more than u16::MAX registers
+        // were reserved — presumably unreachable for real tables; confirm.
+        unpacked_count: Some(num_regs_to_reserve as u16),
+        flags: IdxInsertFlags::new().use_seek(false),
+    });
+    // Advance the source table and loop back while there are more rows.
+    program.emit_insn(Insn::Next {
+        cursor_id: table_cursor_id,
+        pc_if_next: offset_ephemeral_build_loop_start,
+    });
+    program.resolve_label(label_ephemeral_build_end, program.offset());
+    Ok(index_cursor_id)
+}
diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs
index fe764ee50..41e34418e 100644
--- a/core/translate/optimizer.rs
+++ b/core/translate/optimizer.rs
@@ -1,9 +1,9 @@
-use std::{collections::HashMap, sync::Arc};
+use std::{cmp::Ordering, collections::HashMap, sync::Arc};
use limbo_sqlite3_parser::ast::{self, Expr, SortOrder};
use crate::{
- schema::{Index, Schema},
+ schema::{Index, IndexColumn, Schema},
translate::plan::TerminationKey,
types::SeekOp,
util::exprs_are_equivalent,
@@ -355,15 +355,18 @@ fn use_indexes(
// but we just don't do that yet.
continue;
}
+ let placeholder = vec![];
+ let mut usable_indexes_ref = &placeholder;
if let Some(indexes) = available_indexes.get(table_name) {
- if let Some(search) = try_extract_index_search_from_where_clause(
- where_clause,
- table_index,
- table_reference,
- indexes,
- )? {
- table_reference.op = Operation::Search(search);
- }
+ usable_indexes_ref = indexes;
+ }
+ if let Some(search) = try_extract_index_search_from_where_clause(
+ where_clause,
+ table_index,
+ table_reference,
+ usable_indexes_ref,
+ )? {
+ table_reference.op = Operation::Search(search);
}
}
}
@@ -710,14 +713,80 @@ fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
}
/// Struct used for scoring index scans
-/// Currently we just score by the number of index columns that can be utilized
-/// in the scan, i.e. no statistics are used.
+/// Currently we just estimate cost in a really dumb way,
+/// i.e. no statistics are used.
struct IndexScore {
index: Option>,
- score: usize,
+ cost: f64,
constraints: Vec,
}
+/// The properties of an index that the cost estimator looks at.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct IndexInfo {
+    /// Whether the index is a unique index.
+    unique: bool,
+    /// Number of columns in the index.
+    column_count: usize,
+}
+
+const ESTIMATED_HARDCODED_ROWS_PER_TABLE: f64 = 1000.0;
+
+/// Unbelievably dumb cost estimate for rows scanned by an index scan.
+///
+/// No statistics are used: every table is assumed to hold
+/// `ESTIMATED_HARDCODED_ROWS_PER_TABLE` rows.
+///
+/// - `index_info`: the index under consideration, or `None` to cost a full table scan.
+/// - `constraints`: the constraints usable on the index, in index-column order. Only the
+///   last one is checked for being a range (non-`Equals`) constraint; all constraints
+///   before it are counted as equalities.
+/// - `is_inner_loop`: whether the table is scanned inside another loop, in which case the
+///   scan cost is multiplied by the assumed row count of the outer table.
+/// - `is_ephemeral`: whether the index has to be built first, which adds the cost of one
+///   full scan of the source table.
+///
+/// Returns the estimated number of rows read; lower is better.
+fn dumb_cost_estimator(
+    index_info: Option<IndexInfo>,
+    constraints: &[IndexConstraint],
+    is_inner_loop: bool,
+    is_ephemeral: bool,
+) -> f64 {
+    // assume that the outer table always does a full table scan :)
+    // this discourages building ephemeral indexes on the outer table
+    // (since a scan reads TABLE_ROWS rows, so an ephemeral index on the outer table would both read TABLE_ROWS rows to build the index and then seek the index)
+    // but encourages building it on the inner table because it's only built once but the inner loop is run as many times as the outer loop has iterations.
+    let loop_multiplier = if is_inner_loop {
+        ESTIMATED_HARDCODED_ROWS_PER_TABLE
+    } else {
+        1.0
+    };
+
+    // If we are building an ephemeral index, we assume we will scan the entire source table to build it.
+    // Non-ephemeral indexes don't need to be built.
+    let cost_to_build_index = if is_ephemeral {
+        ESTIMATED_HARDCODED_ROWS_PER_TABLE
+    } else {
+        0.0
+    };
+
+    let Some(index_info) = index_info else {
+        return cost_to_build_index + ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier;
+    };
+
+    let final_constraint_is_range = constraints
+        .last()
+        .map_or(false, |c| c.operator != ast::Operator::Equals);
+    // Everything except a trailing range constraint counts as an equality.
+    // (`final_constraint_is_range` implies `constraints` is non-empty, so this cannot underflow.)
+    let equalities_count = constraints.len() - usize::from(final_constraint_is_range);
+    // let's assume range query selectivity is 0.4
+    let range_factor = if final_constraint_is_range { 0.4 } else { 1.0 };
+
+    let selectivity = if equalities_count == 0 {
+        // no equalities: either a pure range query, or (if the final constraint is not a
+        // range either) a full scan of the index is incoming
+        range_factor
+    } else if index_info.unique && equalities_count == index_info.column_count {
+        // on a unique index if we have equalities across all index columns, assume very high selectivity
+        0.01 * equalities_count as f64
+    } else {
+        // some equalities: each equality contributes 0.1, a trailing range constraint 0.4.
+        // NOTE(review): the equality factor grows linearly (0.1 * n), so being able to use
+        // more index columns raises the estimated cost — confirm whether 0.1^n was intended.
+        (equalities_count as f64 * 0.1) * range_factor
+    };
+    cost_to_build_index + selectivity * ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier
+}
+
/// Try to extract an index search from the WHERE clause
/// Returns an optional [Search] struct if an index search can be extracted, otherwise returns None.
pub fn try_extract_index_search_from_where_clause(
@@ -730,10 +799,6 @@ pub fn try_extract_index_search_from_where_clause(
if where_clause.is_empty() {
return Ok(None);
}
- // If there are no indexes, we can't extract a search
- if table_indexes.is_empty() {
- return Ok(None);
- }
let iter_dir = if let Operation::Scan { iter_dir, .. } = &table_reference.op {
*iter_dir
@@ -748,10 +813,11 @@ pub fn try_extract_index_search_from_where_clause(
// 3. constrain the index columns in the order that they appear in the index
// - e.g. if the index is on (a,b,c) then we can use all of "a = 1 AND b = 2 AND c = 3" to constrain the index scan,
// - but if the where clause is "a = 1 and c = 3" then we can only use "a = 1".
+ let cost_of_full_table_scan = dumb_cost_estimator(None, &[], table_index != 0, false);
let mut constraints_cur = vec![];
let mut best_index = IndexScore {
index: None,
- score: 0,
+ cost: cost_of_full_table_scan,
constraints: vec![],
};
@@ -760,15 +826,42 @@ pub fn try_extract_index_search_from_where_clause(
find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?;
// naive scoring since we don't have statistics: prefer the index where we can use the most columns
// e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c.
- let score = constraints_cur.len();
- if score > best_index.score {
+ let cost = dumb_cost_estimator(
+ Some(IndexInfo {
+ unique: index.unique,
+ column_count: index.columns.len(),
+ }),
+ &constraints_cur,
+ table_index != 0,
+ false,
+ );
+ if cost < best_index.cost {
best_index.index = Some(Arc::clone(index));
- best_index.score = score;
+ best_index.cost = cost;
best_index.constraints.clear();
best_index.constraints.append(&mut constraints_cur);
}
}
+ // We haven't found a persistent btree index that is any better than a full table scan;
+ // let's see if building an ephemeral index would be better.
+ if best_index.index.is_none() {
+ let (ephemeral_cost, constraints_with_col_idx, mut constraints_without_col_idx) =
+ ephemeral_index_estimate_cost(where_clause, table_reference, table_index);
+ if ephemeral_cost < best_index.cost {
+ // ephemeral index makes sense, so let's build it now.
+ // ephemeral columns are: columns from the table_reference, constraints first, then the rest
+ let ephemeral_index =
+ ephemeral_index_build(table_reference, table_index, &constraints_with_col_idx);
+ best_index.index = Some(Arc::new(ephemeral_index));
+ best_index.cost = ephemeral_cost;
+ best_index.constraints.clear();
+ best_index
+ .constraints
+ .append(&mut constraints_without_col_idx);
+ }
+ }
+
if best_index.index.is_none() {
return Ok(None);
}
@@ -795,6 +888,140 @@ pub fn try_extract_index_search_from_where_clause(
}));
}
+/// Picks the WHERE-clause terms that could serve as the seek key of an ephemeral index
+/// on `table_reference` and estimates the cost of building and using such an index.
+///
+/// Only equality constraints are kept, in the order they appear in the WHERE clause.
+/// All ephemeral index columns are treated as ascending.
+///
+/// Returns a tuple of:
+/// 1. the estimated cost (see [`dumb_cost_estimator`]),
+/// 2. the kept constraints, each paired with the position in the table of the column it constrains,
+/// 3. the same constraints without the column positions.
+///
+/// # Panics
+/// Panics if a term accepted by [`is_potential_index_constraint`] is not a binary expression.
+fn ephemeral_index_estimate_cost(
+    where_clause: &mut Vec<WhereTerm>,
+    table_reference: &TableReference,
+    table_index: usize,
+) -> (f64, Vec<(usize, IndexConstraint)>, Vec<IndexConstraint>) {
+    let mut constraints_with_col_idx: Vec<(usize, IndexConstraint)> = where_clause
+        .iter()
+        .enumerate()
+        .filter(|(_, term)| is_potential_index_constraint(term, table_index))
+        .filter_map(|(i, term)| {
+            let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
+                panic!("expected binary expression");
+            };
+            // `<this table's column> <op> <expr>`: the seek key comes from the right-hand side.
+            if let ast::Expr::Column { table, column, .. } = lhs.as_ref() {
+                if *table == table_index {
+                    return Some((
+                        *column,
+                        IndexConstraint {
+                            position_in_where_clause: (i, BinaryExprSide::Rhs),
+                            operator: *operator,
+                            index_column_sort_order: SortOrder::Asc,
+                        },
+                    ));
+                }
+            }
+            // `<expr> <op> <this table's column>`: the seek key comes from the left-hand side,
+            // so the operator has to be flipped.
+            if let ast::Expr::Column { table, column, .. } = rhs.as_ref() {
+                if *table == table_index {
+                    return Some((
+                        *column,
+                        IndexConstraint {
+                            position_in_where_clause: (i, BinaryExprSide::Lhs),
+                            operator: opposite_cmp_op(*operator),
+                            index_column_sort_order: SortOrder::Asc,
+                        },
+                    ));
+                }
+            }
+            None
+        })
+        .collect();
+    // Keep only the equality constraints. This is what "sort equalities first, then drop
+    // everything from the first inequality on" amounts to, without relying on a sort
+    // comparator that is not a total order.
+    constraints_with_col_idx.retain(|(_, c)| c.operator == ast::Operator::Equals);
+
+    // The ephemeral index contains every column of the table that the query uses.
+    let ephemeral_column_count = (0..table_reference.columns().len())
+        .filter(|i| table_reference.column_is_used(*i))
+        .count();
+
+    let constraints_without_col_idx = constraints_with_col_idx
+        .iter()
+        .map(|(_, c)| c.clone())
+        .collect::<Vec<_>>();
+    let ephemeral_cost = dumb_cost_estimator(
+        Some(IndexInfo {
+            unique: false,
+            column_count: ephemeral_column_count,
+        }),
+        &constraints_without_col_idx,
+        table_index != 0,
+        true,
+    );
+    (
+        ephemeral_cost,
+        constraints_with_col_idx,
+        constraints_without_col_idx,
+    )
+}
+
+/// Builds the definition of an ephemeral index on `table_reference`.
+///
+/// The index contains every column of the table that is used in the query. Columns that
+/// appear in `index_constraints` come first, in constraint order; the remaining columns
+/// follow in the order they have in the table. All columns are ascending.
+///
+/// `index_constraints` pairs each constraint with the position in the table of the column
+/// it constrains. The returned index is non-unique, flagged `ephemeral`, and has
+/// `root_page` set to 0.
+///
+/// # Panics
+/// Panics if a used column has no name.
+fn ephemeral_index_build(
+    table_reference: &TableReference,
+    table_index: usize,
+    index_constraints: &[(usize, IndexConstraint)],
+) -> Index {
+    let mut ephemeral_columns: Vec<IndexColumn> = table_reference
+        .columns()
+        .iter()
+        .enumerate()
+        // only include columns that are used in the query; filtering first means the
+        // names of unused columns are neither cloned nor unwrapped
+        .filter(|(i, _)| table_reference.column_is_used(*i))
+        .map(|(i, c)| IndexColumn {
+            name: c
+                .name
+                .clone()
+                .expect("ephemeral index column has no name"),
+            order: SortOrder::Asc,
+            pos_in_table: i,
+        })
+        .collect();
+    // Position of the constraint (if any) that targets the given index column.
+    let constraint_position = |col: &IndexColumn| {
+        index_constraints
+            .iter()
+            .position(|(pos_in_table, _)| *pos_in_table == col.pos_in_table)
+    };
+    // sort so that constraints first, then rest in whatever order they were in in the table
+    // (the sort is stable, so unconstrained columns keep their relative order)
+    ephemeral_columns.sort_by(|a, b| {
+        match (constraint_position(a), constraint_position(b)) {
+            (Some(a_idx), Some(b_idx)) => a_idx.cmp(&b_idx),
+            (Some(_), None) => Ordering::Less,
+            (None, Some(_)) => Ordering::Greater,
+            (None, None) => Ordering::Equal,
+        }
+    });
+
+    Index {
+        name: format!(
+            "ephemeral_{}_{}",
+            table_reference.table.get_name(),
+            table_index
+        ),
+        columns: ephemeral_columns,
+        unique: false,
+        ephemeral: true,
+        table_name: table_reference.table.get_name().to_string(),
+        root_page: 0,
+    }
+}
+
#[derive(Debug, Clone)]
/// A representation of an expression in a [WhereTerm] that can potentially be used as part of an index seek key.
/// For example, if there is an index on table T(x,y) and another index on table U(z), and the where clause is "WHERE x > 10 AND 20 = z",
@@ -874,6 +1101,45 @@ fn get_column_position_in_index(
Ok(index.column_table_pos_to_index_pos(*column))
}
+/// Decides whether a WHERE term is a candidate for constraining an index on the table
+/// at `table_index`.
+///
+/// A candidate must be evaluable at this table's loop, must be a binary comparison
+/// (`=`, `>`, `>=`, `<`, `<=`) once parentheses are unwrapped, and must not have both
+/// of its operands evaluated at this table's loop. Any failure while inspecting the
+/// expression makes the term a non-candidate.
+fn is_potential_index_constraint(term: &WhereTerm, table_index: usize) -> bool {
+    // Terms that cannot be evaluated at this table's loop level are of no use here.
+    if !term.should_eval_at_loop(table_index) {
+        return false;
+    }
+    // Anything that is not a binary expression is out.
+    let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
+        return false;
+    };
+    // Index scans are only considered for comparison operators.
+    let is_comparison = matches!(
+        *operator,
+        ast::Operator::Equals
+            | ast::Operator::Greater
+            | ast::Operator::GreaterEquals
+            | ast::Operator::Less
+            | ast::Operator::LessEquals
+    );
+    if !is_comparison {
+        return false;
+    }
+
+    let Ok(eval_at_left) = determine_where_to_eval_expr(&lhs) else {
+        return false;
+    };
+    let Ok(eval_at_right) = determine_where_to_eval_expr(&rhs) else {
+        return false;
+    };
+    // When both sides refer to columns from this table, the index cannot be used to
+    // satisfy the condition. Examples:
+    // - WHERE t.x > t.y
+    // - WHERE t.x + 1 > t.y - 5
+    // - WHERE t.x = (t.x)
+    let this_loop = EvalAt::Loop(table_index);
+    !(eval_at_left == this_loop && eval_at_right == this_loop)
+}
+
/// Find all [IndexConstraint]s for a given WHERE clause
/// Constraints are appended as long as they constrain the index in column order.
/// E.g. for index (a,b,c) to be fully used, there must be a [WhereTerm] for each of a, b, and c.
@@ -887,37 +1153,13 @@ fn find_index_constraints(
for position_in_index in 0..index.columns.len() {
let mut found = false;
for (position_in_where_clause, term) in where_clause.iter().enumerate() {
- // Skip terms that cannot be evaluated at this table's loop level
- if !term.should_eval_at_loop(table_index) {
- continue;
- }
- // Skip terms that are not binary comparisons
- let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else {
- continue;
- };
- // Only consider index scans for binary ops that are comparisons
- if !matches!(
- *operator,
- ast::Operator::Equals
- | ast::Operator::Greater
- | ast::Operator::GreaterEquals
- | ast::Operator::Less
- | ast::Operator::LessEquals
- ) {
+ if !is_potential_index_constraint(term, table_index) {
continue;
}
- // If both lhs and rhs refer to columns from this table, we can't use this constraint
- // because we can't use the index to satisfy the condition.
- // Examples:
- // - WHERE t.x > t.y
- // - WHERE t.x + 1 > t.y - 5
- // - WHERE t.x = (t.x)
- if determine_where_to_eval_expr(&lhs)? == EvalAt::Loop(table_index)
- && determine_where_to_eval_expr(&rhs)? == EvalAt::Loop(table_index)
- {
- continue;
- }
+ let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else {
+ panic!("expected binary expression");
+ };
// Check if lhs is a column that is in the i'th position of the index
if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? {
diff --git a/core/translate/plan.rs b/core/translate/plan.rs
index 07a8de392..44a43f73a 100644
--- a/core/translate/plan.rs
+++ b/core/translate/plan.rs
@@ -34,13 +34,26 @@ pub struct ResultSetColumn {
}
impl ResultSetColumn {
- pub fn name<'a>(&'a self, tables: &'a [TableReference]) -> Option<&'a String> {
+ pub fn name<'a>(&'a self, tables: &'a [TableReference]) -> Option<&'a str> {
if let Some(alias) = &self.alias {
return Some(alias);
}
match &self.expr {
ast::Expr::Column { table, column, .. } => {
- tables[*table].columns()[*column].name.as_ref()
+ tables[*table].columns()[*column].name.as_deref()
+ }
+ ast::Expr::RowId { table, .. } => {
+ // If there is a rowid alias column, use its name
+ if let Table::BTree(table) = &tables[*table].table {
+ if let Some(rowid_alias_column) = table.get_rowid_alias_column() {
+ if let Some(name) = &rowid_alias_column.1.name {
+ return Some(name);
+ }
+ }
+ }
+
+ // If there is no rowid alias, use "rowid".
+ Some("rowid")
}
_ => None,
}
@@ -465,7 +478,7 @@ impl TableReference {
plan.result_columns
.iter()
.map(|rc| Column {
- name: rc.name(&plan.table_references).map(String::clone),
+ name: rc.name(&plan.table_references).map(String::from),
ty: Type::Text, // FIXME: infer proper type
ty_str: "TEXT".to_string(),
is_rowid_alias: false,
@@ -509,7 +522,10 @@ impl TableReference {
match &self.table {
Table::BTree(btree) => {
let use_covering_index = self.utilizes_covering_index();
- let table_cursor_id = if use_covering_index && mode == OperationMode::SELECT {
+ let index_is_ephemeral = index.map_or(false, |index| index.ephemeral);
+ let table_not_required =
+ OperationMode::SELECT == mode && use_covering_index && !index_is_ephemeral;
+ let table_cursor_id = if table_not_required {
None
} else {
Some(program.alloc_cursor_id(
@@ -590,6 +606,10 @@ impl TableReference {
};
self.index_is_covering(index.as_ref())
}
+
+    /// Returns whether the column at position `index` is set in this table
+    /// reference's `col_used_mask`.
+    pub fn column_is_used(&self, index: usize) -> bool {
+        self.col_used_mask.get(index)
+    }
}
/// A definition of a rowid/index search.
diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs
index 648044d1d..05fdc4938 100644
--- a/core/vdbe/builder.rs
+++ b/core/vdbe/builder.rs
@@ -363,6 +363,12 @@ impl ProgramBuilder {
Insn::Next { pc_if_next, .. } => {
resolve(pc_if_next, "Next");
}
+ Insn::Once {
+ target_pc_when_reentered,
+ ..
+ } => {
+ resolve(target_pc_when_reentered, "Once");
+ }
Insn::Prev { pc_if_prev, .. } => {
resolve(pc_if_prev, "Prev");
}
diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index de871f54c..0869491d6 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -3766,7 +3766,6 @@ pub fn op_idx_insert(
pager: &Rc,
mv_store: Option<&Rc>,
) -> Result {
- dbg!("op_idx_insert_");
if let Insn::IdxInsert {
cursor_id,
record_reg,
@@ -3807,7 +3806,6 @@ pub fn op_idx_insert(
}
};
- dbg!(moved_before);
// Start insertion of row. This might trigger a balance procedure which will take care of moving to different pages,
// therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to the following opcode
// because it could trigger a movement to child page after a balance root which will leave the current page as the root page.
diff --git a/testing/scalar-functions-datetime.test b/testing/scalar-functions-datetime.test
index 3c2f7b771..33caf52c2 100755
--- a/testing/scalar-functions-datetime.test
+++ b/testing/scalar-functions-datetime.test
@@ -597,6 +597,10 @@ foreach i $FMT {
do_execsql_test strftime-invalid-$i "SELECT strftime('$i','2025-01-23T13:14:30.567');" {}
}
+do_execsql_test strftime-julianday {
+ SELECT strftime('%Y-%m-%d %H:%M:%fZ', 2459717.08070103);
+} {"2022-05-17 13:56:12.569Z"}
+
# Tests for the TIMEDIFF function
diff --git a/tests/integration/common.rs b/tests/integration/common.rs
index a034b36ae..2c668a12f 100644
--- a/tests/integration/common.rs
+++ b/tests/integration/common.rs
@@ -120,16 +120,16 @@ mod tests {
let columns = stmt.num_columns();
assert_eq!(columns, 3);
- assert_eq!(stmt.get_column_name(0), "foo".into());
- assert_eq!(stmt.get_column_name(1), "bar".into());
- assert_eq!(stmt.get_column_name(2), "baz".into());
+ assert_eq!(stmt.get_column_name(0), "foo");
+ assert_eq!(stmt.get_column_name(1), "bar");
+ assert_eq!(stmt.get_column_name(2), "baz");
let stmt = conn.prepare("select foo, bar from test;")?;
let columns = stmt.num_columns();
assert_eq!(columns, 2);
- assert_eq!(stmt.get_column_name(0), "foo".into());
- assert_eq!(stmt.get_column_name(1), "bar".into());
+ assert_eq!(stmt.get_column_name(0), "foo");
+ assert_eq!(stmt.get_column_name(1), "bar");
let stmt = conn.prepare("delete from test;")?;
let columns = stmt.num_columns();