This commit is contained in:
alpaylan
2025-04-09 14:03:38 -04:00
40 changed files with 3065 additions and 636 deletions

1
.gitignore vendored
View File

@@ -34,4 +34,5 @@ dist/
# testing
testing/limbo_output.txt
**/limbo_output.txt
testing/test.log
.bugbase

View File

@@ -41,7 +41,6 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte
* ⛔️ Concurrent access from multiple processes is not supported.
* ⛔️ Savepoints are not supported.
* ⛔️ Triggers are not supported.
* ⛔️ Indexes are not supported.
* ⛔️ Views are not supported.
* ⛔️ Vacuum is not supported.
@@ -58,13 +57,14 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte
| COMMIT TRANSACTION | Partial | Transaction names are not supported. |
| CREATE INDEX | Yes | |
| CREATE TABLE | Partial | |
| CREATE TABLE ... STRICT | Yes | |
| CREATE TRIGGER | No | |
| CREATE VIEW | No | |
| CREATE VIRTUAL TABLE | No | |
| CREATE VIRTUAL TABLE | Yes | |
| DELETE | Yes | |
| DETACH DATABASE | No | |
| DROP INDEX | No | |
| DROP TABLE | No | |
| DROP TABLE | Yes | |
| DROP TRIGGER | No | |
| DROP VIEW | No | |
| END TRANSACTION | Partial | Alias for `COMMIT TRANSACTION` |

22
Cargo.lock generated
View File

@@ -583,6 +583,15 @@ dependencies = [
"itertools",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
@@ -1699,6 +1708,7 @@ dependencies = [
"shlex",
"syntect",
"tracing",
"tracing-appender",
"tracing-subscriber",
]
@@ -3504,6 +3514,18 @@ dependencies = [
"tracing-core",
]
[[package]]
name = "tracing-appender"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf"
dependencies = [
"crossbeam-channel",
"thiserror 1.0.69",
"time",
"tracing-subscriber",
]
[[package]]
name = "tracing-attributes"
version = "0.1.28"

View File

@@ -62,7 +62,7 @@ limbo-wasm:
cargo build --package limbo-wasm --target wasm32-wasi
.PHONY: limbo-wasm
test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions
test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions test-memory
.PHONY: test
test-extensions: limbo
@@ -94,6 +94,10 @@ test-json:
SQLITE_EXEC=$(SQLITE_EXEC) ./testing/json.test
.PHONY: test-json
test-memory:
SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/memory.py
.PHONY: test-memory
clickbench:
./perf/clickbench/benchmark.sh
.PHONY: clickbench

View File

@@ -39,6 +39,7 @@ rustyline = { version = "15.0.0", default-features = true, features = [
shlex = "1.3.0"
syntect = "5.2.0"
tracing = "0.1.41"
tracing-appender = "0.2.3"
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }

View File

@@ -1,11 +1,17 @@
use crate::{
commands::{args::EchoMode, import::ImportFile, Command, CommandParser},
commands::{
args::{EchoMode, TimerMode},
import::ImportFile,
Command, CommandParser,
},
helper::LimboHelper,
input::{get_io, get_writer, DbLocation, OutputMode, Settings},
opcodes_dictionary::OPCODE_DESCRIPTIONS,
};
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Row, Table};
use limbo_core::{Database, LimboError, OwnedValue, Statement, StepResult};
use tracing_appender::non_blocking::WorkerGuard;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
use clap::Parser;
use rustyline::{history::DefaultHistory, Editor};
@@ -18,6 +24,7 @@ use std::{
atomic::{AtomicUsize, Ordering},
Arc,
},
time::{Duration, Instant},
};
#[derive(Parser)]
@@ -49,6 +56,8 @@ pub struct Opts {
pub vfs: Option<String>,
#[clap(long, help = "Enable experimental MVCC feature")]
pub experimental_mvcc: bool,
#[clap(short = 't', long, help = "specify output file for log traces")]
pub tracing_output: Option<String>,
}
const PROMPT: &str = "limbo> ";
@@ -64,6 +73,11 @@ pub struct Limbo<'a> {
pub rl: &'a mut Editor<LimboHelper, DefaultHistory>,
}
struct QueryStatistics {
io_time_elapsed_samples: Vec<Duration>,
execute_time_elapsed_samples: Vec<Duration>,
}
macro_rules! query_internal {
($self:expr, $query:expr, $body:expr) => {{
let rows = $self.conn.query($query)?;
@@ -130,6 +144,8 @@ impl<'a> Limbo<'a> {
})
.expect("Error setting Ctrl-C handler");
}
let sql = opts.sql.clone();
let quiet = opts.quiet;
let mut app = Self {
prompt: PROMPT.to_string(),
io,
@@ -137,21 +153,25 @@ impl<'a> Limbo<'a> {
conn,
interrupt_count,
input_buff: String::new(),
opts: Settings::from(&opts),
opts: Settings::from(opts),
rl,
};
if opts.sql.is_some() {
app.handle_first_input(opts.sql.as_ref().unwrap());
}
if !opts.quiet {
app.write_fmt(format_args!("Limbo v{}", env!("CARGO_PKG_VERSION")))?;
app.writeln("Enter \".help\" for usage hints.")?;
app.display_in_memory()?;
}
app.first_run(sql, quiet)?;
Ok(app)
}
fn first_run(&mut self, sql: Option<String>, quiet: bool) -> io::Result<()> {
if let Some(sql) = sql {
self.handle_first_input(&sql);
}
if !quiet {
self.write_fmt(format_args!("Limbo v{}", env!("CARGO_PKG_VERSION")))?;
self.writeln("Enter \".help\" for usage hints.")?;
self.display_in_memory()?;
}
Ok(())
}
fn handle_first_input(&mut self, cmd: &str) {
if cmd.trim().starts_with('.') {
self.handle_dot_command(&cmd[1..]);
@@ -381,7 +401,15 @@ impl<'a> Limbo<'a> {
let _ = self.writeln(input);
}
if input.trim_start().starts_with("explain") {
let start = Instant::now();
let mut stats = QueryStatistics {
io_time_elapsed_samples: vec![],
execute_time_elapsed_samples: vec![],
};
// TODO: this is a quick fix. Some ideas for doing case-insensitive comparisons
// are to use Uncased or Unicase.
let temp = input.to_lowercase();
if temp.trim_start().starts_with("explain") {
if let Ok(Some(stmt)) = self.conn.query(input) {
let _ = self.writeln(stmt.explain().as_bytes());
}
@@ -389,14 +417,59 @@ impl<'a> Limbo<'a> {
let conn = self.conn.clone();
let runner = conn.query_runner(input.as_bytes());
for output in runner {
if self.print_query_result(input, output).is_err() {
if self
.print_query_result(input, output, Some(&mut stats))
.is_err()
{
break;
}
}
}
self.print_query_performance_stats(start, stats);
self.reset_input();
}
fn print_query_performance_stats(&mut self, start: Instant, stats: QueryStatistics) {
let elapsed_as_str = |duration: Duration| {
if duration.as_secs() >= 1 {
format!("{} s", duration.as_secs_f64())
} else if duration.as_millis() >= 1 {
format!("{} ms", duration.as_millis() as f64)
} else if duration.as_micros() >= 1 {
format!("{} us", duration.as_micros() as f64)
} else {
format!("{} ns", duration.as_nanos())
}
};
let sample_stats_as_str = |name: &str, samples: Vec<Duration>| {
if samples.is_empty() {
return format!("{}: No samples available", name);
}
let avg_time_spent = samples.iter().sum::<Duration>() / samples.len() as u32;
let total_time = samples.iter().fold(Duration::ZERO, |acc, x| acc + *x);
format!(
"{}: avg={}, total={}",
name,
elapsed_as_str(avg_time_spent),
elapsed_as_str(total_time),
)
};
if self.opts.timer {
let _ = self.writeln("Command stats:\n----------------------------");
let _ = self.writeln(format!(
"total: {} (this includes parsing/coloring of cli app)\n",
elapsed_as_str(start.elapsed())
));
let _ = self.writeln("query execution stats:\n----------------------------");
let _ = self.writeln(sample_stats_as_str(
"Execution",
stats.execute_time_elapsed_samples,
));
let _ = self.writeln(sample_stats_as_str("I/O", stats.io_time_elapsed_samples));
}
}
fn reset_line(&mut self, line: &str) -> rustyline::Result<()> {
self.rl.add_history_entry(line.to_owned())?;
self.interrupt_count.store(0, Ordering::SeqCst);
@@ -426,7 +499,7 @@ impl<'a> Limbo<'a> {
let conn = self.conn.clone();
let runner = conn.query_runner(after_comment.as_bytes());
for output in runner {
if let Err(e) = self.print_query_result(after_comment, output) {
if let Err(e) = self.print_query_result(after_comment, output, None) {
let _ = self.writeln(e.to_string());
}
}
@@ -555,6 +628,12 @@ impl<'a> Limbo<'a> {
let _ = self.writeln(v);
});
}
Command::Timer(timer_mode) => {
self.opts.timer = match timer_mode.mode {
TimerMode::On => true,
TimerMode::Off => false,
};
}
},
}
}
@@ -563,6 +642,7 @@ impl<'a> Limbo<'a> {
&mut self,
sql: &str,
mut output: Result<Option<Statement>, LimboError>,
mut statistics: Option<&mut QueryStatistics>,
) -> anyhow::Result<()> {
match output {
Ok(Some(ref mut rows)) => match self.opts.output_mode {
@@ -572,8 +652,13 @@ impl<'a> Limbo<'a> {
return Ok(());
}
let start = Instant::now();
match rows.step() {
Ok(StepResult::Row) => {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
let row = rows.row().unwrap();
for (i, value) in row.get_values().enumerate() {
if i > 0 {
@@ -588,17 +673,30 @@ impl<'a> Limbo<'a> {
let _ = self.writeln("");
}
Ok(StepResult::IO) => {
let start = Instant::now();
self.io.run_once()?;
if let Some(ref mut stats) = statistics {
stats.io_time_elapsed_samples.push(start.elapsed());
}
}
Ok(StepResult::Interrupt) => break,
Ok(StepResult::Done) => {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
break;
}
Ok(StepResult::Busy) => {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
let _ = self.writeln("database is busy");
break;
}
Err(err) => {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
let _ = self.writeln(err.to_string());
break;
}
@@ -626,8 +724,12 @@ impl<'a> Limbo<'a> {
table.set_header(header);
}
loop {
let start = Instant::now();
match rows.step() {
Ok(StepResult::Row) => {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
let record = rows.row().unwrap();
let mut row = Row::new();
row.max_height(1);
@@ -658,15 +760,35 @@ impl<'a> Limbo<'a> {
table.add_row(row);
}
Ok(StepResult::IO) => {
let start = Instant::now();
self.io.run_once()?;
if let Some(ref mut stats) = statistics {
stats.io_time_elapsed_samples.push(start.elapsed());
}
}
Ok(StepResult::Interrupt) => {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
break;
}
Ok(StepResult::Done) => {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
break;
}
Ok(StepResult::Interrupt) => break,
Ok(StepResult::Done) => break,
Ok(StepResult::Busy) => {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
let _ = self.writeln("database is busy");
break;
}
Err(err) => {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
let _ = self.write_fmt(format_args!(
"{:?}",
miette::Error::from(err).with_source_code(sql.to_owned())
@@ -695,6 +817,32 @@ impl<'a> Limbo<'a> {
Ok(())
}
pub fn init_tracing(&mut self) -> Result<WorkerGuard, std::io::Error> {
let (non_blocking, guard) = if let Some(file) = &self.opts.tracing_output {
tracing_appender::non_blocking(
std::fs::File::options()
.append(true)
.create(true)
.open(file)?,
)
} else {
tracing_appender::non_blocking(std::io::stderr())
};
if let Err(e) = tracing_subscriber::registry()
.with(
tracing_subscriber::fmt::layer()
.with_writer(non_blocking)
.with_line_number(true)
.with_thread_ids(true),
)
.with(EnvFilter::from_default_env())
.try_init()
{
println!("Unable to setup tracing appender: {:?}", e);
}
Ok(guard)
}
fn display_schema(&mut self, table: Option<&str>) -> anyhow::Result<()> {
let sql = match table {
Some(table_name) => format!(

View File

@@ -106,3 +106,15 @@ pub struct LoadExtensionArgs {
#[arg(add = ArgValueCompleter::new(PathCompleter::file()))]
pub path: String,
}
#[derive(Debug, ValueEnum, Clone)]
pub enum TimerMode {
On,
Off,
}
#[derive(Debug, Clone, Args)]
pub struct TimerArgs {
#[arg(value_enum)]
pub mode: TimerMode,
}

View File

@@ -3,7 +3,7 @@ pub mod import;
use args::{
CwdArgs, EchoArgs, ExitArgs, LoadExtensionArgs, NullValueArgs, OpcodesArgs, OpenArgs,
OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs,
OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs, TimerArgs,
};
use clap::Parser;
use import::ImportArgs;
@@ -72,6 +72,8 @@ pub enum Command {
/// List vfs modules available
#[command(name = "vfslist", display_name = ".vfslist")]
ListVfs,
#[command(name = "timer", display_name = ".timer")]
Timer(TimerArgs),
}
const _HELP_TEMPLATE: &str = "{before-help}{name}

View File

@@ -81,28 +81,32 @@ pub struct Settings {
pub echo: bool,
pub is_stdout: bool,
pub io: Io,
pub tracing_output: Option<String>,
pub timer: bool,
}
impl From<&Opts> for Settings {
fn from(opts: &Opts) -> Self {
impl From<Opts> for Settings {
fn from(opts: Opts) -> Self {
Self {
null_value: String::new(),
output_mode: opts.output_mode,
echo: false,
is_stdout: opts.output.is_empty(),
output_filename: opts.output.clone(),
output_filename: opts.output,
db_file: opts
.database
.as_ref()
.map_or(":memory:".to_string(), |p| p.to_string_lossy().to_string()),
io: match opts.vfs.as_ref().unwrap_or(&String::new()).as_str() {
"memory" => Io::Memory,
"memory" | ":memory:" => Io::Memory,
"syscall" => Io::Syscall,
#[cfg(all(target_os = "linux", feature = "io_uring"))]
"io_uring" => Io::IoUring,
"" => Io::default(),
vfs => Io::External(vfs.to_string()),
},
tracing_output: opts.tracing_output,
timer: false,
}
}
}

View File

@@ -7,7 +7,6 @@ mod opcodes_dictionary;
use rustyline::{error::ReadlineError, Config, Editor};
use std::sync::atomic::Ordering;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
fn rustyline_config() -> Config {
Config::builder()
@@ -17,15 +16,8 @@ fn rustyline_config() -> Config {
fn main() -> anyhow::Result<()> {
let mut rl = Editor::with_config(rustyline_config())?;
tracing_subscriber::registry()
.with(
tracing_subscriber::fmt::layer()
.with_line_number(true)
.with_thread_ids(true),
)
.with(EnvFilter::from_default_env())
.init();
let mut app = app::Limbo::new(&mut rl)?;
let _guard = app.init_tracing()?;
let home = dirs::home_dir().expect("Could not determine home directory");
let history_file = home.join(".limbo_history");
if history_file.exists() {

View File

@@ -6,6 +6,7 @@ use libloading::{Library, Symbol};
use limbo_ext::{ExtensionApi, ExtensionApiRef, ExtensionEntryPoint, ResultCode, VfsImpl};
use std::{
ffi::{c_char, CString},
rc::Rc,
sync::{Arc, Mutex, OnceLock},
};
@@ -29,7 +30,10 @@ unsafe impl Send for VfsMod {}
unsafe impl Sync for VfsMod {}
impl Connection {
pub fn load_extension<P: AsRef<std::ffi::OsStr>>(&self, path: P) -> crate::Result<()> {
pub fn load_extension<P: AsRef<std::ffi::OsStr>>(
self: &Rc<Connection>,
path: P,
) -> crate::Result<()> {
use limbo_ext::ExtensionApiRef;
let api = Box::new(self.build_limbo_ext());
@@ -44,7 +48,15 @@ impl Connection {
let result_code = unsafe { entry(api_ptr) };
if result_code.is_ok() {
let extensions = get_extension_libraries();
extensions.lock().unwrap().push((Arc::new(lib), api_ref));
extensions
.lock()
.map_err(|_| {
LimboError::ExtensionError("Error locking extension libraries".to_string())
})?
.push((Arc::new(lib), api_ref));
{
self.parse_schema_rows()?;
}
Ok(())
} else {
if !api_ptr.is_null() {

View File

@@ -189,6 +189,7 @@ cfg_block! {
mod unix;
#[cfg(feature = "fs")]
pub use unix::UnixIO;
pub use unix::UnixIO as SyscallIO;
pub use io_uring::UringIO as PlatformIO;
}
@@ -197,16 +198,19 @@ cfg_block! {
#[cfg(feature = "fs")]
pub use unix::UnixIO;
pub use unix::UnixIO as PlatformIO;
pub use PlatformIO as SyscallIO;
}
#[cfg(target_os = "windows")] {
mod windows;
pub use windows::WindowsIO as PlatformIO;
pub use PlatformIO as SyscallIO;
}
#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))] {
mod generic;
pub use generic::GenericIO as PlatformIO;
pub use PlatformIO as SyscallIO;
}
}

View File

@@ -32,7 +32,9 @@ pub use io::clock::{Clock, Instant};
pub use io::UnixIO;
#[cfg(all(feature = "fs", target_os = "linux", feature = "io_uring"))]
pub use io::UringIO;
pub use io::{Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, WriteCompletion, IO};
pub use io::{
Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, SyscallIO, WriteCompletion, IO,
};
use limbo_ext::{ResultCode, VTabKind, VTabModuleImpl};
use limbo_sqlite3_parser::{ast, ast::Cmd, lexer::sql::Parser};
use parking_lot::RwLock;
@@ -70,7 +72,7 @@ use vdbe::{builder::QueryMode, VTabOpaqueCursor};
pub type Result<T, E = LimboError> = std::result::Result<T, E>;
pub static DATABASE_VERSION: OnceLock<String> = OnceLock::new();
#[derive(Clone, PartialEq, Eq)]
#[derive(Clone, Copy, PartialEq, Eq)]
enum TransactionState {
Write,
Read,
@@ -158,7 +160,13 @@ impl Database {
.try_write()
.expect("lock on schema should succeed first try");
let syms = conn.syms.borrow();
parse_schema_rows(rows, &mut schema, io, syms.deref(), None)?;
if let Err(LimboError::ExtensionError(e)) =
parse_schema_rows(rows, &mut schema, io, &syms, None)
{
// This means a vtab exists but its module is no longer loaded; print
// a warning telling the user to load the module.
eprintln!("Warning: {}", e);
}
}
Ok(db)
}
@@ -186,9 +194,9 @@ impl Database {
schema: self.schema.clone(),
header: self.header.clone(),
last_insert_rowid: Cell::new(0),
auto_commit: RefCell::new(true),
auto_commit: Cell::new(true),
mv_transactions: RefCell::new(Vec::new()),
transaction_state: RefCell::new(TransactionState::None),
transaction_state: Cell::new(TransactionState::None),
last_change: Cell::new(0),
syms: RefCell::new(SymbolTable::new()),
total_changes: Cell::new(0),
@@ -209,7 +217,7 @@ impl Database {
Some(vfs) => vfs,
None => match vfs.trim() {
"memory" => Arc::new(MemoryIO::new()),
"syscall" => Arc::new(PlatformIO::new()?),
"syscall" => Arc::new(SyscallIO::new()?),
#[cfg(all(target_os = "linux", feature = "io_uring"))]
"io_uring" => Arc::new(UringIO::new()?),
other => {
@@ -278,9 +286,9 @@ pub struct Connection {
pager: Rc<Pager>,
schema: Arc<RwLock<Schema>>,
header: Arc<SpinLock<DatabaseHeader>>,
auto_commit: RefCell<bool>,
auto_commit: Cell<bool>,
mv_transactions: RefCell<Vec<crate::mvcc::database::TxID>>,
transaction_state: RefCell<TransactionState>,
transaction_state: Cell<TransactionState>,
last_insert_rowid: Cell<u64>,
last_change: Cell<i64>,
total_changes: Cell<i64>,
@@ -517,7 +525,26 @@ impl Connection {
}
pub fn get_auto_commit(&self) -> bool {
*self.auto_commit.borrow()
self.auto_commit.get()
}
pub fn parse_schema_rows(self: &Rc<Connection>) -> Result<()> {
let rows = self.query("SELECT * FROM sqlite_schema")?;
let mut schema = self
.schema
.try_write()
.expect("lock on schema should succeed first try");
{
let syms = self.syms.borrow();
if let Err(LimboError::ExtensionError(e)) =
parse_schema_rows(rows, &mut schema, self.pager.io.clone(), &syms, None)
{
// This means a vtab exists but its module is no longer loaded; print
// a warning telling the user to load the module.
eprintln!("Warning: {}", e);
}
}
Ok(())
}
}
@@ -630,7 +657,7 @@ impl VirtualTable {
module_name
)))?;
if let VTabKind::VirtualTable = kind {
if module.module_kind != VTabKind::VirtualTable {
if module.module_kind == VTabKind::TableValuedFunction {
return Err(LimboError::ExtensionError(format!(
"{} is not a virtual table module",
module_name

View File

@@ -161,6 +161,7 @@ pub struct BTreeTable {
pub primary_key_column_names: Vec<String>,
pub columns: Vec<Column>,
pub has_rowid: bool,
pub is_strict: bool,
}
impl BTreeTable {
@@ -262,12 +263,14 @@ fn create_table(
let mut has_rowid = true;
let mut primary_key_column_names = vec![];
let mut cols = vec![];
let is_strict: bool;
match body {
CreateTableBody::ColumnsAndConstraints {
columns,
constraints,
options,
} => {
is_strict = options.contains(TableOptions::STRICT);
if let Some(constraints) = constraints {
for c in constraints {
if let limbo_sqlite3_parser::ast::TableConstraint::PrimaryKey {
@@ -390,6 +393,7 @@ fn create_table(
has_rowid,
primary_key_column_names,
columns: cols,
is_strict,
})
}
@@ -456,7 +460,7 @@ pub fn affinity(datatype: &str) -> Affinity {
}
// Rule 3: BLOB or empty -> BLOB affinity (historically called NONE)
if datatype.contains("BLOB") || datatype.is_empty() {
if datatype.contains("BLOB") || datatype.is_empty() || datatype.contains("ANY") {
return Affinity::Blob;
}
@@ -508,11 +512,11 @@ pub enum Affinity {
Numeric,
}
pub const SQLITE_AFF_TEXT: char = 'a';
pub const SQLITE_AFF_NONE: char = 'b'; // Historically called NONE, but it's the same as BLOB
pub const SQLITE_AFF_NUMERIC: char = 'c';
pub const SQLITE_AFF_INTEGER: char = 'd';
pub const SQLITE_AFF_REAL: char = 'e';
pub const SQLITE_AFF_NONE: char = 'A'; // Historically called NONE, but it's the same as BLOB
pub const SQLITE_AFF_TEXT: char = 'B';
pub const SQLITE_AFF_NUMERIC: char = 'C';
pub const SQLITE_AFF_INTEGER: char = 'D';
pub const SQLITE_AFF_REAL: char = 'E';
impl Affinity {
/// This is meant to be used in opcodes like Eq, which state:
@@ -552,6 +556,7 @@ pub fn sqlite_schema_table() -> BTreeTable {
root_page: 1,
name: "sqlite_schema".to_string(),
has_rowid: true,
is_strict: false,
primary_key_column_names: vec![],
columns: vec![
Column {
@@ -1046,6 +1051,7 @@ mod tests {
root_page: 0,
name: "t1".to_string(),
has_rowid: true,
is_strict: false,
primary_key_column_names: vec!["nonexistent".to_string()],
columns: vec![Column {
name: Some("a".to_string()),

File diff suppressed because it is too large Load Diff

View File

@@ -626,9 +626,9 @@ impl PageContent {
usable_size,
);
if overflows {
4 + to_read + n_payload + 4
4 + to_read + n_payload
} else {
4 + len_payload as usize + n_payload + 4
4 + len_payload as usize + n_payload
}
}
PageType::TableInterior => {
@@ -644,9 +644,9 @@ impl PageContent {
usable_size,
);
if overflows {
to_read + n_payload + 4
to_read + n_payload
} else {
len_payload as usize + n_payload + 4
len_payload as usize + n_payload
}
}
PageType::TableLeaf => {

View File

@@ -7,7 +7,7 @@ use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode};
use crate::{schema::Schema, Result, SymbolTable};
use limbo_sqlite3_parser::ast::{Expr, Limit, QualifiedName};
use super::plan::TableReference;
use super::plan::{IterationDirection, TableReference};
pub fn translate_delete(
query_mode: QueryMode,
@@ -53,7 +53,10 @@ pub fn prepare_delete_plan(
let table_references = vec![TableReference {
table,
identifier: name,
op: Operation::Scan { iter_dir: None },
op: Operation::Scan {
iter_dir: IterationDirection::Forwards,
index: None,
},
join_info: None,
}];

View File

@@ -1,6 +1,8 @@
// This module contains code for emitting bytecode instructions for SQL query execution.
// It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine.
use std::rc::Rc;
use limbo_sqlite3_parser::ast::{self};
use crate::function::Func;
@@ -135,6 +137,7 @@ fn prologue<'a>(
Ok((t_ctx, init_label, start_offset))
}
#[derive(Clone, Copy, Debug)]
pub enum TransactionMode {
None,
Read,
@@ -527,29 +530,67 @@ fn emit_update_insns(
) -> crate::Result<()> {
let table_ref = &plan.table_references.first().unwrap();
let loop_labels = t_ctx.labels_main_loop.first().unwrap();
let (cursor_id, index) = match &table_ref.op {
Operation::Scan { .. } => (program.resolve_cursor_id(&table_ref.identifier), None),
let (cursor_id, index, is_virtual) = match &table_ref.op {
Operation::Scan { .. } => (
program.resolve_cursor_id(&table_ref.identifier),
None,
table_ref.virtual_table().is_some(),
),
Operation::Search(search) => match search {
&Search::RowidEq { .. } | Search::RowidSearch { .. } => {
(program.resolve_cursor_id(&table_ref.identifier), None)
}
&Search::RowidEq { .. } | Search::RowidSearch { .. } => (
program.resolve_cursor_id(&table_ref.identifier),
None,
false,
),
Search::IndexSearch { index, .. } => (
program.resolve_cursor_id(&table_ref.identifier),
Some((index.clone(), program.resolve_cursor_id(&index.name))),
false,
),
},
_ => return Ok(()),
};
let rowid_reg = program.alloc_register();
for cond in plan.where_clause.iter().filter(|c| c.is_constant()) {
let jump_target = program.allocate_label();
let meta = ConditionMetadata {
jump_if_condition_is_true: false,
jump_target_when_true: jump_target,
jump_target_when_false: t_ctx.label_main_loop_end.unwrap(),
};
translate_condition_expr(
program,
&plan.table_references,
&cond.expr,
meta,
&t_ctx.resolver,
)?;
program.resolve_label(jump_target, program.offset());
}
let beg = program.alloc_registers(
table_ref.table.columns().len()
+ if is_virtual {
2 // two args before the relevant columns for VUpdate
} else {
1 // rowid reg
},
);
program.emit_insn(Insn::RowId {
cursor_id,
dest: rowid_reg,
dest: beg,
});
// if no rowid, we're done
program.emit_insn(Insn::IsNull {
reg: rowid_reg,
reg: beg,
target_pc: t_ctx.label_main_loop_end.unwrap(),
});
if is_virtual {
program.emit_insn(Insn::Copy {
src_reg: beg,
dst_reg: beg + 1,
amount: 0,
})
}
if let Some(offset) = t_ctx.reg_offset {
program.emit_insn(Insn::IfPos {
@@ -573,12 +614,13 @@ fn emit_update_insns(
&t_ctx.resolver,
)?;
}
let first_col_reg = program.alloc_registers(table_ref.table.columns().len());
// we scan a column at a time, loading either the column's values, or the new value
// from the Set expression, into registers so we can emit a MakeRecord and update the row.
let start = if is_virtual { beg + 2 } else { beg + 1 };
for idx in 0..table_ref.columns().len() {
if let Some((idx, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) {
let target_reg = first_col_reg + idx;
let target_reg = start + idx;
if let Some((_, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) {
translate_expr(
program,
Some(&plan.table_references),
@@ -593,9 +635,17 @@ fn emit_update_insns(
.iter()
.position(|c| Some(&c.name) == table_column.name.as_ref())
});
let dest = first_col_reg + idx;
if table_column.primary_key {
program.emit_null(dest, None);
// Don't emit NULL for the primary key of virtual tables: they require the
// first two args preceding the 'record' to be explicitly non-null.
if table_column.primary_key && !is_virtual {
program.emit_null(target_reg, None);
} else if is_virtual {
program.emit_insn(Insn::VColumn {
cursor_id,
column: idx,
dest: target_reg,
});
} else {
program.emit_insn(Insn::Column {
cursor_id: *index
@@ -609,24 +659,43 @@ fn emit_update_insns(
})
.unwrap_or(&cursor_id),
column: column_idx_in_index.unwrap_or(idx),
dest,
dest: target_reg,
});
}
}
}
let record_reg = program.alloc_register();
program.emit_insn(Insn::MakeRecord {
start_reg: first_col_reg,
count: table_ref.columns().len(),
dest_reg: record_reg,
});
program.emit_insn(Insn::InsertAsync {
cursor: cursor_id,
key_reg: rowid_reg,
record_reg,
flag: 0,
});
program.emit_insn(Insn::InsertAwait { cursor_id });
if let Some(btree_table) = table_ref.btree() {
if btree_table.is_strict {
program.emit_insn(Insn::TypeCheck {
start_reg: start,
count: table_ref.columns().len(),
check_generated: true,
table_reference: Rc::clone(&btree_table),
});
}
let record_reg = program.alloc_register();
program.emit_insn(Insn::MakeRecord {
start_reg: start,
count: table_ref.columns().len(),
dest_reg: record_reg,
});
program.emit_insn(Insn::InsertAsync {
cursor: cursor_id,
key_reg: beg,
record_reg,
flag: 0,
});
program.emit_insn(Insn::InsertAwait { cursor_id });
} else if let Some(vtab) = table_ref.virtual_table() {
let arg_count = table_ref.columns().len() + 2;
program.emit_insn(Insn::VUpdate {
cursor_id,
arg_count,
start_reg: beg,
vtab_ptr: vtab.implementation.as_ref().ctx as usize,
conflict_action: 0u16,
});
}
if let Some(limit_reg) = t_ctx.reg_limit {
program.emit_insn(Insn::DecrJumpZero {

View File

@@ -6,10 +6,10 @@ use limbo_sqlite3_parser::ast::{
};
use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY;
use crate::schema::Table;
use crate::schema::{IndexColumn, Table};
use crate::util::normalize_ident;
use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode};
use crate::vdbe::insn::RegisterOrLiteral;
use crate::vdbe::insn::{IdxInsertFlags, RegisterOrLiteral};
use crate::vdbe::BranchOffset;
use crate::{
schema::{Column, Schema},
@@ -83,6 +83,22 @@ pub fn translate_insert(
Some(table_name.0.clone()),
CursorType::BTreeTable(btree_table.clone()),
);
// allocate cursor id's for each btree index cursor we'll need to populate the indexes
// (idx name, root_page, idx cursor id)
let idx_cursors = schema
.get_indices(&table_name.0)
.iter()
.map(|idx| {
(
&idx.name,
idx.root_page,
program.alloc_cursor_id(
Some(table_name.0.clone()),
CursorType::BTreeIndex(idx.clone()),
),
)
})
.collect::<Vec<(&String, usize, usize)>>();
let root_page = btree_table.root_page;
let values = match body {
InsertBody::Select(select, _) => match &select.body.select.deref() {
@@ -93,6 +109,7 @@ pub fn translate_insert(
};
let column_mappings = resolve_columns_for_insert(&table, columns, values)?;
let index_col_mappings = resolve_indicies_for_insert(schema, table.as_ref(), &column_mappings)?;
// Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias)
let rowid_alias_index = btree_table.columns.iter().position(|c| c.is_rowid_alias);
let has_user_provided_rowid = {
@@ -183,7 +200,14 @@ pub fn translate_insert(
&resolver,
)?;
}
// Open all the index btrees for writing
for idx_cursor in idx_cursors.iter() {
program.emit_insn(Insn::OpenWriteAsync {
cursor_id: idx_cursor.2,
root_page: idx_cursor.1.into(),
});
program.emit_insn(Insn::OpenWriteAwait {});
}
// Common record insertion logic for both single and multiple rows
let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label()));
if let Some(reg) = rowid_alias_reg {
@@ -251,6 +275,17 @@ pub fn translate_insert(
program.resolve_label(make_record_label, program.offset());
}
match table.btree() {
Some(t) if t.is_strict => {
program.emit_insn(Insn::TypeCheck {
start_reg: column_registers_start,
count: num_cols,
check_generated: true,
table_reference: Rc::clone(&t),
});
}
_ => (),
}
// Create and insert the record
program.emit_insn(Insn::MakeRecord {
start_reg: column_registers_start,
@@ -265,7 +300,55 @@ pub fn translate_insert(
flag: 0,
});
program.emit_insn(Insn::InsertAwait { cursor_id });
for index_col_mapping in index_col_mappings.iter() {
// find which cursor we opened earlier for this index
let idx_cursor_id = idx_cursors
.iter()
.find(|(name, _, _)| *name == &index_col_mapping.idx_name)
.map(|(_, _, c_id)| *c_id)
.expect("no cursor found for index");
let num_cols = index_col_mapping.columns.len();
// allocate scratch registers for the index columns plus rowid
let idx_start_reg = program.alloc_registers(num_cols + 1);
// copy each index column from the table's column registers into these scratch regs
for (i, col) in index_col_mapping.columns.iter().enumerate() {
// copy from the table's column register over to the index's scratch register
program.emit_insn(Insn::Copy {
src_reg: column_registers_start + col.0,
dst_reg: idx_start_reg + i,
amount: 0,
});
}
// last register is the rowid
program.emit_insn(Insn::Copy {
src_reg: rowid_reg,
dst_reg: idx_start_reg + num_cols,
amount: 0,
});
let record_reg = program.alloc_register();
program.emit_insn(Insn::MakeRecord {
start_reg: idx_start_reg,
count: num_cols + 1,
dest_reg: record_reg,
});
// now do the actual index insertion using the unpacked registers
program.emit_insn(Insn::IdxInsertAsync {
cursor_id: idx_cursor_id,
record_reg,
unpacked_start: Some(idx_start_reg), // TODO: enable optimization
unpacked_count: Some((num_cols + 1) as u16),
// TODO: figure out how to determine whether or not we need to seek prior to insert.
flags: IdxInsertFlags::new(),
});
program.emit_insn(Insn::IdxInsertAwait {
cursor_id: idx_cursor_id,
});
}
if inserting_multiple_rows {
// For multiple rows, loop back
program.emit_insn(Insn::Goto {
@@ -393,6 +476,69 @@ fn resolve_columns_for_insert<'a>(
Ok(mappings)
}
/// Represents how a column in an index should be populated during an INSERT.
/// Similar to ColumnMapping above but includes the index name, as well as multiple
/// possible value indices for each.
#[derive(Default)]
struct IndexColMapping {
    // Name of the index this mapping targets; used to look up the index cursor
    // that was opened earlier during INSERT translation.
    idx_name: String,
    // One entry per index column, in index-definition order: the position of
    // the column within the table's column mapping, plus the index column itself.
    columns: Vec<(usize, IndexColumn)>,
    // For each index column, the position of its value in the INSERT's VALUES
    // list, or None when the statement supplies no value for that column.
    // NOTE(review): field name keeps the existing "indicies" spelling because
    // sibling code references it.
    value_indicies: Vec<Option<usize>>,
}
impl IndexColMapping {
fn new(name: String) -> Self {
IndexColMapping {
idx_name: name,
..Default::default()
}
}
}
/// Build an [`IndexColMapping`] for every index on `table`, describing how each
/// index column is populated from the INSERT's table-column mapping.
///
/// Example:
/// Table 'test': (a, b, c);
/// Index 'idx': test(a, b);
/// ________________________________
/// Insert (a, c): (2, 3)
/// Record: (2, NULL, 3)
/// IndexColMapping: (a, b) = (2, NULL)
///
/// Returns an error if an index references a column that is absent from the
/// table's column mapping (a schema inconsistency).
fn resolve_indicies_for_insert(
    schema: &Schema,
    table: &Table,
    columns: &[ColumnMapping<'_>],
) -> Result<Vec<IndexColMapping>> {
    let mut index_col_mappings = Vec::new();
    // Iterate over all indices for this table.
    for index in schema.get_indices(table.get_name()) {
        let mut idx_map = IndexColMapping::new(index.name.clone());
        // For each column in the index (in the order defined by the index),
        // find the corresponding column in the insert's column mapping.
        for idx_col in &index.columns {
            let target_name = normalize_ident(idx_col.name.as_str());
            if let Some((i, col_mapping)) = columns.iter().enumerate().find(|(_, mapping)| {
                mapping
                    .column
                    .name
                    .as_ref()
                    .is_some_and(|name| name.eq_ignore_ascii_case(&target_name))
            }) {
                idx_map.columns.push((i, idx_col.clone()));
                idx_map.value_indicies.push(col_mapping.value_index);
            } else {
                // The index names a column the table mapping does not contain;
                // this is a schema inconsistency, not a missing INSERT value
                // (columns without a supplied value still appear in `columns`
                // with value_index == None).
                return Err(crate::LimboError::ParseError(format!(
                    "Column {} not found in index {}",
                    target_name, index.name
                )));
            }
        }
        // Only record indexes that actually have columns to populate.
        if !idx_map.columns.is_empty() {
            index_col_mappings.push(idx_map);
        }
    }
    Ok(index_col_mappings)
}
/// Populates the column registers with values for a single row
fn populate_column_registers(
program: &mut ProgramBuilder,

View File

@@ -79,7 +79,7 @@ pub fn init_loop(
}
}
match &table.op {
Operation::Scan { .. } => {
Operation::Scan { index, .. } => {
let cursor_id = program.alloc_cursor_id(
Some(table.identifier.clone()),
match &table.table {
@@ -90,6 +90,9 @@ pub fn init_loop(
other => panic!("Invalid table reference type in Scan: {:?}", other),
},
);
let index_cursor_id = index.as_ref().map(|i| {
program.alloc_cursor_id(Some(i.name.clone()), CursorType::BTreeIndex(i.clone()))
});
match (mode, &table.table) {
(OperationMode::SELECT, Table::BTree(btree)) => {
let root_page = btree.root_page;
@@ -98,6 +101,13 @@ pub fn init_loop(
root_page,
});
program.emit_insn(Insn::OpenReadAwait {});
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::OpenReadAsync {
cursor_id: index_cursor_id,
root_page: index.as_ref().unwrap().root_page,
});
program.emit_insn(Insn::OpenReadAwait {});
}
}
(OperationMode::DELETE, Table::BTree(btree)) => {
let root_page = btree.root_page;
@@ -113,13 +123,15 @@ pub fn init_loop(
cursor_id,
root_page: root_page.into(),
});
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::OpenWriteAsync {
cursor_id: index_cursor_id,
root_page: index.as_ref().unwrap().root_page.into(),
});
}
program.emit_insn(Insn::OpenWriteAwait {});
}
(OperationMode::SELECT, Table::Virtual(_)) => {
program.emit_insn(Insn::VOpenAsync { cursor_id });
program.emit_insn(Insn::VOpenAwait {});
}
(OperationMode::DELETE, Table::Virtual(_)) => {
(_, Table::Virtual(_)) => {
program.emit_insn(Insn::VOpenAsync { cursor_id });
program.emit_insn(Insn::VOpenAwait {});
}
@@ -142,14 +154,7 @@ pub fn init_loop(
});
program.emit_insn(Insn::OpenReadAwait {});
}
OperationMode::DELETE => {
program.emit_insn(Insn::OpenWriteAsync {
cursor_id: table_cursor_id,
root_page: table.table.get_root_page().into(),
});
program.emit_insn(Insn::OpenWriteAwait {});
}
OperationMode::UPDATE => {
OperationMode::DELETE | OperationMode::UPDATE => {
program.emit_insn(Insn::OpenWriteAsync {
cursor_id: table_cursor_id,
root_page: table.table.get_root_page().into(),
@@ -175,14 +180,7 @@ pub fn init_loop(
});
program.emit_insn(Insn::OpenReadAwait);
}
OperationMode::DELETE => {
program.emit_insn(Insn::OpenWriteAsync {
cursor_id: index_cursor_id,
root_page: index.root_page.into(),
});
program.emit_insn(Insn::OpenWriteAwait {});
}
OperationMode::UPDATE => {
OperationMode::UPDATE | OperationMode::DELETE => {
program.emit_insn(Insn::OpenWriteAsync {
cursor_id: index_cursor_id,
root_page: index.root_page.into(),
@@ -282,36 +280,35 @@ pub fn open_loop(
program.resolve_label(jump_target_when_true, program.offset());
}
}
Operation::Scan { iter_dir } => {
Operation::Scan { iter_dir, index } => {
let cursor_id = program.resolve_cursor_id(&table.identifier);
let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name));
let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id);
if !matches!(&table.table, Table::Virtual(_)) {
if iter_dir
.as_ref()
.is_some_and(|dir| *dir == IterationDirection::Backwards)
{
program.emit_insn(Insn::LastAsync { cursor_id });
if *iter_dir == IterationDirection::Backwards {
program.emit_insn(Insn::LastAsync {
cursor_id: iteration_cursor_id,
});
} else {
program.emit_insn(Insn::RewindAsync { cursor_id });
program.emit_insn(Insn::RewindAsync {
cursor_id: iteration_cursor_id,
});
}
}
match &table.table {
Table::BTree(_) => program.emit_insn(
if iter_dir
.as_ref()
.is_some_and(|dir| *dir == IterationDirection::Backwards)
{
Table::BTree(_) => {
program.emit_insn(if *iter_dir == IterationDirection::Backwards {
Insn::LastAwait {
cursor_id,
cursor_id: iteration_cursor_id,
pc_if_empty: loop_end,
}
} else {
Insn::RewindAwait {
cursor_id,
cursor_id: iteration_cursor_id,
pc_if_empty: loop_end,
}
},
),
})
}
Table::Virtual(ref table) => {
let start_reg = program
.alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0));
@@ -337,6 +334,13 @@ pub fn open_loop(
}
program.resolve_label(loop_start, program.offset());
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::DeferredSeek {
index_cursor_id,
table_cursor_id: cursor_id,
});
}
for cond in predicates
.iter()
.filter(|cond| cond.should_eval_at_loop(table_index))
@@ -361,139 +365,6 @@ pub fn open_loop(
let table_cursor_id = program.resolve_cursor_id(&table.identifier);
// Open the loop for the index search.
// Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup.
if !matches!(search, Search::RowidEq { .. }) {
let index_cursor_id = if let Search::IndexSearch { index, .. } = search {
Some(program.resolve_cursor_id(&index.name))
} else {
None
};
let cmp_reg = program.alloc_register();
let (cmp_expr, cmp_op) = match search {
Search::IndexSearch {
cmp_expr, cmp_op, ..
} => (cmp_expr, cmp_op),
Search::RowidSearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op),
Search::RowidEq { .. } => unreachable!(),
};
// TODO this only handles ascending indexes
match cmp_op {
ast::Operator::Equals
| ast::Operator::Greater
| ast::Operator::GreaterEquals => {
translate_expr(
program,
Some(tables),
&cmp_expr.expr,
cmp_reg,
&t_ctx.resolver,
)?;
}
ast::Operator::Less | ast::Operator::LessEquals => {
program.emit_insn(Insn::Null {
dest: cmp_reg,
dest_end: None,
});
}
_ => unreachable!(),
}
// If we try to seek to a key that is not present in the table/index, we exit the loop entirely.
program.emit_insn(match cmp_op {
ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE {
is_index: index_cursor_id.is_some(),
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
start_reg: cmp_reg,
num_regs: 1,
target_pc: loop_end,
},
ast::Operator::Greater
| ast::Operator::Less
| ast::Operator::LessEquals => Insn::SeekGT {
is_index: index_cursor_id.is_some(),
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
start_reg: cmp_reg,
num_regs: 1,
target_pc: loop_end,
},
_ => unreachable!(),
});
if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals {
translate_expr(
program,
Some(tables),
&cmp_expr.expr,
cmp_reg,
&t_ctx.resolver,
)?;
}
program.resolve_label(loop_start, program.offset());
// TODO: We are currently only handling ascending indexes.
// For conditions like index_key > 10, we have already sought to the first key greater than 10, and can just scan forward.
// For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end.
// For conditions like index_key = 10, we have already sought to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end.
// For conditions like index_key >= 10, we have already sought to the first key greater than or equal to 10, and can just scan forward.
// For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end.
// For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all.
//
// For primary key searches we emit RowId and then compare it to the seek value.
match cmp_op {
ast::Operator::Equals | ast::Operator::LessEquals => {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxGT {
cursor_id: index_cursor_id,
start_reg: cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
} else {
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id: table_cursor_id,
dest: rowid_reg,
});
program.emit_insn(Insn::Gt {
lhs: rowid_reg,
rhs: cmp_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
});
}
}
ast::Operator::Less => {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxGE {
cursor_id: index_cursor_id,
start_reg: cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
} else {
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id: table_cursor_id,
dest: rowid_reg,
});
program.emit_insn(Insn::Ge {
lhs: rowid_reg,
rhs: cmp_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
});
}
}
_ => {}
}
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::DeferredSeek {
index_cursor_id,
table_cursor_id,
});
}
}
if let Search::RowidEq { cmp_expr } = search {
let src_reg = program.alloc_register();
translate_expr(
@@ -508,7 +379,280 @@ pub fn open_loop(
src_reg,
target_pc: next,
});
} else {
// Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore.
let index_cursor_id = if let Search::IndexSearch { index, .. } = search {
Some(program.resolve_cursor_id(&index.name))
} else {
None
};
let (cmp_expr, cmp_op, iter_dir) = match search {
Search::IndexSearch {
cmp_expr,
cmp_op,
iter_dir,
..
} => (cmp_expr, cmp_op, iter_dir),
Search::RowidSearch {
cmp_expr,
cmp_op,
iter_dir,
} => (cmp_expr, cmp_op, iter_dir),
Search::RowidEq { .. } => unreachable!(),
};
// There are a few steps in an index seek:
// 1. Emit the comparison expression for the rowid/index seek. For example, if we have a clause 'WHERE index_key >= 10', we emit the comparison expression 10 into cmp_reg.
//
// 2. Emit the seek instruction. SeekGE and SeekGT are used in forwards iteration, SeekLT and SeekLE are used in backwards iteration.
// All of the examples below assume an ascending index, because we do not support descending indexes yet.
// If we are scanning the ascending index:
// - Forwards, and have a GT/GE/EQ comparison, the comparison expression from step 1 is used as the value to seek to, because that is the lowest possible value that satisfies the clause.
// - Forwards, and have a LT/LE comparison, NULL is used as the comparison expression because we actually want to start scanning from the beginning of the index.
// - Backwards, and have a GT/GE comparison, no Seek instruction is emitted and we emit LastAsync instead, because we want to start scanning from the end of the index.
// - Backwards, and have a LT/LE/EQ comparison, we emit a Seek instruction with the comparison expression from step 1 as the value to seek to, since that is the highest possible
// value that satisfies the clause.
let seek_cmp_reg = program.alloc_register();
let mut comparison_expr_translated = false;
match (cmp_op, iter_dir) {
// Forwards, GT/GE/EQ -> use the comparison expression (i.e. seek to the first key where the cmp expr is satisfied, and then scan forwards)
(
ast::Operator::Equals
| ast::Operator::Greater
| ast::Operator::GreaterEquals,
IterationDirection::Forwards,
) => {
translate_expr(
program,
Some(tables),
&cmp_expr.expr,
seek_cmp_reg,
&t_ctx.resolver,
)?;
comparison_expr_translated = true;
match cmp_op {
ast::Operator::Equals | ast::Operator::GreaterEquals => {
program.emit_insn(Insn::SeekGE {
is_index: index_cursor_id.is_some(),
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
start_reg: seek_cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
}
ast::Operator::Greater => {
program.emit_insn(Insn::SeekGT {
is_index: index_cursor_id.is_some(),
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
start_reg: seek_cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
}
_ => unreachable!(),
}
}
// Forwards, LT/LE -> use NULL (i.e. start from the beginning of the index)
(
ast::Operator::Less | ast::Operator::LessEquals,
IterationDirection::Forwards,
) => {
program.emit_insn(Insn::Null {
dest: seek_cmp_reg,
dest_end: None,
});
program.emit_insn(Insn::SeekGT {
is_index: index_cursor_id.is_some(),
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
start_reg: seek_cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
}
// Backwards, GT/GE -> no seek, emit LastAsync (i.e. start from the end of the index)
(
ast::Operator::Greater | ast::Operator::GreaterEquals,
IterationDirection::Backwards,
) => {
program.emit_insn(Insn::LastAsync {
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
});
program.emit_insn(Insn::LastAwait {
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
pc_if_empty: loop_end,
});
}
// Backwards, LT/LE/EQ -> use the comparison expression (i.e. seek from the end of the index until the cmp expr is satisfied, and then scan backwards)
(
ast::Operator::Less | ast::Operator::LessEquals | ast::Operator::Equals,
IterationDirection::Backwards,
) => {
translate_expr(
program,
Some(tables),
&cmp_expr.expr,
seek_cmp_reg,
&t_ctx.resolver,
)?;
comparison_expr_translated = true;
match cmp_op {
ast::Operator::Less => {
program.emit_insn(Insn::SeekLT {
is_index: index_cursor_id.is_some(),
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
start_reg: seek_cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
}
ast::Operator::LessEquals | ast::Operator::Equals => {
program.emit_insn(Insn::SeekLE {
is_index: index_cursor_id.is_some(),
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
start_reg: seek_cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
}
_ => unreachable!(),
}
}
_ => unreachable!(),
};
program.resolve_label(loop_start, program.offset());
let scan_terminating_cmp_reg = if comparison_expr_translated {
seek_cmp_reg
} else {
let reg = program.alloc_register();
translate_expr(
program,
Some(tables),
&cmp_expr.expr,
reg,
&t_ctx.resolver,
)?;
reg
};
// 3. Emit a scan-terminating comparison instruction (IdxGT, IdxGE, IdxLT, IdxLE if index; GT, GE, LT, LE if btree rowid scan).
// Here the comparison expression from step 1 is compared to the current index key and the loop is exited if the comparison is true.
// The comparison operator used in the Idx__ instruction is the inverse of the WHERE clause comparison operator.
// For example, if we are scanning forwards and have a clause 'WHERE index_key < 10', we emit IdxGE(10) since >=10 is the first key where our condition is not satisfied anymore.
match (cmp_op, iter_dir) {
// Forwards, <= -> terminate if >
(
ast::Operator::Equals | ast::Operator::LessEquals,
IterationDirection::Forwards,
) => {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxGT {
cursor_id: index_cursor_id,
start_reg: scan_terminating_cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
} else {
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id: table_cursor_id,
dest: rowid_reg,
});
program.emit_insn(Insn::Gt {
lhs: rowid_reg,
rhs: scan_terminating_cmp_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
});
}
}
// Forwards, < -> terminate if >=
(ast::Operator::Less, IterationDirection::Forwards) => {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxGE {
cursor_id: index_cursor_id,
start_reg: scan_terminating_cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
} else {
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id: table_cursor_id,
dest: rowid_reg,
});
program.emit_insn(Insn::Ge {
lhs: rowid_reg,
rhs: scan_terminating_cmp_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
});
}
}
// Backwards, >= -> terminate if <
(
ast::Operator::Equals | ast::Operator::GreaterEquals,
IterationDirection::Backwards,
) => {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxLT {
cursor_id: index_cursor_id,
start_reg: scan_terminating_cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
} else {
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id: table_cursor_id,
dest: rowid_reg,
});
program.emit_insn(Insn::Lt {
lhs: rowid_reg,
rhs: scan_terminating_cmp_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
});
}
}
// Backwards, > -> terminate if <=
(ast::Operator::Greater, IterationDirection::Backwards) => {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxLE {
cursor_id: index_cursor_id,
start_reg: scan_terminating_cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
} else {
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id: table_cursor_id,
dest: rowid_reg,
});
program.emit_insn(Insn::Le {
lhs: rowid_reg,
rhs: scan_terminating_cmp_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
});
}
}
// Forwards, > and >= -> we already did a seek to the first key where the cmp expr is satisfied, so we don't have a terminating condition
// Backwards, < and <= -> we already did a seek to the last key where the cmp expr is satisfied, so we don't have a terminating condition
_ => {}
}
if let Some(index_cursor_id) = index_cursor_id {
// Don't do a btree table seek until it's actually necessary to read from the table.
program.emit_insn(Insn::DeferredSeek {
index_cursor_id,
table_cursor_id,
});
}
}
for cond in predicates
.iter()
.filter(|cond| cond.should_eval_at_loop(table_index))
@@ -813,30 +957,33 @@ pub fn close_loop(
target_pc: loop_labels.loop_start,
});
}
Operation::Scan { iter_dir, .. } => {
Operation::Scan {
index, iter_dir, ..
} => {
program.resolve_label(loop_labels.next, program.offset());
let cursor_id = program.resolve_cursor_id(&table.identifier);
let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name));
let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id);
match &table.table {
Table::BTree(_) => {
if iter_dir
.as_ref()
.is_some_and(|dir| *dir == IterationDirection::Backwards)
{
program.emit_insn(Insn::PrevAsync { cursor_id });
if *iter_dir == IterationDirection::Backwards {
program.emit_insn(Insn::PrevAsync {
cursor_id: iteration_cursor_id,
});
} else {
program.emit_insn(Insn::NextAsync { cursor_id });
program.emit_insn(Insn::NextAsync {
cursor_id: iteration_cursor_id,
});
}
if iter_dir
.as_ref()
.is_some_and(|dir| *dir == IterationDirection::Backwards)
{
if *iter_dir == IterationDirection::Backwards {
program.emit_insn(Insn::PrevAwait {
cursor_id,
cursor_id: iteration_cursor_id,
pc_if_next: loop_labels.loop_start,
});
} else {
program.emit_insn(Insn::NextAwait {
cursor_id,
cursor_id: iteration_cursor_id,
pc_if_next: loop_labels.loop_start,
});
}
@@ -854,17 +1001,29 @@ pub fn close_loop(
program.resolve_label(loop_labels.next, program.offset());
// Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction.
if !matches!(search, Search::RowidEq { .. }) {
let cursor_id = match search {
Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name),
Search::RowidSearch { .. } => program.resolve_cursor_id(&table.identifier),
let (cursor_id, iter_dir) = match search {
Search::IndexSearch {
index, iter_dir, ..
} => (program.resolve_cursor_id(&index.name), *iter_dir),
Search::RowidSearch { iter_dir, .. } => {
(program.resolve_cursor_id(&table.identifier), *iter_dir)
}
Search::RowidEq { .. } => unreachable!(),
};
program.emit_insn(Insn::NextAsync { cursor_id });
program.emit_insn(Insn::NextAwait {
cursor_id,
pc_if_next: loop_labels.loop_start,
});
if iter_dir == IterationDirection::Backwards {
program.emit_insn(Insn::PrevAsync { cursor_id });
program.emit_insn(Insn::PrevAwait {
cursor_id,
pc_if_next: loop_labels.loop_start,
});
} else {
program.emit_insn(Insn::NextAsync { cursor_id });
program.emit_insn(Insn::NextAwait {
cursor_id,
pc_if_next: loop_labels.loop_start,
});
}
}
}
}

View File

@@ -1,6 +1,6 @@
use std::{collections::HashMap, sync::Arc};
use limbo_sqlite3_parser::ast;
use limbo_sqlite3_parser::ast::{self, Expr, SortOrder};
use crate::{
schema::{Index, Schema},
@@ -9,8 +9,8 @@ use crate::{
};
use super::plan::{
DeletePlan, Direction, IterationDirection, Operation, Plan, Search, SelectPlan, TableReference,
UpdatePlan, WhereTerm,
DeletePlan, Direction, GroupBy, IterationDirection, Operation, Plan, Search, SelectPlan,
TableReference, UpdatePlan, WhereTerm,
};
pub fn optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> {
@@ -40,10 +40,10 @@ fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
&mut plan.table_references,
&schema.indexes,
&mut plan.where_clause,
&mut plan.order_by,
&plan.group_by,
)?;
eliminate_unnecessary_orderby(plan, schema)?;
eliminate_orderby_like_groupby(plan)?;
Ok(())
@@ -62,6 +62,8 @@ fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> {
&mut plan.table_references,
&schema.indexes,
&mut plan.where_clause,
&mut plan.order_by,
&None,
)?;
Ok(())
@@ -79,6 +81,8 @@ fn optimize_update_plan(plan: &mut UpdatePlan, schema: &Schema) -> Result<()> {
&mut plan.table_references,
&schema.indexes,
&mut plan.where_clause,
&mut plan.order_by,
&None,
)?;
Ok(())
}
@@ -93,33 +97,6 @@ fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
Ok(())
}
/// Returns whether the first (outermost) table reference is already iterated in
/// the order that `key` requests, making an explicit sort step unnecessary.
///
/// This holds when `key` is the rowid alias of the first table (rowid scans and
/// rowid searches yield rowid order), or when an index search uses the very
/// same index that would provide the requested ordering.
fn query_is_already_ordered_by(
    table_references: &[TableReference],
    key: &mut ast::Expr,
    available_indexes: &HashMap<String, Vec<Arc<Index>>>,
) -> Result<bool> {
    // With no tables there is nothing to be ordered by.
    let Some(table_reference) = table_references.first() else {
        return Ok(false);
    };
    match &table_reference.op {
        Operation::Scan { .. } => Ok(key.is_rowid_alias_of(0)),
        Operation::Search(search) => match search {
            Search::RowidEq { .. } => Ok(key.is_rowid_alias_of(0)),
            Search::RowidSearch { .. } => Ok(key.is_rowid_alias_of(0)),
            Search::IndexSearch { index, .. } => {
                // Ordered only if the search traverses the exact same index
                // instance (pointer identity) that `key` would be ordered by.
                let index_rc = key.check_index_scan(0, table_reference, available_indexes)?;
                Ok(index_rc.is_some_and(|irc| Arc::ptr_eq(index, &irc)))
            }
        },
        _ => Ok(false),
    }
}
fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> {
if plan.order_by.is_none() | plan.group_by.is_none() {
return Ok(());
@@ -185,36 +162,117 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> {
Ok(())
}
fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
if plan.order_by.is_none() {
fn eliminate_unnecessary_orderby(
table_references: &mut [TableReference],
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
order_by: &mut Option<Vec<(ast::Expr, Direction)>>,
group_by: &Option<GroupBy>,
) -> Result<()> {
let Some(order) = order_by else {
return Ok(());
}
if plan.table_references.is_empty() {
};
let Some(first_table_reference) = table_references.first_mut() else {
return Ok(());
}
};
let Some(btree_table) = first_table_reference.btree() else {
return Ok(());
};
// If GROUP BY clause is present, we can't rely on already ordered columns because GROUP BY reorders the data
// This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping
// And if ORDER BY clause duplicates GROUP BY we handle it later in fn eliminate_orderby_like_groupby
if plan.group_by.is_some() {
if group_by.is_some() {
return Ok(());
}
let Operation::Scan {
index, iter_dir, ..
} = &mut first_table_reference.op
else {
return Ok(());
};
assert!(
index.is_none(),
"Nothing shouldve transformed the scan to use an index yet"
);
// Special case: if ordering by just the rowid, we can remove the ORDER BY clause
if order.len() == 1 && order[0].0.is_rowid_alias_of(0) {
*iter_dir = match order[0].1 {
Direction::Ascending => IterationDirection::Forwards,
Direction::Descending => IterationDirection::Backwards,
};
*order_by = None;
return Ok(());
}
let o = plan.order_by.as_mut().unwrap();
// Find the best matching index for the ORDER BY columns
let table_name = &btree_table.name;
let mut best_index = (None, 0);
if o.len() != 1 {
// TODO: handle multiple order by keys
return Ok(());
for (_, indexes) in available_indexes.iter() {
for index_candidate in indexes.iter().filter(|i| &i.table_name == table_name) {
let matching_columns = index_candidate.columns.iter().enumerate().take_while(|(i, c)| {
if let Some((Expr::Column { table, column, .. }, _)) = order.get(*i) {
let col_idx_in_table = btree_table
.columns
.iter()
.position(|tc| tc.name.as_ref() == Some(&c.name));
matches!(col_idx_in_table, Some(col_idx) if *table == 0 && *column == col_idx)
} else {
false
}
}).count();
if matching_columns > best_index.1 {
best_index = (Some(index_candidate), matching_columns);
}
}
}
let (key, direction) = o.first_mut().unwrap();
let Some(matching_index) = best_index.0 else {
return Ok(());
};
let match_count = best_index.1;
let already_ordered =
query_is_already_ordered_by(&plan.table_references, key, &schema.indexes)?;
// If we found a matching index, use it for scanning
*index = Some(matching_index.clone());
// If the order by direction matches the index direction, we can iterate the index in forwards order.
// If they don't, we must iterate the index in backwards order.
let index_direction = &matching_index.columns.first().as_ref().unwrap().order;
*iter_dir = match (index_direction, order[0].1) {
(SortOrder::Asc, Direction::Ascending) | (SortOrder::Desc, Direction::Descending) => {
IterationDirection::Forwards
}
(SortOrder::Asc, Direction::Descending) | (SortOrder::Desc, Direction::Ascending) => {
IterationDirection::Backwards
}
};
if already_ordered {
push_scan_direction(&mut plan.table_references[0], direction);
plan.order_by = None;
// If the index covers all ORDER BY columns, and one of the following applies:
// - the ORDER BY directions exactly match the index orderings,
// - the ORDER by directions are the exact opposite of the index orderings,
// we can remove the ORDER BY clause.
if match_count == order.len() {
let full_match = {
let mut all_match_forward = true;
let mut all_match_reverse = true;
for (i, (_, direction)) in order.iter().enumerate() {
match (&matching_index.columns[i].order, direction) {
(SortOrder::Asc, Direction::Ascending)
| (SortOrder::Desc, Direction::Descending) => {
all_match_reverse = false;
}
(SortOrder::Asc, Direction::Descending)
| (SortOrder::Desc, Direction::Ascending) => {
all_match_forward = false;
}
}
}
all_match_forward || all_match_reverse
};
if full_match {
*order_by = None;
}
}
Ok(())
@@ -222,24 +280,25 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Resu
/**
* Use indexes where possible.
* Right now we make decisions about using indexes ONLY based on condition expressions, not e.g. ORDER BY or others.
* This is just because we are WIP.
*
* When this function is called, condition expressions from both the actual WHERE clause and the JOIN clauses are in the where_clause vector.
* If we find a condition that can be used to index scan, we pop it off from the where_clause vector and put it into a Search operation.
* We put it there simply because it makes it a bit easier to track during translation.
*
* In this function we also try to eliminate ORDER BY clauses if there is an index that satisfies the ORDER BY clause.
*/
fn use_indexes(
table_references: &mut [TableReference],
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
where_clause: &mut Vec<WhereTerm>,
order_by: &mut Option<Vec<(ast::Expr, Direction)>>,
group_by: &Option<GroupBy>,
) -> Result<()> {
if where_clause.is_empty() {
return Ok(());
}
// Try to use indexes for eliminating ORDER BY clauses
eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?;
// Try to use indexes for WHERE conditions
'outer: for (table_index, table_reference) in table_references.iter_mut().enumerate() {
if let Operation::Scan { .. } = &mut table_reference.op {
if let Operation::Scan { iter_dir, .. } = &table_reference.op {
let mut i = 0;
while i < where_clause.len() {
let cond = where_clause.get_mut(i).unwrap();
@@ -248,6 +307,7 @@ fn use_indexes(
table_index,
table_reference,
available_indexes,
*iter_dir,
)? {
where_clause.remove(i);
table_reference.op = Operation::Search(index_search);
@@ -296,20 +356,6 @@ fn eliminate_constant_conditions(
Ok(ConstantConditionEliminationResult::Continue)
}
/// Push an ORDER BY direction down into a table's Scan operation.
/// Only applies when the operation is a Scan whose iteration direction has not
/// been decided yet; an already-set direction is never overwritten.
fn push_scan_direction(table: &mut TableReference, direction: &Direction) {
    let Operation::Scan {
        ref mut iter_dir, ..
    } = table.op
    else {
        return;
    };
    if iter_dir.is_some() {
        return;
    }
    *iter_dir = Some(match direction {
        Direction::Ascending => IterationDirection::Forwards,
        Direction::Descending => IterationDirection::Backwards,
    });
}
fn rewrite_exprs_select(plan: &mut SelectPlan) -> Result<()> {
for rc in plan.result_columns.iter_mut() {
rewrite_expr(&mut rc.expr)?;
@@ -611,6 +657,7 @@ pub fn try_extract_index_search_expression(
table_index: usize,
table_reference: &TableReference,
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
iter_dir: IterationDirection,
) -> Result<Option<Search>> {
if !cond.should_eval_at_loop(table_index) {
return Ok(None);
@@ -641,6 +688,7 @@ pub fn try_extract_index_search_expression(
from_outer_join: cond.from_outer_join,
eval_at: cond.eval_at,
},
iter_dir,
}));
}
_ => {}
@@ -671,6 +719,7 @@ pub fn try_extract_index_search_expression(
from_outer_join: cond.from_outer_join,
eval_at: cond.eval_at,
},
iter_dir,
}));
}
_ => {}
@@ -695,6 +744,7 @@ pub fn try_extract_index_search_expression(
from_outer_join: cond.from_outer_join,
eval_at: cond.eval_at,
},
iter_dir,
}));
}
_ => {}
@@ -719,6 +769,7 @@ pub fn try_extract_index_search_expression(
from_outer_join: cond.from_outer_join,
eval_at: cond.eval_at,
},
iter_dir,
}));
}
_ => {}

View File

@@ -259,12 +259,11 @@ pub struct TableReference {
pub enum Operation {
// Scan operation
// This operation is used to scan a table.
// The iter_dir are uset to indicate the direction of the iterator.
// The use of Option for iter_dir is aimed at implementing a conservative optimization strategy: it only pushes
// iter_dir down to Scan when iter_dir is None, to prevent potential result set errors caused by multiple
// assignments. for more detailed discussions, please refer to https://github.com/tursodatabase/limbo/pull/376
// The iter_dir is used to indicate the direction of the iterator.
Scan {
iter_dir: Option<IterationDirection>,
iter_dir: IterationDirection,
/// The index that we are using to scan the table, if any.
index: Option<Arc<Index>>,
},
// Search operation
// This operation is used to search for a row in a table using an index
@@ -337,12 +336,14 @@ pub enum Search {
RowidSearch {
cmp_op: ast::Operator,
cmp_expr: WhereTerm,
iter_dir: IterationDirection,
},
/// A secondary index search. Uses bytecode instructions like SeekGE, SeekGT etc.
IndexSearch {
index: Arc<Index>,
cmp_op: ast::Operator,
cmp_expr: WhereTerm,
iter_dir: IterationDirection,
},
}

View File

@@ -1,7 +1,7 @@
use super::{
plan::{
Aggregate, EvalAt, JoinInfo, Operation, Plan, ResultSetColumn, SelectPlan, SelectQueryType,
TableReference, WhereTerm,
Aggregate, EvalAt, IterationDirection, JoinInfo, Operation, Plan, ResultSetColumn,
SelectPlan, SelectQueryType, TableReference, WhereTerm,
},
select::prepare_select_plan,
SymbolTable,
@@ -320,7 +320,10 @@ fn parse_from_clause_table<'a>(
));
};
scope.tables.push(TableReference {
op: Operation::Scan { iter_dir: None },
op: Operation::Scan {
iter_dir: IterationDirection::Forwards,
index: None,
},
table: tbl_ref,
identifier: alias.unwrap_or(normalized_qualified_name),
join_info: None,
@@ -399,7 +402,10 @@ fn parse_from_clause_table<'a>(
.unwrap_or(normalized_name.to_string());
scope.tables.push(TableReference {
op: Operation::Scan { iter_dir: None },
op: Operation::Scan {
iter_dir: IterationDirection::Forwards,
index: None,
},
join_info: None,
table: Table::Virtual(vtab),
identifier: alias,

View File

@@ -13,7 +13,8 @@ use super::optimizer::optimize_plan;
use super::plan::{
Direction, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan,
};
use super::planner::{bind_column_references, parse_limit, parse_where};
use super::planner::bind_column_references;
use super::planner::{parse_limit, parse_where};
/*
* Update is simple. By default we scan the table, and for each row, we check the WHERE
@@ -64,26 +65,40 @@ pub fn translate_update(
}
pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<Plan> {
if body.with.is_some() {
bail_parse_error!("WITH clause is not supported");
}
if body.or_conflict.is_some() {
bail_parse_error!("ON CONFLICT clause is not supported");
}
let table_name = &body.tbl_name.name;
let table = match schema.get_table(table_name.0.as_str()) {
Some(table) => table,
None => bail_parse_error!("Parse error: no such table: {}", table_name),
};
let Some(btree_table) = table.btree() else {
bail_parse_error!("Error: {} is not a btree table", table_name);
};
let iter_dir: Option<IterationDirection> = body.order_by.as_ref().and_then(|order_by| {
order_by.first().and_then(|ob| {
ob.order.map(|o| match o {
SortOrder::Asc => IterationDirection::Forwards,
SortOrder::Desc => IterationDirection::Backwards,
let iter_dir = body
.order_by
.as_ref()
.and_then(|order_by| {
order_by.first().and_then(|ob| {
ob.order.map(|o| match o {
SortOrder::Asc => IterationDirection::Forwards,
SortOrder::Desc => IterationDirection::Backwards,
})
})
})
});
.unwrap_or(IterationDirection::Forwards);
let table_references = vec![TableReference {
table: Table::BTree(btree_table.clone()),
table: match table.as_ref() {
Table::Virtual(vtab) => Table::Virtual(vtab.clone()),
Table::BTree(btree_table) => Table::BTree(btree_table.clone()),
_ => unreachable!(),
},
identifier: table_name.0.clone(),
op: Operation::Scan { iter_dir },
op: Operation::Scan {
iter_dir,
index: None,
},
join_info: None,
}];
let set_clauses = body
@@ -91,8 +106,8 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<
.iter_mut()
.map(|set| {
let ident = normalize_ident(set.col_names[0].0.as_str());
let col_index = btree_table
.columns
let col_index = table
.columns()
.iter()
.enumerate()
.find_map(|(i, col)| {

View File

@@ -22,6 +22,20 @@ pub enum OwnedValueType {
Error,
}
impl Display for OwnedValueType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let value = match self {
Self::Null => "NULL",
Self::Integer => "INT",
Self::Float => "REAL",
Self::Blob => "BLOB",
Self::Text => "TEXT",
Self::Error => "ERROR",
};
write!(f, "{}", value)
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum TextSubtype {
Text,
@@ -69,6 +83,15 @@ impl Text {
}
}
impl From<String> for Text {
    /// Build a plain (non-JSON) `Text` from an owned `String`, reusing the
    /// string's byte buffer without copying.
    fn from(value: String) -> Self {
        let bytes = value.into_bytes();
        Self {
            value: bytes,
            subtype: TextSubtype::Text,
        }
    }
}
impl TextRef {
pub fn as_str(&self) -> &str {
unsafe { std::str::from_utf8_unchecked(self.value.to_slice()) }
@@ -1203,11 +1226,13 @@ pub enum CursorResult<T> {
IO,
}
#[derive(Clone, PartialEq, Debug)]
/// Comparison operator used when seeking a btree cursor to a key.
/// Mirrors the Seek* VDBE instructions (SeekGE, SeekGT, SeekLE, SeekLT).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum SeekOp {
    /// Seek to the entry exactly equal to the key.
    EQ,
    /// Seek to the first entry greater than or equal to the key.
    GE,
    /// Seek to the first entry strictly greater than the key.
    GT,
    /// Seek to the first entry less than or equal to the key.
    LE,
    /// Seek to the first entry strictly less than the key.
    LT,
}
#[derive(Clone, PartialEq, Debug)]

View File

@@ -60,7 +60,35 @@ pub fn parse_schema_rows(
let sql: &str = row.get::<&str>(4)?;
if root_page == 0 && sql.to_lowercase().contains("create virtual") {
let name: &str = row.get::<&str>(1)?;
let vtab = syms.vtabs.get(name).unwrap().clone();
// a virtual table is found in the sqlite_schema, but it's no
// longer in the in-memory schema. We need to recreate it if
// the module is loaded in the symbol table.
let vtab = if let Some(vtab) = syms.vtabs.get(name) {
vtab.clone()
} else {
let mod_name = module_name_from_sql(sql)?;
if let Some(vmod) = syms.vtab_modules.get(mod_name) {
if let limbo_ext::VTabKind::VirtualTable = vmod.module_kind
{
crate::VirtualTable::from_args(
Some(name),
mod_name,
module_args_from_sql(sql)?,
syms,
vmod.module_kind,
None,
)?
} else {
return Err(LimboError::Corrupt("Table valued function: {name} registered as virtual table in schema".to_string()));
}
} else {
// The extension isn't loaded: return an error telling the user to load it.
return Err(LimboError::ExtensionError(format!(
"Virtual table module '{}' not found\nPlease load extension",
&mod_name
)));
}
};
schema.add_virtual_table(vtab);
} else {
let table = schema::BTreeTable::from_sql(sql, root_page as usize)?;
@@ -132,6 +160,99 @@ pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool {
strip_quotes(ident1).eq_ignore_ascii_case(strip_quotes(ident2))
}
/// Extract the module name from a `CREATE VIRTUAL TABLE ... USING module(...)`
/// statement.
///
/// Returns the identifier following the `USING` keyword, stopping at the first
/// whitespace character, `;`, or `(`. The keyword is matched case-insensitively
/// (SQL keywords are case-insensitive, and the caller already matches
/// `create virtual` case-insensitively).
///
/// # Errors
/// Returns `InvalidArgument` when `USING` is absent or no module name follows it.
fn module_name_from_sql(sql: &str) -> Result<&str> {
    // ASCII uppercasing preserves byte offsets, so an index found in `upper`
    // is valid into `sql`.
    let upper = sql.to_ascii_uppercase();
    let Some(pos) = upper.find("USING") else {
        return Err(LimboError::InvalidArgument(
            "Expected 'USING' in module name".to_string(),
        ));
    };
    // Skip the keyword plus any amount of whitespace before the module name
    // (the previous `pos + 6` assumed exactly one separating space).
    let rest = sql[pos + "USING".len()..].trim_start();
    // Stop at the first space, semicolon, or parenthesis.
    let end = rest
        .find(|c: char| c.is_whitespace() || c == ';' || c == '(')
        .unwrap_or(rest.len());
    let name = &rest[..end];
    if name.is_empty() {
        return Err(LimboError::InvalidArgument(
            "Expected module name after 'USING'".to_string(),
        ));
    }
    Ok(name)
}
// Parse the module argument list out of a CREATE VIRTUAL TABLE statement:
//   CREATE VIRTUAL TABLE table_name USING module_name(arg1, arg2, ...);
//   CREATE VIRTUAL TABLE table_name USING module_name;
//
// Arguments may be single-quoted strings (with '' as an escaped quote) or bare
// comma-separated tokens. Each argument is trimmed and returned as an extension
// `Value`. Errors on an unterminated quoted string or on stray characters
// between a closing quote and the next comma.
fn module_args_from_sql(sql: &str) -> Result<Vec<limbo_ext::Value>> {
    // No '(' at all means the module was invoked without an argument list.
    if !sql.contains('(') {
        return Ok(vec![]);
    }
    let start = sql.find('(').ok_or_else(|| {
        LimboError::InvalidArgument("Expected '(' in module argument list".to_string())
    })? + 1;
    // Match to the *last* ')' so parentheses inside quoted args don't truncate.
    let end = sql.rfind(')').ok_or_else(|| {
        LimboError::InvalidArgument("Expected ')' in module argument list".to_string())
    })?;
    let mut args = Vec::new();
    let mut current_arg = String::new();
    let mut chars = sql[start..end].chars().peekable();
    // True while we are inside a single-quoted string literal.
    let mut in_quotes = false;
    while let Some(c) = chars.next() {
        match c {
            '\'' => {
                if in_quotes {
                    if chars.peek() == Some(&'\'') {
                        // Escaped quote ('' inside a literal) — emit one quote.
                        current_arg.push('\'');
                        chars.next();
                    } else {
                        // Closing quote: the argument is complete.
                        in_quotes = false;
                        args.push(limbo_ext::Value::from_text(current_arg.trim().to_string()));
                        current_arg.clear();
                        // Skip until comma or end; anything else is garbage.
                        while let Some(&nc) = chars.peek() {
                            if nc == ',' {
                                chars.next(); // Consume comma
                                break;
                            } else if nc.is_whitespace() {
                                chars.next();
                            } else {
                                return Err(LimboError::InvalidArgument(
                                    "Unexpected characters after quoted argument".to_string(),
                                ));
                            }
                        }
                    }
                } else {
                    in_quotes = true;
                }
            }
            ',' => {
                if !in_quotes {
                    // Separator between bare (unquoted) arguments; a trailing
                    // comma leaves current_arg empty and is tolerated.
                    if !current_arg.trim().is_empty() {
                        args.push(limbo_ext::Value::from_text(current_arg.trim().to_string()));
                        current_arg.clear();
                    }
                } else {
                    current_arg.push(c);
                }
            }
            _ => {
                current_arg.push(c);
            }
        }
    }
    // Flush a final bare argument that was not followed by a comma.
    if !current_arg.trim().is_empty() && !in_quotes {
        args.push(limbo_ext::Value::from_text(current_arg.trim().to_string()));
    }
    if in_quotes {
        return Err(LimboError::InvalidArgument(
            "Unterminated string literal in module arguments".to_string(),
        ));
    }
    Ok(args)
}
pub fn check_literal_equivalency(lhs: &Literal, rhs: &Literal) -> bool {
match (lhs, rhs) {
(Literal::Numeric(n1), Literal::Numeric(n2)) => cmp_numeric_strings(n1, n2),
@@ -1632,4 +1753,88 @@ pub mod tests {
Ok((OwnedValueType::Float, "1.23e4"))
);
}
// Tests for module_name_from_sql: the name is the token after USING.
#[test]
fn test_module_name_basic() {
    let sql = "CREATE VIRTUAL TABLE x USING y;";
    assert_eq!(module_name_from_sql(sql).unwrap(), "y");
}
// The name stops at the opening parenthesis of the argument list.
#[test]
fn test_module_name_with_args() {
    let sql = "CREATE VIRTUAL TABLE x USING modname('a', 'b');";
    assert_eq!(module_name_from_sql(sql).unwrap(), "modname");
}
// A statement without USING is rejected.
#[test]
fn test_module_name_missing_using() {
    let sql = "CREATE VIRTUAL TABLE x (a, b);";
    assert!(module_name_from_sql(sql).is_err());
}
// The terminating semicolon is optional.
#[test]
fn test_module_name_no_semicolon() {
    let sql = "CREATE VIRTUAL TABLE x USING limbo(a, b)";
    assert_eq!(module_name_from_sql(sql).unwrap(), "limbo");
}
// Neither a semicolon nor an argument list is required.
#[test]
fn test_module_name_no_semicolon_or_args() {
    let sql = "CREATE VIRTUAL TABLE x USING limbo";
    assert_eq!(module_name_from_sql(sql).unwrap(), "limbo");
}
// Tests for module_args_from_sql: no parentheses means no arguments.
#[test]
fn test_module_args_none() {
    let sql = "CREATE VIRTUAL TABLE x USING modname;";
    let args = module_args_from_sql(sql).unwrap();
    assert_eq!(args.len(), 0);
}
// Quoted arguments are returned without their quotes.
#[test]
fn test_module_args_basic() {
    let sql = "CREATE VIRTUAL TABLE x USING modname('arg1', 'arg2');";
    let args = module_args_from_sql(sql).unwrap();
    assert_eq!(args.len(), 2);
    assert_eq!("arg1", args[0].to_text().unwrap());
    assert_eq!("arg2", args[1].to_text().unwrap());
    // Extension values own FFI-allocated text; free them explicitly.
    for arg in args {
        unsafe { arg.__free_internal_type() }
    }
}
// A doubled quote ('') inside a literal collapses to a single quote.
#[test]
fn test_module_args_with_escaped_quote() {
    let sql = "CREATE VIRTUAL TABLE x USING modname('a''b', 'c');";
    let args = module_args_from_sql(sql).unwrap();
    assert_eq!(args.len(), 2);
    assert_eq!(args[0].to_text().unwrap(), "a'b");
    assert_eq!(args[1].to_text().unwrap(), "c");
    for arg in args {
        unsafe { arg.__free_internal_type() }
    }
}
// A quote that is never closed is an error.
#[test]
fn test_module_args_unterminated_string() {
    let sql = "CREATE VIRTUAL TABLE x USING modname('arg1, 'arg2');";
    assert!(module_args_from_sql(sql).is_err());
}
// Non-whitespace between a closing quote and the next comma is an error.
#[test]
fn test_module_args_extra_garbage_after_quote() {
    let sql = "CREATE VIRTUAL TABLE x USING modname('arg1'x);";
    assert!(module_args_from_sql(sql).is_err());
}
// A trailing comma after the last argument is tolerated.
#[test]
fn test_module_args_trailing_comma() {
    let sql = "CREATE VIRTUAL TABLE x USING modname('arg1',);";
    let args = module_args_from_sql(sql).unwrap();
    assert_eq!(args.len(), 1);
    assert_eq!("arg1", args[0].to_text().unwrap());
    for arg in args {
        unsafe { arg.__free_internal_type() }
    }
}
}

View File

@@ -413,6 +413,12 @@ impl ProgramBuilder {
Insn::SeekGT { target_pc, .. } => {
resolve(target_pc, "SeekGT");
}
Insn::SeekLE { target_pc, .. } => {
resolve(target_pc, "SeekLE");
}
Insn::SeekLT { target_pc, .. } => {
resolve(target_pc, "SeekLT");
}
Insn::IdxGE { target_pc, .. } => {
resolve(target_pc, "IdxGE");
}

View File

@@ -1,5 +1,5 @@
#![allow(unused_variables)]
use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY};
use crate::error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY};
use crate::ext::ExtValue;
use crate::function::{AggFunc, ExtFunc, MathFunc, MathFuncArity, ScalarFunc, VectorFunc};
use crate::functions::datetime::{
@@ -10,11 +10,13 @@ use std::{borrow::BorrowMut, rc::Rc};
use crate::pseudo::PseudoCursor;
use crate::result::LimboResult;
use crate::schema::{affinity, Affinity};
use crate::storage::btree::{BTreeCursor, BTreeKey};
use crate::storage::wal::CheckpointResult;
use crate::types::{
AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, SeekKey, SeekOp,
AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, OwnedValueType, SeekKey, SeekOp,
};
use crate::util::{
cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real,
@@ -917,12 +919,21 @@ pub fn op_vcreate(
"Failed to upgrade Connection".to_string(),
));
};
let mod_type = conn
.syms
.borrow()
.vtab_modules
.get(&module_name)
.ok_or_else(|| {
crate::LimboError::ExtensionError(format!("Module {} not found", module_name))
})?
.module_kind;
let table = crate::VirtualTable::from_args(
Some(&table_name),
&module_name,
args,
&conn.syms.borrow(),
limbo_ext::VTabKind::VirtualTable,
mod_type,
None,
)?;
{
@@ -1341,6 +1352,68 @@ pub fn op_column(
Ok(InsnFunctionStepResult::Step)
}
/// Execute the TypeCheck instruction: enforce STRICT-table column typing on
/// the `count` registers starting at `start_reg`, one per table column.
///
/// For each register/column pair:
/// - rejects NULL in a declared (non rowid-alias) INTEGER PRIMARY KEY,
/// - allows NULL in a rowid alias (the rowid is auto-assigned later),
/// - applies the column's affinity to the register, then verifies the stored
///   value type matches the declared STRICT type (ANY accepts everything).
///
/// # Errors
/// Returns a constraint error on a NULL primary key or a type mismatch.
pub fn op_type_check(
    program: &Program,
    state: &mut ProgramState,
    insn: &Insn,
    pager: &Rc<Pager>,
    mv_store: Option<&Rc<MvStore>>,
) -> Result<InsnFunctionStepResult> {
    let Insn::TypeCheck {
        start_reg,
        count,
        // TODO(review): generated-column checking (P3) is not implemented yet,
        // so the flag is currently ignored.
        check_generated: _,
        table_reference,
    } = insn
    else {
        unreachable!("unexpected Insn {:?}", insn)
    };
    // TypeCheck is only ever emitted for STRICT tables.
    assert!(table_reference.is_strict);
    state.registers[*start_reg..*start_reg + *count]
        .iter_mut()
        .zip(table_reference.columns.iter())
        .try_for_each(|(reg, col)| {
            // INT PRIMARY KEY that is not a rowid alias must not be NULL.
            if !col.is_rowid_alias
                && col.primary_key
                && matches!(reg.get_owned_value(), OwnedValue::Null)
            {
                bail_constraint_error!(
                    "NOT NULL constraint failed: {}.{} ({})",
                    &table_reference.name,
                    col.name.as_ref().map(|s| s.as_str()).unwrap_or(""),
                    SQLITE_CONSTRAINT
                )
            } else if col.is_rowid_alias && matches!(reg.get_owned_value(), OwnedValue::Null) {
                // NULL rowid alias is fine: the rowid will be auto-assigned.
                return Ok(());
            }
            // Coerce toward the column's affinity first; the type match below
            // catches anything that could not be coerced.
            // NOTE(review): a NULL in a non-PK typed column also falls through
            // to the mismatch arm below — confirm this matches intended STRICT
            // NULL handling.
            apply_affinity_char(reg, col.affinity());
            let value_type = reg.get_owned_value().value_type();
            match (col.ty_str.as_str(), value_type) {
                ("INTEGER" | "INT", OwnedValueType::Integer) => {}
                ("REAL", OwnedValueType::Float) => {}
                ("BLOB", OwnedValueType::Blob) => {}
                ("TEXT", OwnedValueType::Text) => {}
                // ANY columns accept every value type.
                ("ANY", _) => {}
                (t, v) => bail_constraint_error!(
                    "cannot store {} value in {} column {}.{} ({})",
                    v,
                    t,
                    &table_reference.name,
                    col.name.as_ref().map(|s| s.as_str()).unwrap_or(""),
                    SQLITE_CONSTRAINT
                ),
            };
            Ok(())
        })?;
    state.pc += 1;
    Ok(InsnFunctionStepResult::Step)
}
pub fn op_make_record(
program: &Program,
state: &mut ProgramState,
@@ -1509,7 +1582,7 @@ pub fn op_halt(
)));
}
}
match program.halt(pager.clone(), state, mv_store.clone())? {
match program.halt(pager.clone(), state, mv_store)? {
StepResult::Done => Ok(InsnFunctionStepResult::Done),
StepResult::IO => Ok(InsnFunctionStepResult::IO),
StepResult::Row => Ok(InsnFunctionStepResult::Row),
@@ -1542,8 +1615,8 @@ pub fn op_transaction(
}
} else {
let connection = program.connection.upgrade().unwrap();
let current_state = connection.transaction_state.borrow().clone();
let (new_transaction_state, updated) = match (&current_state, write) {
let current_state = connection.transaction_state.get();
let (new_transaction_state, updated) = match (current_state, write) {
(TransactionState::Write, true) => (TransactionState::Write, false),
(TransactionState::Write, false) => (TransactionState::Write, false),
(TransactionState::Read, true) => (TransactionState::Write, true),
@@ -1588,7 +1661,7 @@ pub fn op_auto_commit(
};
let conn = program.connection.upgrade().unwrap();
if matches!(state.halt_state, Some(HaltState::Checkpointing)) {
return match program.halt(pager.clone(), state, mv_store.clone())? {
return match program.halt(pager.clone(), state, mv_store)? {
super::StepResult::Done => Ok(InsnFunctionStepResult::Done),
super::StepResult::IO => Ok(InsnFunctionStepResult::IO),
super::StepResult::Row => Ok(InsnFunctionStepResult::Row),
@@ -1597,7 +1670,7 @@ pub fn op_auto_commit(
};
}
if *auto_commit != *conn.auto_commit.borrow() {
if *auto_commit != conn.auto_commit.get() {
if *rollback {
todo!("Rollback is not implemented");
} else {
@@ -1616,7 +1689,7 @@ pub fn op_auto_commit(
"cannot commit - no transaction is active".to_string(),
));
}
return match program.halt(pager.clone(), state, mv_store.clone())? {
return match program.halt(pager.clone(), state, mv_store)? {
super::StepResult::Done => Ok(InsnFunctionStepResult::Done),
super::StepResult::IO => Ok(InsnFunctionStepResult::IO),
super::StepResult::Row => Ok(InsnFunctionStepResult::Row),
@@ -1775,17 +1848,14 @@ pub fn op_row_id(
let rowid = {
let mut index_cursor = state.get_cursor(index_cursor_id);
let index_cursor = index_cursor.as_btree_mut();
let rowid = index_cursor.rowid()?;
rowid
index_cursor.rowid()?
};
let mut table_cursor = state.get_cursor(table_cursor_id);
let table_cursor = table_cursor.as_btree_mut();
let deferred_seek =
match table_cursor.seek(SeekKey::TableRowId(rowid.unwrap()), SeekOp::EQ)? {
CursorResult::Ok(_) => None,
CursorResult::IO => Some((index_cursor_id, table_cursor_id)),
};
deferred_seek
match table_cursor.seek(SeekKey::TableRowId(rowid.unwrap()), SeekOp::EQ)? {
CursorResult::Ok(_) => None,
CursorResult::IO => Some((index_cursor_id, table_cursor_id)),
}
};
if let Some(deferred_seek) = deferred_seek {
state.deferred_seek = Some(deferred_seek);
@@ -1883,97 +1953,69 @@ pub fn op_deferred_seek(
Ok(InsnFunctionStepResult::Step)
}
pub fn op_seek_ge(
pub fn op_seek(
program: &Program,
state: &mut ProgramState,
insn: &Insn,
pager: &Rc<Pager>,
mv_store: Option<&Rc<MvStore>>,
) -> Result<InsnFunctionStepResult> {
let Insn::SeekGE {
let (Insn::SeekGE {
cursor_id,
start_reg,
num_regs,
target_pc,
is_index,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
};
assert!(target_pc.is_offset());
if *is_index {
let found = {
let mut cursor = state.get_cursor(*cursor_id);
let cursor = cursor.as_btree_mut();
let record_from_regs = make_record(&state.registers, start_reg, num_regs);
let found =
return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GE));
found
};
if !found {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
}
} else {
let pc = {
let mut cursor = state.get_cursor(*cursor_id);
let cursor = cursor.as_btree_mut();
let rowid = match state.registers[*start_reg].get_owned_value() {
OwnedValue::Null => {
// All integer values are greater than null so we just rewind the cursor
return_if_io!(cursor.rewind());
None
}
OwnedValue::Integer(rowid) => Some(*rowid as u64),
_ => {
return Err(LimboError::InternalError(
"SeekGE: the value in the register is not an integer".into(),
));
}
};
match rowid {
Some(rowid) => {
let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE));
if !found {
target_pc.to_offset_int()
} else {
state.pc + 1
}
}
None => state.pc + 1,
}
};
state.pc = pc;
}
Ok(InsnFunctionStepResult::Step)
}
pub fn op_seek_gt(
program: &Program,
state: &mut ProgramState,
insn: &Insn,
pager: &Rc<Pager>,
mv_store: Option<&Rc<MvStore>>,
) -> Result<InsnFunctionStepResult> {
let Insn::SeekGT {
| Insn::SeekGT {
cursor_id,
start_reg,
num_regs,
target_pc,
is_index,
} = insn
}
| Insn::SeekLE {
cursor_id,
start_reg,
num_regs,
target_pc,
is_index,
}
| Insn::SeekLT {
cursor_id,
start_reg,
num_regs,
target_pc,
is_index,
}) = insn
else {
unreachable!("unexpected Insn {:?}", insn)
};
assert!(target_pc.is_offset());
assert!(
target_pc.is_offset(),
"target_pc should be an offset, is: {:?}",
target_pc
);
let op = match insn {
Insn::SeekGE { .. } => SeekOp::GE,
Insn::SeekGT { .. } => SeekOp::GT,
Insn::SeekLE { .. } => SeekOp::LE,
Insn::SeekLT { .. } => SeekOp::LT,
_ => unreachable!("unexpected Insn {:?}", insn),
};
let op_name = match op {
SeekOp::GE => "SeekGE",
SeekOp::GT => "SeekGT",
SeekOp::LE => "SeekLE",
SeekOp::LT => "SeekLT",
_ => unreachable!("unexpected SeekOp {:?}", op),
};
if *is_index {
let found = {
let mut cursor = state.get_cursor(*cursor_id);
let cursor = cursor.as_btree_mut();
let record_from_regs = make_record(&state.registers, start_reg, num_regs);
let found =
return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GT));
let found = return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), op));
found
};
if !found {
@@ -1993,14 +2035,15 @@ pub fn op_seek_gt(
}
OwnedValue::Integer(rowid) => Some(*rowid as u64),
_ => {
return Err(LimboError::InternalError(
"SeekGT: the value in the register is not an integer".into(),
));
return Err(LimboError::InternalError(format!(
"{}: the value in the register is not an integer",
op_name
)));
}
};
let found = match rowid {
Some(rowid) => {
let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GT));
let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), op));
if !found {
target_pc.to_offset_int()
} else {
@@ -3779,7 +3822,17 @@ pub fn op_idx_insert_async(
} else {
flags.has(IdxInsertFlags::USE_SEEK)
};
// insert record as key
// To make this reentrant in case of `moved_before` = false, we need to check if the previous cursor.insert started
// a write/balancing operation. If it did, it means we already moved to the place we wanted.
let moved_before = if cursor.is_write_in_progress() {
true
} else {
moved_before
};
// Start insertion of row. This might trigger a balance procedure which will take care of moving to different pages,
// therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to `Await` opcode
// because it could trigger a movement to child page after a balance root which will leave the current page as the root page.
return_if_io!(cursor.insert(&BTreeKey::new_index_key(record), moved_before));
}
state.pc += 1;
@@ -4227,13 +4280,15 @@ pub fn op_parse_schema(
))?;
let mut schema = conn.schema.write();
// TODO: This function below is synchronous, make it async
parse_schema_rows(
Some(stmt),
&mut schema,
conn.pager.io.clone(),
&conn.syms.borrow(),
state.mv_tx_id,
)?;
{
parse_schema_rows(
Some(stmt),
&mut schema,
conn.pager.io.clone(),
&conn.syms.borrow(),
state.mv_tx_id,
)?;
}
state.pc += 1;
Ok(InsnFunctionStepResult::Step)
}
@@ -5012,6 +5067,77 @@ fn exec_if(reg: &OwnedValue, jump_if_null: bool, not: bool) -> bool {
}
}
/// Coerce `target` in place toward the given column affinity, following
/// SQLite's affinity rules for STRICT tables.
///
/// Returns `true` when the value already satisfies the affinity or was
/// successfully coerced, `false` when coercion failed (non-numeric text under
/// numeric/real affinity, or a hex literal, which SQLite rejects here).
fn apply_affinity_char(target: &mut Register, affinity: Affinity) -> bool {
    // Non-value registers (records, aggregates) are left untouched.
    let Register::OwnedValue(value) = target else {
        return true;
    };
    // Blobs are stored verbatim regardless of affinity.
    if matches!(value, OwnedValue::Blob(_)) {
        return true;
    }
    match affinity {
        Affinity::Blob => true,
        Affinity::Text => {
            // Text and NULL are kept as-is; everything else is stringified.
            if !matches!(value, OwnedValue::Text(_) | OwnedValue::Null) {
                *value = OwnedValue::Text(value.to_string().into());
            }
            true
        }
        Affinity::Integer | Affinity::Numeric => {
            if matches!(value, OwnedValue::Integer(_)) {
                return true;
            }
            // Only text and real values are candidates for numeric coercion;
            // NULL (and anything else) passes through untouched.
            if !matches!(value, OwnedValue::Text(_) | OwnedValue::Float(_)) {
                return true;
            }
            if let OwnedValue::Float(fl) = *value {
                // A real becomes an integer only when the conversion is lossless.
                return match cast_real_to_integer(fl) {
                    Ok(int) => {
                        *value = OwnedValue::Integer(int);
                        true
                    }
                    Err(_) => false,
                };
            }
            let text = value.to_text().unwrap();
            let Ok(num) = checked_cast_text_to_numeric(&text) else {
                return false;
            };
            // Hex literals are rejected under numeric affinity.
            if text.starts_with("0x") && matches!(num, OwnedValue::Integer(_)) {
                return false;
            }
            // Bug fix: the previous code computed this integer downcast and
            // discarded the result, leaving the register as Text. Write the
            // coerced value back: prefer an integer when the parsed real
            // converts losslessly, otherwise keep the real.
            *value = match num {
                OwnedValue::Float(fl) => cast_real_to_integer(fl)
                    .map(OwnedValue::Integer)
                    .unwrap_or(OwnedValue::Float(fl)),
                other => other,
            };
            true
        }
        Affinity::Real => {
            match value {
                OwnedValue::Integer(i) => {
                    // Integers are widened to reals under REAL affinity.
                    *value = OwnedValue::Float(*i as f64);
                    true
                }
                OwnedValue::Text(t) => {
                    // Hex literals are rejected under REAL affinity.
                    if t.as_str().starts_with("0x") {
                        return false;
                    }
                    match checked_cast_text_to_numeric(t.as_str()) {
                        // REAL affinity always stores a real: widen a parsed
                        // integer (e.g. "5") to 5.0, matching SQLite.
                        Ok(OwnedValue::Integer(i)) => {
                            *value = OwnedValue::Float(i as f64);
                            true
                        }
                        Ok(num) => {
                            *value = num;
                            true
                        }
                        Err(_) => false,
                    }
                }
                // Reals and NULL already satisfy REAL affinity.
                _ => true,
            }
        }
    }
}
fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue {
if matches!(value, OwnedValue::Null) {
return OwnedValue::Null;

View File

@@ -528,6 +528,20 @@ pub fn insn_to_str(
),
)
}
Insn::TypeCheck {
start_reg,
count,
check_generated,
..
} => (
"TypeCheck",
*start_reg as i32,
*count as i32,
*check_generated as i32,
OwnedValue::build_text(""),
0,
String::from(""),
),
Insn::MakeRecord {
start_reg,
count,
@@ -736,23 +750,35 @@ pub fn insn_to_str(
start_reg,
num_regs: _,
target_pc,
} => (
"SeekGT",
*cursor_id as i32,
target_pc.to_debug_int(),
*start_reg as i32,
OwnedValue::build_text(""),
0,
"".to_string(),
),
Insn::SeekGE {
}
| Insn::SeekGE {
is_index: _,
cursor_id,
start_reg,
num_regs: _,
target_pc,
}
| Insn::SeekLE {
is_index: _,
cursor_id,
start_reg,
num_regs: _,
target_pc,
}
| Insn::SeekLT {
is_index: _,
cursor_id,
start_reg,
num_regs: _,
target_pc,
} => (
"SeekGE",
match insn {
Insn::SeekGT { .. } => "SeekGT",
Insn::SeekGE { .. } => "SeekGE",
Insn::SeekLE { .. } => "SeekLE",
Insn::SeekLT { .. } => "SeekLT",
_ => unreachable!(),
},
*cursor_id as i32,
target_pc.to_debug_int(),
*start_reg as i32,
@@ -1213,9 +1239,9 @@ pub fn insn_to_str(
0,
"".to_string(),
),
Insn::LastAsync { .. } => (
Insn::LastAsync { cursor_id } => (
"LastAsync",
0,
*cursor_id as i32,
0,
0,
OwnedValue::build_text(""),
@@ -1240,27 +1266,27 @@ pub fn insn_to_str(
0,
where_clause.clone(),
),
Insn::LastAwait { .. } => (
Insn::LastAwait { cursor_id, .. } => (
"LastAwait",
0,
*cursor_id as i32,
0,
0,
OwnedValue::build_text(""),
0,
"".to_string(),
),
Insn::PrevAsync { .. } => (
Insn::PrevAsync { cursor_id } => (
"PrevAsync",
0,
*cursor_id as i32,
0,
0,
OwnedValue::build_text(""),
0,
"".to_string(),
),
Insn::PrevAwait { .. } => (
Insn::PrevAwait { cursor_id, .. } => (
"PrevAwait",
0,
*cursor_id as i32,
0,
0,
OwnedValue::build_text(""),

View File

@@ -1,8 +1,10 @@
use std::num::NonZero;
use std::rc::Rc;
use super::{
cast_text_to_numeric, execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx,
};
use crate::schema::BTreeTable;
use crate::storage::wal::CheckpointMode;
use crate::types::{OwnedValue, Record};
use limbo_macros::Description;
@@ -344,7 +346,16 @@ pub enum Insn {
dest: usize,
},
/// Make a record and write it to destination register.
TypeCheck {
start_reg: usize, // P1
count: usize, // P2
/// GENERATED ALWAYS AS ... STATIC columns are only checked if P3 is zero.
/// When P3 is non-zero, no type checking occurs for static generated columns.
check_generated: bool, // P3
table_reference: Rc<BTreeTable>, // P4
},
// Make a record and write it to destination register.
MakeRecord {
start_reg: usize, // P1
count: usize, // P2
@@ -427,7 +438,7 @@ pub enum Insn {
register: usize,
},
/// Write a string value into a register.
// Write a string value into a register.
String8 {
value: String,
dest: usize,
@@ -501,6 +512,30 @@ pub enum Insn {
/// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end.
/// If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction.
// If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key.
// If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key.
// Seek to the first index entry that is less than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction.
SeekLE {
is_index: bool,
cursor_id: CursorID,
start_reg: usize,
num_regs: usize,
target_pc: BranchOffset,
},
// If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key.
// If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key.
// Seek to the first index entry that is less than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction.
SeekLT {
is_index: bool,
cursor_id: CursorID,
start_reg: usize,
num_regs: usize,
target_pc: BranchOffset,
},
// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end.
// If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction.
IdxGE {
cursor_id: CursorID,
start_reg: usize,
@@ -1274,6 +1309,7 @@ impl Insn {
Insn::LastAwait { .. } => execute::op_last_await,
Insn::Column { .. } => execute::op_column,
Insn::TypeCheck { .. } => execute::op_type_check,
Insn::MakeRecord { .. } => execute::op_make_record,
Insn::ResultRow { .. } => execute::op_result_row,
@@ -1306,8 +1342,10 @@ impl Insn {
Insn::SeekRowid { .. } => execute::op_seek_rowid,
Insn::DeferredSeek { .. } => execute::op_deferred_seek,
Insn::SeekGE { .. } => execute::op_seek_ge,
Insn::SeekGT { .. } => execute::op_seek_gt,
Insn::SeekGE { .. } => execute::op_seek,
Insn::SeekGT { .. } => execute::op_seek,
Insn::SeekLE { .. } => execute::op_seek,
Insn::SeekLT { .. } => execute::op_seek,
Insn::SeekEnd { .. } => execute::op_seek_end,
Insn::IdxGE { .. } => execute::op_idx_ge,
Insn::IdxGT { .. } => execute::op_idx_gt,

View File

@@ -386,7 +386,7 @@ impl Program {
) -> Result<StepResult> {
if let Some(mv_store) = mv_store {
let conn = self.connection.upgrade().unwrap();
let auto_commit = *conn.auto_commit.borrow();
let auto_commit = conn.auto_commit.get();
if auto_commit {
let mut mv_transactions = conn.mv_transactions.borrow_mut();
for tx_id in mv_transactions.iter() {
@@ -400,7 +400,7 @@ impl Program {
.connection
.upgrade()
.expect("only weak ref to connection?");
let auto_commit = *connection.auto_commit.borrow();
let auto_commit = connection.auto_commit.get();
tracing::trace!("Halt auto_commit {}", auto_commit);
assert!(
program_state.halt_state.is_none()
@@ -409,7 +409,7 @@ impl Program {
if program_state.halt_state.is_some() {
self.step_end_write_txn(&pager, &mut program_state.halt_state, connection.deref())
} else if auto_commit {
let current_state = connection.transaction_state.borrow().clone();
let current_state = connection.transaction_state.get();
match current_state {
TransactionState::Write => self.step_end_write_txn(
&pager,
@@ -561,7 +561,10 @@ fn get_indent_count(indent_count: usize, curr_insn: &Insn, prev_insn: Option<&In
| Insn::LastAwait { .. }
| Insn::SorterSort { .. }
| Insn::SeekGE { .. }
| Insn::SeekGT { .. } => indent_count + 1,
| Insn::SeekGT { .. }
| Insn::SeekLE { .. }
| Insn::SeekLT { .. } => indent_count + 1,
_ => indent_count,
}
} else {
@@ -627,11 +630,10 @@ impl Row {
pub fn get_value<'a>(&'a self, idx: usize) -> &'a OwnedValue {
let value = unsafe { self.values.add(idx).as_ref().unwrap() };
let value = match value {
match value {
Register::OwnedValue(owned_value) => owned_value,
_ => unreachable!("a row should be formed of values only"),
};
value
}
}
pub fn get_values(&self) -> impl Iterator<Item = &OwnedValue> {

85
docs/testing.md Normal file
View File

@@ -0,0 +1,85 @@
# Testing in Limbo
Limbo supports a comprehensive testing system to ensure correctness, performance, and compatibility with SQLite.
## 1. Compatibility Tests
The `make test` target is the main entry point.
Most compatibility tests live in the testing/ directory and are written in SQLite's TCL test format. These tests ensure that Limbo matches SQLite's behavior exactly. The database used during these tests is located at testing/testing.db, which includes the following schema:
```sql
CREATE TABLE users (
id INTEGER PRIMARY KEY,
first_name TEXT,
last_name TEXT,
email TEXT,
phone_number TEXT,
address TEXT,
city TEXT,
state TEXT,
zipcode TEXT,
age INTEGER
);
CREATE TABLE products (
id INTEGER PRIMARY KEY,
name TEXT,
price REAL
);
CREATE INDEX age_idx ON users (age);
```
You can freely write queries against these tables during compatibility testing.
### Shell and Python-based Tests
For cases where output or behavior differs intentionally from SQLite (e.g. due to new features or limitations), tests should be placed in the testing/cli_tests/ directory and written in Python.
These tests use the TestLimboShell class:
```python
from cli_tests.common import TestLimboShell
def test_uuid():
limbo = TestLimboShell()
limbo.run_test_fn("SELECT uuid4_str();", lambda res: len(res) == 36)
limbo.quit()
```
You can use run_test, run_test_fn, or debug_print to interact with the shell and validate results.
The constructor takes an optional argument with the `sql` you want to initiate the tests with. You can also enable blob testing or override the executable and flags.
Use these Python-based tests for validating:
- Output formatting
- Shell commands and .dot interactions
- Limbo-specific extensions in `testing/cli_tests/extensions.py`
- Any known divergence from SQLite behavior
### Logging and Tracing
If you wish to trace internal events during test execution, you can set the RUST_LOG environment variable before running the test. For example:
```bash
RUST_LOG=none,limbo_core=trace make test
```
This will enable trace-level logs for the limbo_core crate and disable logs elsewhere. All internal traces are written to the `testing/test.log` file.
**Note:** trace logs can be very verbose—it's not uncommon for a single test run to generate megabytes of logs.
## Deterministic Simulation Testing (DST):
TODO!
## Fuzzing
TODO!

View File

@@ -112,11 +112,14 @@ impl VTabModule for KVStoreVTab {
if cursor.index.is_some_and(|c| c >= cursor.rows.len()) {
return Err("cursor out of range".into());
}
let (_, ref key, ref val) = cursor.rows[cursor.index.unwrap_or(0)];
match idx {
0 => Ok(Value::from_text(key.clone())), // key
1 => Ok(Value::from_text(val.clone())), // value
_ => Err("Invalid column".into()),
if let Some((_, ref key, ref val)) = cursor.rows.get(cursor.index.unwrap_or(0)) {
match idx {
0 => Ok(Value::from_text(key.clone())), // key
1 => Ok(Value::from_text(val.clone())), // value
_ => Err("Invalid column".into()),
}
} else {
Err("cursor out of range".into())
}
}
}

View File

@@ -1,3 +1,8 @@
#!/bin/bash
# Wrapper around the limbo shell used by the test harness.
# The diff-flattened version invoked limbo unconditionally *and* again inside
# the conditional, running it twice; only the conditional invocation belongs.
# If RUST_LOG is non-empty, enable tracing output to testing/test.log.
if [ -n "$RUST_LOG" ]; then
    target/debug/limbo -m list -t testing/test.log "$@"
else
    target/debug/limbo -m list "$@"
fi

View File

@@ -345,10 +345,10 @@ def test_kv():
limbo = TestLimboShell()
limbo.run_test_fn(
"create virtual table t using kv_store;",
lambda res: "Virtual table module not found: kv_store" in res,
lambda res: "Module kv_store not found" in res,
)
limbo.execute_dot(f".load {ext_path}")
limbo.debug_print(
limbo.execute_dot(
"create virtual table t using kv_store;",
)
limbo.run_test_fn(".schema", lambda res: "CREATE VIRTUAL TABLE t" in res)
@@ -398,10 +398,35 @@ def test_kv():
limbo.run_test_fn(
"select count(*) from t;", lambda res: "100" == res, "can insert 100 rows"
)
limbo.run_test_fn("update t set value = 'updated' where key = 'key33';", null)
limbo.run_test_fn(
"select * from t where key = 'key33';",
lambda res: res == "key33|updated",
"can update single row",
)
limbo.run_test_fn(
"select COUNT(*) from t where value = 'updated';",
lambda res: res == "1",
"only updated a single row",
)
limbo.run_test_fn("update t set value = 'updated2';", null)
limbo.run_test_fn(
"select COUNT(*) from t where value = 'updated2';",
lambda res: res == "100",
"can update all rows",
)
limbo.run_test_fn("delete from t limit 96;", null, "can delete 96 rows")
limbo.run_test_fn(
"select count(*) from t;", lambda res: "4" == res, "four rows remain"
)
limbo.run_test_fn(
"update t set key = '100' where 1;", null, "where clause evaluates properly"
)
limbo.run_test_fn(
"select * from t where key = '100';",
lambda res: res == "100|updated2",
"there is only 1 key remaining after setting all keys to same value",
)
limbo.quit()

113
testing/cli_tests/memory.py Executable file
View File

@@ -0,0 +1,113 @@
#!/usr/bin/env python3
import os
from test_limbo_cli import TestLimboShell
sqlite_exec = "./target/debug/limbo"
sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ")
def validate_with_expected(result: str, expected: str):
    """Check whether `expected` occurs somewhere in `result`.

    Returns a (passed, expected) pair so the caller can report the
    expected text when the check fails.
    """
    passed = expected in result
    return (passed, expected)
def stub_memory_test(
    limbo: TestLimboShell,
    name: str,
    blob_size: int = 1024**2,
    vals: int = 100,
    blobs: bool = True,
):
    """Insert `vals * 2` rows (zero-blobs interleaved with integers when
    `blobs` is set), read every row back, and verify the combined output.

    The whole workload is sent to the shell as one statement string and
    checked via `validate_with_expected`.
    """
    # hex() renders each zero byte as "00", so the expected text is 2x the blob size.
    zero_blob = "0" * blob_size * 2

    statements = ["CREATE TABLE temp (t1 BLOB, t2 INTEGER);"]
    for i in range(vals * 2):
        if blobs and i % 2 == 0:
            statements.append(f"INSERT INTO temp (t1) VALUES (zeroblob({blob_size}));")
        else:
            statements.append(f"INSERT INTO temp (t2) VALUES ({i});")

    # Read each row back individually, tracking the output we expect per row.
    expected = []
    for i in range(vals * 2):
        if blobs and i % 2 == 0:
            statements.append(f"SELECT hex(t1) FROM temp LIMIT 1 OFFSET {i};")
            expected.append(zero_blob)
        else:
            statements.append(f"SELECT t2 FROM temp LIMIT 1 OFFSET {i};")
            expected.append(f"{i}")

    # Final sanity check: total row count.
    statements.append("SELECT count(*) FROM temp;")
    expected.append(str(vals * 2))

    expected_text = "\n".join(expected)
    limbo.run_test_fn(
        "".join(statements),
        lambda res: validate_with_expected(res, expected_text),
        name,
    )
# TODO: no delete tests for now because limbo outputs some debug information on delete
def memory_tests() -> list[dict]:
    """Build the parameter sets (kwargs for `stub_memory_test`) to run.

    Covers integer-only inserts of increasing size, a fixed 1 KiB blob
    case, and interleaved-blob cases with blob sizes up to 1 MiB.
    """
    # Integer-only workloads: 0, 100, ..., 900 value pairs.
    tests: list[dict] = [
        {
            "name": f"small-insert-integer-vals-{vals}",
            "vals": vals,
            "blobs": False,
        }
        for vals in range(0, 1000, 100)
    ]

    # Fixed 1 KiB blob cases, one small and one big.
    tests.append(
        {
            "name": f"small-insert-blob-interleaved-blob-size-{1024}",
            "vals": 10,
            "blob_size": 1024,
        }
    )
    tests.append(
        {
            "name": f"big-insert-blob-interleaved-blob-size-{1024}",
            "vals": 100,
            "blob_size": 1024,
        }
    )

    # Larger blobs: 256 KiB steps up to and including 1 MiB (skip size 0 by
    # starting the range at the step itself).
    step = 1024 * 4**4
    for blob_size in range(step, (1024 * 1024) + 1, step):
        for label, vals in (("small", 10), ("big", 100)):
            tests.append(
                {
                    "name": f"{label}-insert-blob-interleaved-blob-size-{blob_size}",
                    "vals": vals,
                    "blob_size": blob_size,
                }
            )
    return tests
def main():
    """Run every memory test in its own shell; abort on the first failure."""
    # TODO see how to parallelize this loop with different subprocesses
    for params in memory_tests():
        shell = TestLimboShell()
        try:
            stub_memory_test(shell, **params)
        except Exception as e:
            print(f"Test FAILED: {e}")
            shell.quit()
            exit(1)
        shell.quit()  # remove this line when `with` statement is supported for TestLimboShell
    print("All tests passed successfully.")


if __name__ == "__main__":
    main()

View File

@@ -15,4 +15,149 @@ do_execsql_test_on_specific_db {:memory:} must-be-int-insert {
} {1
2
3
4}
4}
do_execsql_test_on_specific_db {:memory:} strict-basic-creation {
CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test1 VALUES(1, 'item1', 10.5);
SELECT * FROM test1;
} {1|item1|10.5}
do_execsql_test_in_memory_any_error strict-require-datatype {
CREATE TABLE test2(id INTEGER, name) STRICT;
}
do_execsql_test_in_memory_any_error strict-valid-datatypes {
CREATE TABLE test2(id INTEGER, value DATETIME) STRICT;
}
do_execsql_test_in_memory_any_error strict-type-enforcement {
CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test3 VALUES(1, 'item1', 'not-a-number');
}
do_execsql_test_on_specific_db {:memory:} strict-type-coercion {
CREATE TABLE test4(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test4 VALUES(1, 'item1', '10.5');
SELECT typeof(price), price FROM test4;
} {real|10.5}
do_execsql_test_on_specific_db {:memory:} strict-any-flexibility {
CREATE TABLE test5(id INTEGER, data ANY) STRICT;
INSERT INTO test5 VALUES(1, 100);
INSERT INTO test5 VALUES(2, 'text');
INSERT INTO test5 VALUES(3, 3.14);
SELECT id, typeof(data) FROM test5 ORDER BY id;
} {1|integer
2|text
3|real}
do_execsql_test_on_specific_db {:memory:} strict-any-preservation {
CREATE TABLE test6(id INTEGER, code ANY) STRICT;
INSERT INTO test6 VALUES(1, '000123');
SELECT typeof(code), code FROM test6;
} {text|000123}
# INT (unlike INTEGER) PRIMARY KEY is not a rowid alias, so inserting NULL
# must fail. The original was missing the ';' after STRICT, which turned the
# whole script into a SQL syntax error — the any-error test then passed for
# the wrong reason instead of exercising the NULL-into-INT-PK path.
do_execsql_test_in_memory_any_error strict-int-vs-integer-pk {
    CREATE TABLE test8(id INT PRIMARY KEY, name TEXT) STRICT;
    INSERT INTO test8 VALUES(NULL, 'test');
}
do_execsql_test_on_specific_db {:memory:} strict-integer-pk-behavior {
CREATE TABLE test9(id INTEGER PRIMARY KEY, name TEXT) STRICT;
INSERT INTO test9 VALUES(NULL, 'test');
SELECT id, name FROM test9;
} {1|test}
do_execsql_test_on_specific_db {:memory:} strict-mixed-inserts {
CREATE TABLE test11(
id INTEGER PRIMARY KEY,
name TEXT,
price REAL,
quantity INT,
tags ANY
) STRICT;
INSERT INTO test11 VALUES(1, 'item1', 10.5, 5, 'tag1');
INSERT INTO test11 VALUES(2, 'item2', 20.75, 10, 42);
SELECT id, name, price, quantity, typeof(tags) FROM test11 ORDER BY id;
} {1|item1|10.5|5|text
2|item2|20.75|10|integer}
do_execsql_test_on_specific_db {:memory:} strict-update-basic {
CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test1 VALUES(1, 'item1', 10.5);
UPDATE test1 SET price = 15.75 WHERE id = 1;
SELECT * FROM test1;
} {1|item1|15.75}
do_execsql_test_in_memory_any_error strict-update-type-enforcement {
CREATE TABLE test2(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test2 VALUES(1, 'item1', 10.5);
UPDATE test2 SET price = 'not-a-number' WHERE id = 1;
}
do_execsql_test_on_specific_db {:memory:} strict-update-type-coercion {
CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test3 VALUES(1, 'item1', 10.5);
UPDATE test3 SET price = '15.75' WHERE id = 1;
SELECT id, typeof(price), price FROM test3;
} {1|real|15.75}
do_execsql_test_on_specific_db {:memory:} strict-update-any-flexibility {
CREATE TABLE test4(id INTEGER, data ANY) STRICT;
INSERT INTO test4 VALUES(1, 100);
UPDATE test4 SET data = 'text' WHERE id = 1;
INSERT INTO test4 VALUES(2, 'original');
UPDATE test4 SET data = 3.14 WHERE id = 2;
SELECT id, typeof(data), data FROM test4 ORDER BY id;
} {1|text|text
2|real|3.14}
do_execsql_test_on_specific_db {:memory:} strict-update-any-preservation {
CREATE TABLE test5(id INTEGER, code ANY) STRICT;
INSERT INTO test5 VALUES(1, 'text');
UPDATE test5 SET code = '000123' WHERE id = 1;
SELECT typeof(code), code FROM test5;
} {text|000123}
do_execsql_test_in_memory_any_error strict-update-not-null-constraint {
CREATE TABLE test7(id INTEGER, name TEXT NOT NULL) STRICT;
INSERT INTO test7 VALUES(1, 'name');
UPDATE test7 SET name = NULL WHERE id = 1;
}
# Uncomment following test case when unique constraint is added
#do_execsql_test_any_error strict-update-pk-constraint {
# CREATE TABLE test8(id INTEGER PRIMARY KEY, name TEXT) STRICT;
# INSERT INTO test8 VALUES(1, 'name1');
# INSERT INTO test8 VALUES(2, 'name2');
# UPDATE test8 SET id = 2 WHERE id = 1;
#}
do_execsql_test_on_specific_db {:memory:} strict-update-multiple-columns {
CREATE TABLE test9(id INTEGER, name TEXT, price REAL, quantity INT) STRICT;
INSERT INTO test9 VALUES(1, 'item1', 10.5, 5);
UPDATE test9 SET name = 'updated', price = 20.75, quantity = 10 WHERE id = 1;
SELECT * FROM test9;
} {1|updated|20.75|10}
do_execsql_test_on_specific_db {:memory:} strict-update-where-clause {
CREATE TABLE test10(id INTEGER, category TEXT, price REAL) STRICT;
INSERT INTO test10 VALUES(1, 'A', 10);
INSERT INTO test10 VALUES(2, 'A', 20);
INSERT INTO test10 VALUES(3, 'B', 30);
UPDATE test10 SET price = price * 2 WHERE category = 'A';
SELECT id, price FROM test10 ORDER BY id;
} {1|20.0
2|40.0
3|30.0}
do_execsql_test_on_specific_db {:memory:} strict-update-expression {
CREATE TABLE test11(id INTEGER, name TEXT, price REAL, discount REAL) STRICT;
INSERT INTO test11 VALUES(1, 'item1', 100, 0.1);
UPDATE test11 SET price = price - (price * discount);
SELECT id, price FROM test11;
} {1|90.0}

View File

@@ -141,3 +141,62 @@ Collin|15}
do_execsql_test case-insensitive-alias {
select u.first_name as fF, count(1) > 0 as cC from users u where fF = 'Jamie' group by fF order by cC;
} {Jamie|1}
do_execsql_test age_idx_order_desc {
select first_name from users order by age desc limit 3;
} {Robert
Sydney
Matthew}
do_execsql_test rowid_or_integer_pk_desc {
select first_name from users order by id desc limit 3;
} {Nicole
Gina
Dorothy}
# The following two tests may seem dumb, but they verify that index scanning by age_idx doesn't drop any rows due to BTree bugs
# This is the ascending half of the row-count pair; the original used
# `order by age desc`, duplicating the desc test below it.
do_execsql_test orderby_asc_verify_rows {
    select count(1) from (select * from users order by age asc)
} {10000}
do_execsql_test orderby_desc_verify_rows {
select count(1) from (select * from users order by age desc)
} {10000}
do_execsql_test orderby_desc_with_offset {
select first_name, age from users order by age desc limit 3 offset 666;
} {Francis|94
Matthew|94
Theresa|94}
do_execsql_test orderby_desc_with_filter {
select first_name, age from users where age <= 50 order by age desc limit 5;
} {Gerald|50
Nicole|50
Tammy|50
Marissa|50
Daniel|50}
do_execsql_test orderby_asc_with_filter_range {
select first_name, age from users where age <= 50 and age >= 49 order by age asc limit 5;
} {William|49
Jennifer|49
Robert|49
David|49
Stephanie|49}
do_execsql_test orderby_desc_with_filter_id_lt {
select id from users where id < 6666 order by id desc limit 5;
} {6665
6664
6663
6662
6661}
do_execsql_test orderby_desc_with_filter_id_le {
select id from users where id <= 6666 order by id desc limit 5;
} {6666
6665
6664
6663
6662}

View File

@@ -97,3 +97,124 @@ proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolera
}
}
}
# This procedure passes the test if the output contains error messages
proc run_test_expecting_any_error {sqlite_exec db_name sql} {
    # Pass the test when running $sql against $db_name produces any
    # error-looking output; which particular error does not matter.
    # catch so a non-zero exit status doesn't abort the test script.
    catch {exec {*}[list $sqlite_exec $db_name $sql]} result options
    # Any of these markers (×, error, syntax error, failed, ...) counts.
    if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} {
        # No error indicators in output — the test fails.
        puts "Test FAILED: '$sql'"
        puts "Expected an error but command output didn't indicate any error: '$result'"
        exit 1
    }
    puts "Test PASSED: Got expected error"
    return 1
}
# This procedure passes if error matches a specific pattern
proc run_test_expecting_error {sqlite_exec db_name sql expected_error_pattern} {
    # Pass the test only when $sql errors AND the error output matches
    # $expected_error_pattern (a regexp).
    catch {exec {*}[list $sqlite_exec $db_name $sql]} result options
    # First make sure we got an error at all.
    if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} {
        puts "Test FAILED: '$sql'"
        puts "Expected an error matching '$expected_error_pattern'"
        puts "But command output didn't indicate any error: '$result'"
        exit 1
    }
    # Then make sure it is the error we were looking for.
    if {![regexp $expected_error_pattern $result]} {
        puts "Test FAILED: '$sql'"
        puts "Error occurred but didn't match expected pattern."
        puts "Output was: '$result'"
        puts "Expected pattern: '$expected_error_pattern'"
        exit 1
    }
    # Got the expected error matching the pattern.
    return 1
}
# This version accepts exact error text, ignoring formatting
proc run_test_expecting_error_content {sqlite_exec db_name sql expected_error_text} {
    # Pass the test when $sql errors and the error text equals
    # $expected_error_text once whitespace, punctuation and case are ignored.
    catch {exec {*}[list $sqlite_exec $db_name $sql]} result options
    # First make sure we got an error at all.
    if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} {
        puts "Test FAILED: '$sql'"
        puts "Expected an error with text: '$expected_error_text'"
        puts "But command output didn't indicate any error: '$result'"
        exit 1
    }
    # Normalize both sides: drop whitespace/punctuation, lowercase everything,
    # so formatting differences don't cause spurious failures.
    set normalized_actual [string tolower [regsub -all {[[:space:]]|[[:punct:]]} $result ""]]
    set normalized_expected [string tolower [regsub -all {[[:space:]]|[[:punct:]]} $expected_error_text ""]]
    # The expected text must appear inside the actual output.
    if {[string first $normalized_expected $normalized_actual] == -1} {
        puts "Test FAILED: '$sql'"
        puts "Error occurred but content didn't match."
        puts "Output was: '$result'"
        puts "Expected text: '$expected_error_text'"
        exit 1
    }
    # Got an error with the expected content.
    return 1
}
proc do_execsql_test_error {test_name sql_statements expected_error_pattern} {
    # Run the pattern-matching error test against every configured database.
    foreach db $::test_dbs {
        puts [format "(%s) %s Running error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name]
        run_test_expecting_error $::sqlite_exec $db [string trim $sql_statements] $expected_error_pattern
    }
}
proc do_execsql_test_error_content {test_name sql_statements expected_error_text} {
    # Run the content-matching error test against every configured database.
    foreach db $::test_dbs {
        puts [format "(%s) %s Running error content test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name]
        run_test_expecting_error_content $::sqlite_exec $db [string trim $sql_statements] $expected_error_text
    }
}
proc do_execsql_test_any_error {test_name sql_statements} {
    # Run the any-error test against every configured database.
    foreach db $::test_dbs {
        puts [format "(%s) %s Running any-error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name]
        run_test_expecting_any_error $::sqlite_exec $db [string trim $sql_statements]
    }
}
proc do_execsql_test_in_memory_any_error {test_name sql_statements} {
    # Like do_execsql_test_any_error, but always runs against a fresh
    # ":memory:" database instead of iterating $::test_dbs.
    puts [format "(in-memory) %s Running any-error test: %s" [string repeat " " 31] $test_name]
    run_test_expecting_any_error $::sqlite_exec ":memory:" [string trim $sql_statements]
}

View File

@@ -2,9 +2,9 @@ pub mod grammar_generator;
#[cfg(test)]
mod tests {
use std::rc::Rc;
use std::{collections::HashSet, rc::Rc};
use rand::SeedableRng;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use rusqlite::params;
@@ -107,6 +107,207 @@ mod tests {
}
}
#[test]
pub fn rowid_seek_fuzz() {
    // Exhaustively compare limbo against sqlite for rowid seeks:
    // every comparison operator x every ordering x every pivot value.
    // INTEGER PRIMARY KEY is a rowid alias, so an index is not created
    let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x INTEGER PRIMARY KEY)");
    let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();

    // Populate rowids 1..=9999 with a single multi-row INSERT.
    let values: Vec<String> = (1..10000).map(|x| format!("({})", x)).collect();
    let insert = format!("INSERT INTO t VALUES {}", values.join(", "));
    sqlite_conn.execute(&insert, params![]).unwrap();
    sqlite_conn.close().unwrap();

    // Reopen so both engines observe the committed rows.
    let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();
    let limbo_conn = db.connect_limbo();

    const COMPARISONS: [&str; 4] = ["<", "<=", ">", ">="];
    const ORDER_BY: [Option<&str>; 4] = [
        None,
        Some("ORDER BY x"),
        Some("ORDER BY x DESC"),
        Some("ORDER BY x ASC"),
    ];

    for comp in COMPARISONS.iter() {
        for order_by in ORDER_BY.iter() {
            // Pivot runs past both ends of the stored range on purpose.
            for max in 0..=10000 {
                let query = format!(
                    "SELECT * FROM t WHERE x {} {} {} LIMIT 3",
                    comp,
                    max,
                    order_by.unwrap_or("")
                );
                log::trace!("query: {}", query);
                let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
                let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
                assert_eq!(
                    limbo, sqlite,
                    "query: {}, limbo: {:?}, sqlite: {:?}",
                    query, limbo, sqlite
                );
            }
        }
    }
}
#[test]
pub fn index_scan_fuzz() {
    // Same exhaustive cross-check as rowid_seek_fuzz, but `x PRIMARY KEY`
    // (not INTEGER) is a non-rowid key, so lookups go through an index.
    let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x PRIMARY KEY)");
    let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();

    // Populate keys 0..=9999 with a single multi-row INSERT.
    let values: Vec<String> = (0..10000).map(|x| format!("({})", x)).collect();
    let insert = format!("INSERT INTO t VALUES {}", values.join(", "));
    sqlite_conn.execute(&insert, params![]).unwrap();
    sqlite_conn.close().unwrap();

    // Reopen so both engines observe the committed rows.
    let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();
    let limbo_conn = db.connect_limbo();

    const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="];
    const ORDER_BY: [Option<&str>; 4] = [
        None,
        Some("ORDER BY x"),
        Some("ORDER BY x DESC"),
        Some("ORDER BY x ASC"),
    ];

    for comp in COMPARISONS.iter() {
        for order_by in ORDER_BY.iter() {
            for max in 0..=10000 {
                let query = format!(
                    "SELECT * FROM t WHERE x {} {} {} LIMIT 3",
                    comp,
                    max,
                    order_by.unwrap_or(""),
                );
                let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
                let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
                assert_eq!(
                    limbo, sqlite,
                    "query: {}, limbo: {:?}, sqlite: {:?}",
                    query, limbo, sqlite
                );
            }
        }
    }
}
#[test]
pub fn index_scan_compound_key_fuzz() {
    // Fuzzes index scans over a compound (x, y) primary key, comparing limbo's
    // results against sqlite's for every comparison operator and ordering.
    // Reproducible randomness: honor SEED from the environment if set,
    // otherwise derive one at runtime (rng_from_time also returns the seed).
    let (mut rng, seed) = if std::env::var("SEED").is_ok() {
        let seed = std::env::var("SEED").unwrap().parse::<u64>().unwrap();
        (ChaCha8Rng::seed_from_u64(seed), seed)
    } else {
        rng_from_time()
    };
    let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y))");
    let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();
    // Collect 100k distinct (x, y) pairs up front so the compound primary key
    // is never violated by the bulk insert below.
    let mut pk_tuples = HashSet::new();
    while pk_tuples.len() < 100000 {
        pk_tuples.insert((rng.random_range(0..3000), rng.random_range(0..3000)));
    }
    let mut tuples = Vec::new();
    for pk_tuple in pk_tuples {
        tuples.push(format!(
            "({}, {}, {})",
            pk_tuple.0,
            pk_tuple.1,
            rng.random_range(0..2000)
        ));
    }
    let insert = format!("INSERT INTO t VALUES {}", tuples.join(", "));
    sqlite_conn.execute(&insert, params![]).unwrap();
    sqlite_conn.close().unwrap();
    // Reopen so both connections observe the committed rows.
    let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();
    let limbo_conn = db.connect_limbo();
    const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="];
    const ORDER_BY: [Option<&str>; 4] = [
        None,
        Some("ORDER BY x"),
        Some("ORDER BY x DESC"),
        Some("ORDER BY x ASC"),
    ];
    // On failure, print a SQL dump (tagged with the seed) that reproduces the
    // database contents so the failing case can be replayed manually.
    let print_dump_on_fail = |insert: &str, seed: u64| {
        let comment = format!("-- seed: {}; dump for manual debugging:", seed);
        let pragma_journal_mode = "PRAGMA journal_mode = wal;";
        let create_table = "CREATE TABLE t(x, y, z, PRIMARY KEY (x, y));";
        let dump = format!(
            "{}\n{}\n{}\n{}\n{}",
            comment, pragma_journal_mode, create_table, comment, insert
        );
        println!("{}", dump);
    };
    for comp in COMPARISONS.iter() {
        for order_by in ORDER_BY.iter() {
            for max in 0..=3000 {
                // see comment below about ordering and the '=' comparison operator; omitting LIMIT for that reason
                // we mainly have LIMIT here for performance reasons but for = we want to get all the rows to ensure
                // correctness in the = case
                let limit = if *comp == "=" { "" } else { "LIMIT 5" };
                let query = format!(
                    "SELECT * FROM t WHERE x {} {} {} {}",
                    comp,
                    max,
                    order_by.unwrap_or(""),
                    limit
                );
                log::trace!("query: {}", query);
                let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
                let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
                let is_equal = limbo == sqlite;
                if !is_equal {
                    // if the condition is = and the same rows are present but in different order, then we accept that
                    // e.g. sqlite doesn't bother iterating in reverse order if "WHERE X = 3 ORDER BY X DESC", but we currently do.
                    if *comp == "=" {
                        let limbo_row_count = limbo.len();
                        let sqlite_row_count = sqlite.len();
                        if limbo_row_count == sqlite_row_count {
                            // Same cardinality: accept any permutation, but every
                            // row must appear on both sides.
                            for limbo_row in limbo.iter() {
                                if !sqlite.contains(limbo_row) {
                                    // save insert to file and print the filename for debugging
                                    let error_msg = format!("row not found in sqlite: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed);
                                    print_dump_on_fail(&insert, seed);
                                    panic!("{}", error_msg);
                                }
                            }
                            for sqlite_row in sqlite.iter() {
                                if !limbo.contains(sqlite_row) {
                                    let error_msg = format!("row not found in limbo: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed);
                                    print_dump_on_fail(&insert, seed);
                                    panic!("{}", error_msg);
                                }
                            }
                            continue;
                        } else {
                            print_dump_on_fail(&insert, seed);
                            let error_msg = format!("row count mismatch (limbo: {}, sqlite: {}): query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", limbo_row_count, sqlite_row_count, query, limbo, sqlite, seed);
                            panic!("{}", error_msg);
                        }
                    }
                    // Non-'=' comparisons must match exactly, order included.
                    print_dump_on_fail(&insert, seed);
                    panic!(
                        "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}",
                        query, limbo, sqlite, seed
                    );
                }
            }
        }
    }
}
#[test]
pub fn arithmetic_expression_fuzz() {
let _ = env_logger::try_init();