From 16b706a8d483bda34e998ec24d012ea3bb787490 Mon Sep 17 00:00:00 2001 From: danawan Date: Tue, 23 Sep 2025 11:49:41 +0700 Subject: [PATCH 01/65] add sqlean fuzzy string distances --- Cargo.lock | 8 + Cargo.toml | 2 + extensions/fuzzy/Cargo.toml | 20 ++ extensions/fuzzy/build.rs | 5 + extensions/fuzzy/src/common.rs | 32 ++ extensions/fuzzy/src/editdist.rs | 276 ++++++++++++++++ extensions/fuzzy/src/lib.rs | 522 +++++++++++++++++++++++++++++++ testing/cli_tests/extensions.py | 66 ++++ 8 files changed, 931 insertions(+) create mode 100644 extensions/fuzzy/Cargo.toml create mode 100644 extensions/fuzzy/build.rs create mode 100644 extensions/fuzzy/src/common.rs create mode 100644 extensions/fuzzy/src/editdist.rs create mode 100644 extensions/fuzzy/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 9e9739689..a331618e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2137,6 +2137,14 @@ dependencies = [ "turso_ext", ] +[[package]] +name = "limbo_fuzzy" +version = "0.2.0-pre.7" +dependencies = [ + "mimalloc", + "turso_ext", +] + [[package]] name = "limbo_ipaddr" version = "0.2.0-pre.7" diff --git a/Cargo.toml b/Cargo.toml index 6f3c64d9d..7ffeebf3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ members = [ "extensions/percentile", "extensions/regexp", "extensions/tests", + "extensions/fuzzy", "macros", "simulator", "sqlite3", @@ -61,6 +62,7 @@ limbo_percentile = { path = "extensions/percentile", version = "0.2.0-pre.7" } limbo_regexp = { path = "extensions/regexp", version = "0.2.0-pre.7" } turso_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.2.0-pre.7" } limbo_uuid = { path = "extensions/uuid", version = "0.2.0-pre.7" } +limbo_fuzzy = { path = "extensions/fuzzy", version = "0.2.0-pre.7" } turso_parser = { path = "parser", version = "0.2.0-pre.7" } sql_generation = { path = "sql_generation" } strum = { version = "0.26", features = ["derive"] } diff --git a/extensions/fuzzy/Cargo.toml b/extensions/fuzzy/Cargo.toml new file mode 100644 index 000000000..cf6036cfd --- /dev/null +++ b/extensions/fuzzy/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "limbo_fuzzy" +version.workspace = true +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +description = "Limbo fuzzy string extension" + +[lib] +crate-type = ["cdylib", "lib"] + +[features] +static = ["turso_ext/static"] + +[dependencies] +turso_ext = { workspace = true, features = ["static"] } + +[target.'cfg(not(target_family = "wasm"))'.dependencies] +mimalloc = { version = "0.1", default-features = false } diff --git a/extensions/fuzzy/build.rs b/extensions/fuzzy/build.rs new file mode 100644 index 000000000..4a3d51d14 --- /dev/null +++ b/extensions/fuzzy/build.rs @@ -0,0 +1,5 @@ +fn main() { + if cfg!(target_os = "windows") { + println!("cargo:rustc-link-lib=advapi32"); + } +} diff --git a/extensions/fuzzy/src/common.rs b/extensions/fuzzy/src/common.rs new file mode 100644 index 000000000..4b0c12fd1 --- /dev/null +++ b/extensions/fuzzy/src/common.rs @@ -0,0 +1,32 @@ +pub const CCLASS_SILENT: u8 = 0; +pub const CCLASS_VOWEL: u8 = 1; +pub const CCLASS_B: u8 = 2; +pub const CCLASS_Y: u8 = 9; +//This will be useful in the phonetic +//pub const CCLASS_L: u8 = 6; +//pub const CCLASS_R: u8 = 7; +//pub const CCLASS_M: u8 = 8; +//pub const CCLASS_DIGIT: u8 = 10; +//pub const CCLASS_SPACE: u8 = 11; +//pub const CCLASS_OTHER: u8 = 12; +pub const MID_CLASS: [u8; 128] = [ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, 12, 11, 11, 12, 12, // + 12, 12, 12, 12, 12, 12, 12, 12, 12, 
12, 12, 12, 12, 12, 12, 11, // + 12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 12, 12, 12, 12, // + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 12, 12, 12, 12, 12, 12, // + 12, 1, 2, 3, 4, 1, 2, 3, 0, 1, 3, 3, 6, 8, 8, 1, // + 2, 3, 7, 3, 4, 1, 2, 2, 3, 1, 3, 12, 12, 12, 12, 12, // + 12, 1, 2, 3, 4, 1, 2, 3, 0, 1, 3, 3, 6, 8, 8, 1, // + 2, 3, 7, 3, 4, 1, 2, 2, 3, 1, 3, 12, 12, 12, 12, 12, // +]; + +pub const INIT_CLASS: [u8; 128] = [ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, 12, 11, 11, 12, 12, // + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, // + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, // + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 12, 12, 12, 12, 12, 12, // + 12, 1, 2, 3, 4, 1, 2, 3, 0, 1, 3, 3, 6, 8, 8, 1, // + 2, 3, 7, 3, 4, 1, 2, 2, 3, 9, 3, 12, 12, 12, 12, 12, // + 12, 1, 2, 3, 4, 1, 2, 3, 0, 1, 3, 3, 6, 8, 8, 1, // + 2, 3, 7, 3, 4, 1, 2, 2, 3, 9, 3, 12, 12, 12, 12, 12, // +]; diff --git a/extensions/fuzzy/src/editdist.rs b/extensions/fuzzy/src/editdist.rs new file mode 100644 index 000000000..8821cfbda --- /dev/null +++ b/extensions/fuzzy/src/editdist.rs @@ -0,0 +1,276 @@ +// Adapted from SQLite spellfix.c extension and sqlean fuzzy/editdist.c +use crate::common::*; + +#[derive(Debug, PartialEq)] +pub enum EditDistanceError { + NonAsciiInput, +} + +pub type EditDistanceResult = Result<i32, EditDistanceError>; + +fn character_class(c_prev: u8, c: u8) -> u8 { + if c_prev == 0 { + INIT_CLASS[(c & 0x7f) as usize] + } else { + MID_CLASS[(c & 0x7f) as usize] + } +} + +/// Return the cost of inserting or deleting character c immediately +/// following character c_prev. If c_prev == 0, that means c is the first +/// character of the word. +fn insert_or_delete_cost(c_prev: u8, c: u8, c_next: u8) -> i32 { + let class_c = character_class(c_prev, c); + + if class_c == CCLASS_SILENT { + return 1; + } + + if c_prev == c { + return 10; + } + + if class_c == CCLASS_VOWEL && (c_prev == b'r' || c_next == b'r') { + return 20; // Insert a vowel before or after 'r' + } + + let class_c_prev = character_class(c_prev, c_prev); + if class_c == class_c_prev { + if class_c == CCLASS_VOWEL { + 15 + } else { + 50 + } + } else { + // Any other character insertion or deletion + 100 + } +} + +const FINAL_INS_COST_DIV: i32 = 4; + +/// Return the cost of substituting c_to in place of c_from assuming +/// the previous character is c_prev. If c_prev == 0 then c_to is the first +/// character of the word. +fn substitute_cost(c_prev: u8, c_from: u8, c_to: u8) -> i32 { + if c_from == c_to { + return 0; + } + + if c_from == (c_to ^ 0x20) && c_to.is_ascii_alphabetic() { + return 0; + } + + let class_from = character_class(c_prev, c_from); + let class_to = character_class(c_prev, c_to); + + if class_from == class_to { + 40 + } else if (CCLASS_B..=CCLASS_Y).contains(&class_from) + && (CCLASS_B..=CCLASS_Y).contains(&class_to) + { + 75 + } else { + 100 + } +} + +/// Given two strings z_a and z_b which are pure ASCII, return the cost +/// of transforming z_a into z_b. If z_a ends with '*' assume that it is +/// a prefix of z_b and give only minimal penalty for extra characters +/// on the end of z_b.
+/// +/// Returns cost where smaller numbers mean a closer match +/// +/// Returns Err for Non-ASCII characters on input +pub fn edit_distance(z_a: &str, z_b: &str) -> EditDistanceResult { + if z_a.is_empty() && z_b.is_empty() { + return Ok(0); + } + + let za_bytes = z_a.as_bytes(); + let zb_bytes = z_b.as_bytes(); + + if !z_a.is_ascii() || !z_b.is_ascii() { + return Err(EditDistanceError::NonAsciiInput); + } + + if z_a.is_empty() { + let mut res = 0; + let mut c_b_prev = 0u8; + let zb_bytes = z_b.as_bytes(); + + for (i, &c_b) in zb_bytes.iter().enumerate() { + let c_b_next = if i + 1 < zb_bytes.len() { + zb_bytes[i + 1] + } else { + 0 + }; + res += insert_or_delete_cost(c_b_prev, c_b, c_b_next) / FINAL_INS_COST_DIV; + c_b_prev = c_b; + } + return Ok(res); + } + + if z_b.is_empty() { + let mut res = 0; + let mut c_a_prev = 0u8; + let za_bytes = z_a.as_bytes(); + + for (i, &c_a) in za_bytes.iter().enumerate() { + let c_a_next = if i + 1 < za_bytes.len() { + za_bytes[i + 1] + } else { + 0 + }; + res += insert_or_delete_cost(c_a_prev, c_a, c_a_next); + c_a_prev = c_a; + } + return Ok(res); + } + + let mut za_start = 0; + let mut zb_start = 0; + + // Skip any common prefix + while za_start < za_bytes.len() + && zb_start < zb_bytes.len() + && za_bytes[za_start] == zb_bytes[zb_start] + { + za_start += 1; + zb_start += 1; + } + + // If both strings are exhausted after common prefix + if za_start >= za_bytes.len() && zb_start >= zb_bytes.len() { + return Ok(0); + } + + let za_remaining = &za_bytes[za_start..]; + let zb_remaining = &zb_bytes[zb_start..]; + let n_a = za_remaining.len(); + let n_b = zb_remaining.len(); + + // Special processing if either remaining string is empty after prefix matching + if n_a == 0 { + let mut res = 0; + let mut c_b_prev = if za_start > 0 { + za_bytes[za_start - 1] + } else { + 0 + }; + + for (i, &c_b) in zb_remaining.iter().enumerate() { + let c_b_next = if i + 1 < n_b { zb_remaining[i + 1] } else { 0 }; + res += insert_or_delete_cost(c_b_prev, c_b, c_b_next) / FINAL_INS_COST_DIV; + c_b_prev = c_b; + } + return Ok(res); + } + + if n_b == 0 { + let mut res = 0; + let mut c_a_prev = if za_start > 0 { + za_bytes[za_start - 1] + } else { + 0 + }; + + for (i, &c_a) in za_remaining.iter().enumerate() { + let c_a_next = if i + 1 < n_a { za_remaining[i + 1] } else { 0 }; + res += insert_or_delete_cost(c_a_prev, c_a, c_a_next); + c_a_prev = c_a; + } + return Ok(res); + } + + // Check if a is a prefix pattern + if za_remaining.len() == 1 && za_remaining[0] == b'*' { + return Ok(0); + } + + let mut m = vec![0i32; n_b + 1]; + let mut cx = vec![0u8; n_b + 1]; + + let dc = if za_start > 0 { + za_bytes[za_start - 1] + } else { + 0 + }; + m[0] = 0; + cx[0] = dc; + + let mut c_b_prev = dc; + for x_b in 1..=n_b { + let c_b = zb_remaining[x_b - 1]; + let c_b_next = if x_b < n_b { zb_remaining[x_b] } else { 0 }; + cx[x_b] = c_b; + m[x_b] = m[x_b - 1] + insert_or_delete_cost(c_b_prev, c_b, c_b_next); + c_b_prev = c_b; + } + + let mut c_a_prev = dc; + for x_a in 1..=n_a { + let last_a = x_a == n_a; + let c_a = za_remaining[x_a - 1]; + let c_a_next = if x_a < n_a { za_remaining[x_a] } else { 0 }; + + if c_a == b'*' && last_a { + break; + } + + let mut d = m[0]; + m[0] = d + insert_or_delete_cost(c_a_prev, c_a, c_a_next); + + for x_b in 1..=n_b { + let c_b = zb_remaining[x_b - 1]; + let c_b_next = if x_b < n_b { zb_remaining[x_b] } else { 0 }; + + // Cost to insert c_b + let mut ins_cost = insert_or_delete_cost(cx[x_b - 1], c_b, c_b_next); + if last_a { + ins_cost /= FINAL_INS_COST_DIV; + 
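// The same FINAL_INS_COST_DIV discount also applies on the early-exit + // paths above; e.g. the unit tests below expect + // edit_distance("abc", "abcd") == 25 (one 100-cost insertion of 'd' / 4). +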
} + + // Cost to delete c_a + let del_cost = insert_or_delete_cost(cx[x_b], c_a, c_b_next); + + // Cost to substitute c_a -> c_b + let sub_cost = substitute_cost(cx[x_b - 1], c_a, c_b); + + // Find best cost + let mut total_cost = ins_cost + m[x_b - 1]; + let mut ncx = c_b; + + if del_cost + m[x_b] < total_cost { + total_cost = del_cost + m[x_b]; + ncx = c_a; + } + + if sub_cost + d < total_cost { + total_cost = sub_cost + d; + } + + d = m[x_b]; + m[x_b] = total_cost; + cx[x_b] = ncx; + } + c_a_prev = c_a; + } + + let res = if za_remaining.last() == Some(&b'*') { + let mut min_cost = m[1]; + + for &val in m.iter().skip(1).take(n_b) { + if val < min_cost { + min_cost = val; + } + } + + min_cost + } else { + m[n_b] + }; + + Ok(res) +} diff --git a/extensions/fuzzy/src/lib.rs b/extensions/fuzzy/src/lib.rs new file mode 100644 index 000000000..fb578e2d6 --- /dev/null +++ b/extensions/fuzzy/src/lib.rs @@ -0,0 +1,522 @@ +// Adapted from sqlean fuzzy +use std::cmp; +use turso_ext::{register_extension, scalar, ResultCode, Value}; +mod common; +mod editdist; + +register_extension! { + scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist}, +} + +/// Calculates and returns the Levenshtein distance of two non NULL strings. +#[scalar(name = "fuzzy_leven")] +fn levenshtein(args: &[Value]) -> Value { + let Some(arg1) = args[0].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let Some(arg2) = args[1].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let dist = leven(arg1, arg2); + return Value::from_integer(dist); +} + +fn leven(s1: &str, s2: &str) -> i64 { + let mut str1: &[u8] = s1.as_bytes(); + let mut str2: &[u8] = s2.as_bytes(); + let mut str1_len = str1.len(); + let mut str2_len = str2.len(); + + if str1_len == 0 { + return str2_len as i64; + } + + if str2_len == 0 { + return str1_len as i64; + } + + while str1_len > 0 && str2_len > 0 && str1[0] == str2[0] { + str1 = &str1[1..]; + str2 = &str2[1..]; + str1_len -= 1; + str2_len -= 1; + } + + let mut vector: Vec<usize> = (0..=str1_len).collect(); + + let mut last_diag: usize; + let mut cur: usize; + + for row in 1..=str2_len { + last_diag = row - 1; + vector[0] = row; + + for col in 1..=str1_len { + cur = vector[col]; + + let cost = if str1[col - 1] == str2[row - 1] { 0 } else { 1 }; + + vector[col] = std::cmp::min( + std::cmp::min(vector[col] + 1, vector[col - 1] + 1), + last_diag + cost, + ); + + last_diag = cur; + } + } + vector[str1_len] as i64 +} + +/// Calculates and returns the Damerau-Levenshtein distance of two non NULL strings. +#[scalar(name = "fuzzy_damlev")] +fn damerau_levenshtein(args: &[Value]) -> Value { + let Some(arg1) = args[0].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let Some(arg2) = args[1].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let dist = damlev(arg1, arg2); + return Value::from_integer(dist); +} + +#[allow(clippy::needless_range_loop)] +fn damlev(s1: &str, s2: &str) -> i64 { + let str1: &[u8] = s1.as_bytes(); + let str2: &[u8] = s2.as_bytes(); + let str1_len = str1.len(); + let str2_len = str2.len(); + + if str1_len == 0 { + return str2_len as i64; + } + + if str2_len == 0 { + return str1_len as i64; + } + + let mut start = 0; + while start < str1_len && start < str2_len && str1[start] == str2[start] { + start += 1; + } + let str1 = &str1[start..]; + let str2 = &str2[start..]; + let len1 = str1.len(); + let len2 = str2.len(); + + const ALPHA_SIZE: usize = 255; + let infi = len1 + len2; + + let 
mut dict = vec![0usize; ALPHA_SIZE]; + + let rows = len1 + 2; + let cols = len2 + 2; + let mut matrix = vec![vec![0usize; cols]; rows]; + + matrix[0][0] = infi; + + for i in 1..rows { + matrix[i][0] = infi; + matrix[i][1] = i - 1; + } + for j in 1..cols { + matrix[0][j] = infi; + matrix[1][j] = j - 1; + } + + for (row, &c1) in str1.iter().enumerate() { + let mut db = 0; + for (col, &c2) in str2.iter().enumerate() { + let i = dict[c2 as usize]; + let k = db; + let cost = if c1 == c2 { 0 } else { 1 }; + if cost == 0 { + db = col + 1; + } + + matrix[row + 2][col + 2] = std::cmp::min( + std::cmp::min( + matrix[row + 1][col + 1] + cost, + matrix[row + 2][col + 1] + 1, + ), + std::cmp::min( + matrix[row + 1][col + 2] + 1, + matrix[i][k] + (row + 1 - i - 1) + (col + 1 - k - 1) + 1, + ), + ); + } + dict[c1 as usize] = row + 1; + } + + matrix[rows - 1][cols - 1] as i64 +} +// +// fuzzy_editdist(A,B) +// +// Return the cost of transforming string A into string B. Both strings +// must be pure ASCII text. If A ends with '*' then it is assumed to be +// a prefix of B and extra characters on the end of B have minimal additional +// cost. +// +#[scalar(name = "fuzzy_editdist")] +pub fn edit_distance(args: &[Value]) { + let Some(arg1) = args[0].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let Some(arg2) = args[1].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + if let Ok(res) = editdist::edit_distance(arg1, arg2) { + return Value::from_integer(res as i64); + } else { + return Value::error(ResultCode::InvalidArgs); + } +} + +// returns the hamming distance between two strings +#[scalar(name = "fuzzy_hamming")] +fn hamming(args: &[Value]) { + let Some(arg1) = args[0].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let Some(arg2) = args[1].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let dist = hamming_dist(arg1, arg2); + return Value::from_integer(dist); +} + +fn hamming_dist(s1: &str, s2: &str) -> i64 { + let str1_b = s1.as_bytes(); + let str2_b = s2.as_bytes(); + + if str1_b.len() != str2_b.len() { + return -1_i64; + } + + let res = str1_b + .iter() + .zip(str2_b.iter()) + .filter(|(a, b)| a != b) + .count(); + + res as i64 +} +#[scalar(name = "fuzzy_jarowin")] +fn jaronwin(args: &[Value]) { + let Some(arg1) = args[0].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let Some(arg2) = args[1].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let res = jaro_winkler(arg1, arg2); + return Value::from_float(res); +} + +/// Calculates and returns the Jaro-Winkler distance of two non NULL strings. +fn jaro_winkler(s1: &str, s2: &str) -> f64 { + let dist = jaro(s1, s2); + + let mut prefix_len = 0; + for (c1, c2) in s1.chars().zip(s2.chars()) { + if c1 == c2 { + prefix_len += 1; + } else { + break; + } + + if prefix_len == 3 { + break; + } + } + + dist + (prefix_len as f64) * 0.1 * (1.0 - dist) +} + +/// Calculates and returns the Jaro distance of two non NULL strings. 
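+/// With m matching characters and t transpositions (half the number of +/// matching characters that are out of order), the similarity computed below is +/// (m / len1 + m / len2 + (m - t) / m) / 3.0.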
+fn jaro(s1: &str, s2: &str) -> f64 { + if s1 == s2 { + return 1.0; + } + + let s1: Vec<char> = s1.chars().collect(); + let s2: Vec<char> = s2.chars().collect(); + + let len1 = s1.len(); + let len2 = s2.len(); + + if len1 == 0 || len2 == 0 { + return 0.0; + } + + let max_dist = (cmp::max(len1, len2) / 2).saturating_sub(1); + let mut match_count = 0; + + let mut hash_s1 = vec![false; len1]; + let mut hash_s2 = vec![false; len2]; + + for i in 0..len1 { + let start = i.saturating_sub(max_dist); + let end = cmp::min(i + max_dist + 1, len2); + + for j in start..end { + if s1[i] == s2[j] && !hash_s2[j] { + hash_s1[i] = true; + hash_s2[j] = true; + match_count += 1; + break; + } + } + } + + if match_count == 0 { + return 0.0; + } + + let mut t = 0; + let mut point = 0; + + for i in 0..len1 { + if hash_s1[i] { + while point < len2 && !hash_s2[point] { + point += 1; + } + if point < len2 && s1[i] != s2[point] { + t += 1; + } + point += 1; + } + } + + let t = t as f64 / 2.0; + let match_count = match_count as f64; + + (match_count / len1 as f64 + match_count / len2 as f64 + (match_count - t) / match_count) / 3.0 +} + +/// Computes and returns the Optimal String Alignment distance for two non NULL strings. +#[scalar(name = "fuzzy_osadist")] +pub fn osadist(args: &[Value]) { + let Some(arg1) = args[0].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let Some(arg2) = args[1].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + + let dist = optimal_string_alignment(arg1, arg2); + return Value::from_integer(dist as i64); +} + +fn optimal_string_alignment(s1: &str, s2: &str) -> usize { + let mut s1_chars: Vec<char> = s1.chars().collect(); + let mut s2_chars: Vec<char> = s2.chars().collect(); + + let mut len1 = s1_chars.len(); + let mut len2 = s2_chars.len(); + + while len1 > 0 && len2 > 0 && s1_chars[0] == s2_chars[0] { + s1_chars.remove(0); + s2_chars.remove(0); + len1 -= 1; + len2 -= 1; + } + + if len1 == 0 { + return len2; + } + if len2 == 0 { + return len1; + } + + let mut matrix = vec![vec![0usize; len2 + 1]; len1 + 1]; + + // Initialize the first column and row; the iterator form keeps + // clippy's needless_range_loop lint quiet. + for (i, row) in matrix.iter_mut().enumerate().take(len1 + 1) { + row[0] = i; + } + + for (j, item) in matrix[0].iter_mut().enumerate().take(len2 + 1) { + *item = j; + } + + for i in 1..=len1 { + for j in 1..=len2 { + let cost = if s1_chars[i - 1] == s2_chars[j - 1] { + 0 + } else { + 1 + }; + + let deletion = matrix[i - 1][j] + 1; + let insertion = matrix[i][j - 1] + 1; + let substitution = matrix[i - 1][j - 1] + cost; + + matrix[i][j] = deletion.min(insertion).min(substitution); + + if i > 1 + && j > 1 + && s1_chars[i - 1] == s2_chars[j - 2] + && s1_chars[i - 2] == s2_chars[j - 1] + { + matrix[i][j] = matrix[i][j].min(matrix[i - 2][j - 2] + cost); + } + } + } + + matrix[len1][len2] +} + +//tests adapted from sqlean fuzzy +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_damlev() { + let cases = vec![ + ("abc", "abc", 0), + ("abc", "", 3), + ("", "abc", 3), + ("abc", "ab", 1), + ("abc", "abcd", 1), + ("abc", "acb", 1), + ("abc", "ca", 2), + ]; + + for (s1, s2, expected) in cases { + let got = damlev(s1, s2); + assert_eq!(got, expected, "damlev({}, {}) failed", s1, s2); + } + } + + #[test] + fn test_hamming() { + let cases = vec![ + ("abc", "abc", 0), + ("abc", "", -1), + ("", "abc", -1), + ("hello", "hellp", 1), + ("hello", "heloh", 2), + ]; + + for (s1, s2, expected) in cases { + let got = hamming_dist(s1, s2);
assert_eq!(got, expected, "hamming({}, {}) failed", s1, s2); + } + } + + #[test] + fn test_jaro_win() { + let cases: Vec<(&str, &str, f64)> = vec![ + ("abc", "abc", 1.0), + ("abc", "", 0.0), + ("", "abc", 0.0), + ("my string", "my tsring", 0.974), + ("my string", "my ntrisg", 0.896), + ]; + + for (s1, s2, expected) in cases { + let got = jaro_winkler(s1, s2); + + if (expected - 0.974).abs() < 1e-6 || (expected - 0.896).abs() < 1e-6 { + let got_rounded = (got * 1000.0).round() / 1000.0; + assert!( + (got_rounded - expected).abs() < 1e-6, + "jaro_winkler({}, {}) failed: got {}, expected {}", + s1, + s2, + got_rounded, + expected + ); + } else { + assert!( + (got - expected).abs() < 1e-6, + "jaro_winkler({}, {}) failed: got {}, expected {}", + s1, + s2, + got, + expected + ); + } + } + } + + #[test] + fn test_leven() { + let cases = vec![ + ("abc", "abc", 0), + ("abc", "", 3), + ("", "abc", 3), + ("abc", "ab", 1), + ("abc", "abcd", 1), + ("abc", "acb", 2), + ("abc", "ca", 3), + ]; + + for (s1, s2, expected) in cases { + let got = leven(s1, s2); + assert_eq!(got, expected, "leven({}, {}) failed", s1, s2); + } + } + + #[test] + fn test_edit_distance() { + let test_cases = vec![ + ("abc", "abc", 0), + ("abc", "", 300), + ("", "abc", 75), + ("abc", "ab", 100), + ("abc", "abcd", 25), + ("abc", "acb", 110), + ("abc", "ca", 225), + //more cases + ("awesome", "aewsme", 215), + ("kitten", "sitting", 105), + ("flaw", "lawn", 110), + ("rust", "trust", 100), + ("gumbo", "gambol", 65), + ]; + for (s1, s2, expected) in test_cases { + let res = editdist::edit_distance(s1, s2).unwrap(); + assert_eq!(res, expected, "edit_distance({}, {}) failed", s1, s2); + } + } + + #[test] + fn test_osadist() { + let cases = vec![ + ("abc", "abc", 0), + ("abc", "", 3), + ("", "abc", 3), + ("abc", "ab", 1), + ("abc", "abcd", 1), + ("abc", "acb", 2), + ("abc", "ca", 3), + ]; + + for (s1, s2, expected) in cases { + let got = optimal_string_alignment(s1, s2); + assert_eq!(got, expected, "osadist({}, {}) failed", s1, s2); + } + } +} diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index f53621fb9..332bd666c 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -560,6 +560,71 @@ def test_ipaddr(): ) limbo.quit() +def validate_fuzzy_leven(a): + return a == "3" + +def validate_fuzzy_damlev1(a): + return a == "2" + +def validate_fuzzy_damlev2(a): + return a == "1" + +def validate_fuzzy_editdist1(a): + return a == "225" + +def validate_fuzzy_editdist2(a): + return a == "110" + +def validate_fuzzy_jarowin(a): + return a == "0.907142857142857" + +def validate_fuzzy_osadist(a): + return a == "3" + +def test_fuzzy(): + limbo = TestTursoShell() + ext_path = "./target/debug/liblimbo_fuzzy" + limbo.run_test_fn( + "SELECT fuzzy_leven('awesome', 'aewsme');", + lambda res: "error: no such function: " in res, + "fuzzy levenshtein function returns null when ext not loaded", + ) + limbo.execute_dot(f".load {ext_path}") + limbo.run_test_fn( + "SELECT fuzzy_leven('awesome', 'aewsme');", + validate_fuzzy_leven, + "fuzzy levenshtein function works", + ) + limbo.run_test_fn( + "SELECT fuzzy_damlev('awesome', 'aewsme');", + validate_fuzzy_damlev1, + "fuzzy damerau levenshtein1 function works", + ) + limbo.run_test_fn( + "SELECT fuzzy_damlev('Something', 'Smoething');", + validate_fuzzy_damlev2, + "fuzzy damerau levenshtein2 function works", + ) + limbo.run_test_fn( + "SELECT fuzzy_editdist('abc', 'ca');", + validate_fuzzy_editdist1, + "fuzzy editdist1 function works", + ) + 
limbo.run_test_fn( + "SELECT fuzzy_editdist('abc', 'acb');", + validate_fuzzy_editdist2, + "fuzzy editdist2 function works", + ) + limbo.run_test_fn( + "SELECT fuzzy_jarowin('awesome', 'aewsme');", + validate_fuzzy_jarowin, + "fuzzy jarowin function works", + ) + limbo.run_test_fn( + "SELECT fuzzy_osadist('awesome', 'aewsme');", + validate_fuzzy_osadist, + "fuzzy osadist function works", + ) def test_vfs(): limbo = TestTursoShell() @@ -822,6 +887,7 @@ def main(): test_kv() test_csv() test_tablestats() + test_fuzzy() except Exception as e: console.error(f"Test FAILED: {e}") cleanup() From 48ec200c174afc454f58811522dbf413a1526009 Mon Sep 17 00:00:00 2001 From: danawan Date: Tue, 23 Sep 2025 13:11:42 +0700 Subject: [PATCH 02/65] clippy --- extensions/fuzzy/src/lib.rs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/extensions/fuzzy/src/lib.rs b/extensions/fuzzy/src/lib.rs index fb578e2d6..d34a42a06 100644 --- a/extensions/fuzzy/src/lib.rs +++ b/extensions/fuzzy/src/lib.rs @@ -405,7 +405,7 @@ mod tests { for (s1, s2, expected) in cases { let got = damlev(s1, s2); - assert_eq!(got, expected, "damlev({}, {}) failed", s1, s2); + assert_eq!(got, expected, "damlev({s1}, {s2}) failed"); } } @@ -421,7 +421,7 @@ mod tests { for (s1, s2, expected) in cases { let got = hamming_dist(s1, s2); - assert_eq!(got, expected, "hamming({}, {}) failed", s1, s2); + assert_eq!(got, expected, "hamming({s1}, {s2}) failed"); } } @@ -442,20 +442,12 @@ mod tests { let got_rounded = (got * 1000.0).round() / 1000.0; assert!( (got_rounded - expected).abs() < 1e-6, - "jaro_winkler({}, {}) failed: got {}, expected {}", - s1, - s2, - got_rounded, - expected + "jaro_winkler({s1}, {s2}) failed: got {got_rounded}, expected {expected}" ); } else { assert!( (got - expected).abs() < 1e-6, - "jaro_winkler({}, {}) failed: got {}, expected {}", - s1, - s2, - got, - expected + "jaro_winkler({s1}, {s2}) failed: got {got}, expected {expected}" ); } } @@ -475,7 +467,7 @@ mod tests { for (s1, s2, expected) in cases { let got = leven(s1, s2); - assert_eq!(got, expected, "leven({}, {}) failed", s1, s2); + assert_eq!(got, expected, "leven({s1}, {s2}) failed"); } } @@ -498,7 +490,7 @@ mod tests { ]; for (s1, s2, expected) in test_cases { let res = editdist::edit_distance(s1, s2).unwrap(); - assert_eq!(res, expected, "edit_distance({}, {}) failed", s1, s2); + assert_eq!(res, expected, "edit_distance({s1}, {s2}) failed"); } } @@ -516,7 +508,7 @@ mod tests { for (s1, s2, expected) in cases { let got = optimal_string_alignment(s1, s2); - assert_eq!(got, expected, "osadist({}, {}) failed", s1, s2); + assert_eq!(got, expected, "osadist({s1}, {s2}) failed"); } } } From 80508910ea843389ee60b15594fe472c3ac93846 Mon Sep 17 00:00:00 2001 From: danawan Date: Tue, 23 Sep 2025 13:28:13 +0700 Subject: [PATCH 03/65] ruff check --fix --- testing/cli_tests/extensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index 332bd666c..d88a771f5 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -614,7 +614,7 @@ def test_fuzzy(): "SELECT fuzzy_editdist('abc', 'acb');", validate_fuzzy_editdist2, "fuzzy editdist2 function works", - ) + ) limbo.run_test_fn( "SELECT fuzzy_jarowin('awesome', 'aewsme');", validate_fuzzy_jarowin, From 815f0ffe259a418ea05111b38abbe8cd062b6549 Mon Sep 17 00:00:00 2001 From: danawan Date: Wed, 24 Sep 2025 13:58:19 +0700 Subject: [PATCH 04/65] add phonetic and soundex --- 
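Usage sketch for the new functions (expected values are taken from the unit tests and CLI tests added in this patch): SELECT fuzzy_soundex('phonetics'); -- P532 SELECT fuzzy_phonetic('phonetics'); -- BAMADAC SELECT fuzzy_phonetic('awesome'); -- ABACAMA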
extensions/fuzzy/src/common.rs | 28 ++++++-- extensions/fuzzy/src/lib.rs | 60 ++++++++++++++++- extensions/fuzzy/src/phonetic.rs | 110 +++++++++++++++++++++++++++++++ extensions/fuzzy/src/soundex.rs | 65 ++++++++++++++++++ testing/cli_tests/extensions.py | 11 ++++ 5 files changed, 267 insertions(+), 7 deletions(-) create mode 100644 extensions/fuzzy/src/phonetic.rs create mode 100644 extensions/fuzzy/src/soundex.rs diff --git a/extensions/fuzzy/src/common.rs b/extensions/fuzzy/src/common.rs index 4b0c12fd1..9fc5a3a4d 100644 --- a/extensions/fuzzy/src/common.rs +++ b/extensions/fuzzy/src/common.rs @@ -2,13 +2,12 @@ pub const CCLASS_SILENT: u8 = 0; pub const CCLASS_VOWEL: u8 = 1; pub const CCLASS_B: u8 = 2; pub const CCLASS_Y: u8 = 9; -//This will be useful in the phonetic -//pub const CCLASS_L: u8 = 6; -//pub const CCLASS_R: u8 = 7; +pub const CCLASS_L: u8 = 6; +pub const CCLASS_R: u8 = 7; //pub const CCLASS_M: u8 = 8; -//pub const CCLASS_DIGIT: u8 = 10; -//pub const CCLASS_SPACE: u8 = 11; -//pub const CCLASS_OTHER: u8 = 12; +pub const CCLASS_DIGIT: u8 = 10; +pub const CCLASS_SPACE: u8 = 11; +pub const CCLASS_OTHER: u8 = 12; pub const MID_CLASS: [u8; 128] = [ 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, 12, 11, 11, 12, 12, // 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, // @@ -30,3 +29,20 @@ pub const INIT_CLASS: [u8; 128] = [ 12, 1, 2, 3, 4, 1, 2, 3, 0, 1, 3, 3, 6, 8, 8, 1, // 2, 3, 7, 3, 4, 1, 2, 2, 3, 9, 3, 12, 12, 12, 12, 12, // ]; + +// Based on: const unsigned char className[] = ".ABCDHLRMY9 ?"; +pub const CLASS_NAME: [u8; 13] = [ + b'.', // CCLASS_SILENT (0) -> . + b'A', // CCLASS_VOWEL (1) -> A + b'B', // CCLASS_B (2) -> B + b'C', // CCLASS_C (3) -> C + b'D', // CCLASS_D (4) -> D + b'H', // CCLASS_H (5) -> H + b'L', // CCLASS_L (6) -> L + b'R', // CCLASS_R (7) -> R + b'M', // CCLASS_M (8) -> M + b'Y', // CCLASS_Y (9) -> Y + b'9', // CCLASS_DIGIT (10) -> 9 + b' ', // CCLASS_SPACE (11) -> space + b'?', // CCLASS_OTHER (12) -> ? +]; diff --git a/extensions/fuzzy/src/lib.rs b/extensions/fuzzy/src/lib.rs index d34a42a06..3ede9faa5 100644 --- a/extensions/fuzzy/src/lib.rs +++ b/extensions/fuzzy/src/lib.rs @@ -3,9 +3,11 @@ use std::cmp; use turso_ext::{register_extension, scalar, ResultCode, Value}; mod common; mod editdist; +mod phonetic; +mod soundex; register_extension! { - scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist}, + scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic}, } /// Calculates and returns the Levenshtein distance of two non NULL strings. 
@@ -386,6 +388,26 @@ fn optimal_string_alignment(s1: &str, s2: &str) -> usize { matrix[len1][len2] } +#[scalar(name = "fuzzy_soundex")] +pub fn fuzzy_soundex(args: &[Value]) { + let arg1 = args[0].to_text(); + if let Some(txt) = soundex::soundex(arg1) { + Value::from_text(txt) + } else { + Value::null() + } +} + +#[scalar(name = "fuzzy_phonetic")] +pub fn fuzzy_phonetic(args: &[Value]) { + let arg1 = args[0].to_text(); + if let Some(txt) = phonetic::phonetic_hash_str(arg1) { + Value::from_text(txt) + } else { + Value::null() + } +} + //tests adapted from sqlean fuzzy #[cfg(test)] mod tests { @@ -511,4 +533,40 @@ mod tests { assert_eq!(got, expected, "osadist({s1}, {s2}) failed"); } } + #[test] + fn test_soundex() { + let cases = vec![ + (None, None), + (Some(""), Some("".to_string())), + (Some("phonetics"), Some("P532".to_string())), + (Some("is"), Some("I200".to_string())), + (Some("awesome"), Some("A250".to_string())), + ]; + + for (input, expected) in cases { + let result = soundex::soundex(input); + assert_eq!( + result, expected, + "fuzzy_soundex({input:?}) failed: expected {expected:?}, got {result:?}" + ); + } + } + #[test] + fn test_phonetic() { + let cases = vec![ + (None, None), + (Some(""), Some("".to_string())), + (Some("phonetics"), Some("BAMADAC".to_string())), + (Some("is"), Some("AC".to_string())), + (Some("awesome"), Some("ABACAMA".to_string())), + ]; + + for (input, expected) in cases { + let result = phonetic::phonetic_hash_str(input); + assert_eq!( + result, expected, + "fuzzy_phonetic({input:?}) failed: expected {expected:?}, got {result:?}" + ); + } + } } diff --git a/extensions/fuzzy/src/phonetic.rs b/extensions/fuzzy/src/phonetic.rs new file mode 100644 index 000000000..624e7ed27 --- /dev/null +++ b/extensions/fuzzy/src/phonetic.rs @@ -0,0 +1,110 @@ +use crate::common::*; + +/// Generate a "phonetic hash" from a string of ASCII characters. +/// +/// The algorithm: +/// Maps characters by character class as defined above +/// Omits double-letters +/// Omits vowels beside R and L +/// Omits T when followed by CH +/// Omits W when followed by R +/// Omits D when followed by J or G +/// Omits K in KN or G in GN at the beginning of a word +/// +/// Returns a Vec containing the phonetic hash, or None if input is invalid. 
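+/// For example (mirroring the unit tests below), b"phonetics" hashes to b"BAMADAC".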
+pub fn phonetic_hash(z_in: &[u8]) -> Option<Vec<u8>> { + if z_in.is_empty() { + return Some(Vec::new()); + } + + let mut z_out = Vec::with_capacity(z_in.len() + 1); + let mut c_prev = 0x77u8; + let mut c_prev_x = 0x77u8; + let mut a_class = &INIT_CLASS; + + let mut input = z_in; + if z_in.len() > 2 { + match z_in[0] { + b'g' | b'k' => { + if z_in[1] == b'n' { + input = &z_in[1..]; + } + } + _ => {} + } + } + + let mut i = 0; + while i < input.len() { + let mut c = input[i]; + + if i + 1 < input.len() { + if c == b'w' && input[i + 1] == b'r' { + i += 1; + continue; + } + if c == b'd' && (input[i + 1] == b'j' || input[i + 1] == b'g') { + i += 1; + continue; + } + if i + 2 < input.len() && c == b't' && input[i + 1] == b'c' && input[i + 2] == b'h' { + i += 1; + continue; + } + } + + c = a_class[(c & 0x7f) as usize]; + + if c == CCLASS_SPACE { + i += 1; + continue; + } + + if c == CCLASS_OTHER && c_prev != CCLASS_DIGIT { + i += 1; + continue; + } + + a_class = &MID_CLASS; + + if c == CCLASS_VOWEL && (c_prev_x == CCLASS_R || c_prev_x == CCLASS_L) { + i += 1; + continue; + } + + if (c == CCLASS_R || c == CCLASS_L) && c_prev_x == CCLASS_VOWEL && !z_out.is_empty() { + z_out.pop(); + } + + c_prev = c; + + if c == CCLASS_SILENT { + i += 1; + continue; + } + + c_prev_x = c; + if (c as usize) < CLASS_NAME.len() { + c = CLASS_NAME[c as usize]; + } else { + c = b'?'; + } + + if z_out.is_empty() || c != *z_out.last().unwrap() { + z_out.push(c); + } + + i += 1; + } + + Some(z_out) +} + +pub fn phonetic_hash_str(input: Option<&str>) -> Option<String> { + match input { + None => None, + Some(s) => { + phonetic_hash(s.as_bytes()).map(|bytes| String::from_utf8_lossy(&bytes).into_owned()) + } + } +} diff --git a/extensions/fuzzy/src/soundex.rs b/extensions/fuzzy/src/soundex.rs new file mode 100644 index 000000000..809a76df3 --- /dev/null +++ b/extensions/fuzzy/src/soundex.rs @@ -0,0 +1,65 @@ +/// Computes and returns the soundex representation of a given string.
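+/// For example (as in the unit tests below), soundex(Some("phonetics")) returns Some("P532".to_string()).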
+/// https://en.wikipedia.org/wiki/Soundex +pub fn soundex(input: Option<&str>) -> Option<String> { + if let Some(input_str) = input { + if input_str.is_empty() { + return Some("".to_string()); + } + + let str_bytes = input_str.as_bytes(); + let str_len = str_bytes.len(); + + let mut code = String::with_capacity(4); + code.push(str_bytes[0].to_ascii_uppercase() as char); + + let mut buf: Vec<char> = Vec::with_capacity(str_len); + for &byte in str_bytes { + buf.push(soundex_encode(byte as char)); + } + + let mut d = 1; // digit counter + let mut i = 1; // index counter + + while i < str_len && d < 4 { + let current = buf[i]; + let previous = buf[i - 1]; + + if current != previous && current != '0' { + if i > 1 { + let two_back = buf[i - 2]; + let separator = str_bytes[i - 1].to_ascii_lowercase() as char; + if current == two_back && (separator == 'h' || separator == 'w') { + i += 1; + continue; + } + } + + code.push(current); + d += 1; + } + i += 1; + } + + while d < 4 { + code.push('0'); + d += 1; + } + + Some(code) + } else { + None + } +} + +/// Helper function +fn soundex_encode(c: char) -> char { + match c.to_ascii_lowercase() { + 'b' | 'f' | 'p' | 'v' => '1', + 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2', + 'd' | 't' => '3', + 'l' => '4', + 'm' | 'n' => '5', + 'r' => '6', + _ => '0', + } +} diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index d88a771f5..dbe142a60 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -581,6 +581,12 @@ def validate_fuzzy_jarowin(a): def validate_fuzzy_osadist(a): return a == "2" +def validate_fuzzy_soundex(a): + return a == "A250" + +def validate_fuzzy_phonetic(a): + return a == "ABACAMA" + def test_fuzzy(): limbo = TestTursoShell() ext_path = "./target/debug/liblimbo_fuzzy" @@ -625,6 +631,11 @@ def test_fuzzy(): validate_fuzzy_osadist, "fuzzy osadist function works", ) + limbo.run_test_fn( + "SELECT fuzzy_phonetic('awesome');", + validate_fuzzy_phonetic, + "fuzzy phonetic function works", + ) def test_vfs(): limbo = TestTursoShell() From edb6da2d5751d9be60f762bbf26539dc74e84e0b Mon Sep 17 00:00:00 2001 From: danawan Date: Wed, 24 Sep 2025 15:01:41 +0700 Subject: [PATCH 05/65] pre.8 --- Cargo.lock | 56 +++++++++++++++++++++++++++--------------------------- Cargo.toml | 36 +++++++++++++++++------------------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a331618e6..50fd696ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -666,7 +666,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core_tester" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "anyhow", "assert_cmd", @@ -2108,7 +2108,7 @@ dependencies = [ [[package]] name = "limbo_completion" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "mimalloc", "turso_ext", @@ -2116,7 +2116,7 @@ dependencies = [ [[package]] name = "limbo_crypto" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "blake3", "data-encoding", @@ -2129,7 +2129,7 @@ dependencies = [ [[package]] name = "limbo_csv" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "csv", "mimalloc", @@ -2139,7 +2139,7 @@ dependencies = [ [[package]] name = "limbo_fuzzy" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "mimalloc", "turso_ext", @@ -2147,7 +2147,7 @@ dependencies = [ [[package]] name = "limbo_ipaddr" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "ipnetwork", "mimalloc", @@ -2156,7 
+2156,7 @@ dependencies = [ [[package]] name = "limbo_percentile" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "mimalloc", "turso_ext", @@ -2164,7 +2164,7 @@ dependencies = [ [[package]] name = "limbo_regexp" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "mimalloc", "regex", @@ -2173,7 +2173,7 @@ dependencies = [ [[package]] name = "limbo_sim" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "anyhow", "chrono", @@ -2209,7 +2209,7 @@ dependencies = [ [[package]] name = "limbo_sqlite_test_ext" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "cc", ] @@ -2930,7 +2930,7 @@ dependencies = [ [[package]] name = "py-turso" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "anyhow", "pyo3", @@ -3630,7 +3630,7 @@ checksum = "d372029cb5195f9ab4e4b9aef550787dce78b124fcaee8d82519925defcd6f0d" [[package]] name = "sql_generation" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "anarchist-readable-name-generator-lib 0.2.0", "anyhow", @@ -4131,7 +4131,7 @@ dependencies = [ [[package]] name = "turso" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "rand 0.9.2", "rand_chacha 0.9.0", @@ -4143,7 +4143,7 @@ dependencies = [ [[package]] name = "turso-java" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "jni", "thiserror 2.0.16", @@ -4152,7 +4152,7 @@ dependencies = [ [[package]] name = "turso_cli" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "anyhow", "cfg-if", @@ -4185,7 +4185,7 @@ dependencies = [ [[package]] name = "turso_core" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "aegis", "aes", @@ -4243,7 +4243,7 @@ dependencies = [ [[package]] name = "turso_dart" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "flutter_rust_bridge", "turso_core", @@ -4251,7 +4251,7 @@ dependencies = [ [[package]] name = "turso_ext" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "chrono", "getrandom 0.3.2", @@ -4260,7 +4260,7 @@ dependencies = [ [[package]] name = "turso_ext_tests" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "env_logger 0.11.7", "lazy_static", @@ -4271,7 +4271,7 @@ dependencies = [ [[package]] name = "turso_macros" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "proc-macro2", "quote", @@ -4280,7 +4280,7 @@ dependencies = [ [[package]] name = "turso_node" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "chrono", "napi", @@ -4293,7 +4293,7 @@ dependencies = [ [[package]] name = "turso_parser" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "bitflags 2.9.4", "criterion", @@ -4309,7 +4309,7 @@ dependencies = [ [[package]] name = "turso_sqlite3" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "env_logger 0.11.7", "libc", @@ -4322,7 +4322,7 @@ dependencies = [ [[package]] name = "turso_sqlite3_parser" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "bitflags 2.9.4", "cc", @@ -4340,7 +4340,7 @@ dependencies = [ [[package]] name = "turso_stress" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "anarchist-readable-name-generator-lib 0.1.2", "antithesis_sdk", @@ -4356,7 +4356,7 @@ dependencies = [ [[package]] name = "turso_sync_engine" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "base64", "bytes", @@ -4382,7 +4382,7 @@ dependencies = [ [[package]] name = "turso_sync_js" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ 
"genawaiter", "napi", @@ -4397,7 +4397,7 @@ dependencies = [ [[package]] name = "turso_whopper" -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index 7ffeebf3e..9f03ea957 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,30 +40,30 @@ exclude = [ ] [workspace.package] -version = "0.2.0-pre.7" +version = "0.2.0-pre.8" authors = ["the Limbo authors"] edition = "2021" license = "MIT" repository = "https://github.com/tursodatabase/turso" [workspace.dependencies] -turso = { path = "bindings/rust", version = "0.2.0-pre.7" } -turso_node = { path = "bindings/javascript", version = "0.2.0-pre.7" } -limbo_completion = { path = "extensions/completion", version = "0.2.0-pre.7" } -turso_core = { path = "core", version = "0.2.0-pre.7" } -turso_sync_engine = { path = "sync/engine", version = "0.2.0-pre.7" } -limbo_crypto = { path = "extensions/crypto", version = "0.2.0-pre.7" } -limbo_csv = { path = "extensions/csv", version = "0.2.0-pre.7" } -turso_ext = { path = "extensions/core", version = "0.2.0-pre.7" } -turso_ext_tests = { path = "extensions/tests", version = "0.2.0-pre.7" } -limbo_ipaddr = { path = "extensions/ipaddr", version = "0.2.0-pre.7" } -turso_macros = { path = "macros", version = "0.2.0-pre.7" } -limbo_percentile = { path = "extensions/percentile", version = "0.2.0-pre.7" } -limbo_regexp = { path = "extensions/regexp", version = "0.2.0-pre.7" } -turso_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.2.0-pre.7" } -limbo_uuid = { path = "extensions/uuid", version = "0.2.0-pre.7" } -limbo_fuzzy = { path = "extensions/fuzzy", version = "0.2.0-pre.7" } -turso_parser = { path = "parser", version = "0.2.0-pre.7" } +turso = { path = "bindings/rust", version = "0.2.0-pre.8" } +turso_node = { path = "bindings/javascript", version = "0.2.0-pre.8" } +limbo_completion = { path = "extensions/completion", version = "0.2.0-pre.8" } +turso_core = { path = "core", version = "0.2.0-pre.8" } +turso_sync_engine = { path = "sync/engine", version = "0.2.0-pre.8" } +limbo_crypto = { path = "extensions/crypto", version = "0.2.0-pre.8" } +limbo_csv = { path = "extensions/csv", version = "0.2.0-pre.8" } +turso_ext = { path = "extensions/core", version = "0.2.0-pre.8" } +turso_ext_tests = { path = "extensions/tests", version = "0.2.0-pre.8" } +limbo_ipaddr = { path = "extensions/ipaddr", version = "0.2.0-pre.8" } +turso_macros = { path = "macros", version = "0.2.0-pre.8" } +limbo_percentile = { path = "extensions/percentile", version = "0.2.0-pre.8" } +limbo_regexp = { path = "extensions/regexp", version = "0.2.0-pre.8" } +turso_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.2.0-pre.8" } +limbo_uuid = { path = "extensions/uuid", version = "0.2.0-pre.8" } +turso_parser = { path = "parser", version = "0.2.0-pre.8" } +limbo_fuzzy = { path = "extensions/fuzzy", version = "0.2.0-pre.8" } sql_generation = { path = "sql_generation" } strum = { version = "0.26", features = ["derive"] } strum_macros = "0.26" From 54a95a0b5567788fdc46c03e548303b06b24f252 Mon Sep 17 00:00:00 2001 From: danawan Date: Thu, 25 Sep 2025 10:23:24 +0700 Subject: [PATCH 06/65] add caverphone --- extensions/fuzzy/src/caver.rs | 191 ++++++++++++++++++++++++++++++++ extensions/fuzzy/src/lib.rs | 32 +++++- testing/cli_tests/extensions.py | 8 ++ 3 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 extensions/fuzzy/src/caver.rs diff --git a/extensions/fuzzy/src/caver.rs b/extensions/fuzzy/src/caver.rs new file mode 100644 index 
000000000..7a89f8b5e --- /dev/null +++ b/extensions/fuzzy/src/caver.rs @@ -0,0 +1,191 @@ +// remove_non_letters deletes everything from the source string, +// except lowercased letters a-z +fn remove_non_letters(src: &str) -> String { + src.chars() + .filter(|x: &char| x.is_ascii_lowercase()) + .collect() +} + +// replace_start replaces the `old` substring with the `new` one +// if it matches at the beginning of the `src` string +fn replace_start(src: &str, old: &str, new: &str) -> String { + if let Some(suffix) = src.strip_prefix(old) { + let mut result = String::with_capacity(src.len() - old.len() + new.len()); + result.push_str(new); + result.push_str(suffix); + result + } else { + src.to_string() + } +} + +// replace_end replaces the `old` substring with the `new` one +// if it matches at the end of the `src` string +fn replace_end(src: &str, old: &str, new: &str) -> String { + if let Some(prefix) = src.strip_suffix(old) { + let mut result = String::with_capacity(src.len() - old.len() + new.len()); + result.push_str(prefix); + result.push_str(new); + result + } else { + src.to_string() + } +} + +// replace replaces all `old` substrings with `new` ones +// in the `src` string +fn replace(src: &str, old: &str, new: &str) -> String { + if old.is_empty() || src.is_empty() { + return src.to_string(); + } + + let mut result = String::with_capacity(src.len()); + let mut idx = 0; + + while idx < src.len() { + if idx + old.len() <= src.len() && &src[idx..idx + old.len()] == old { + result.push_str(new); + idx += old.len(); + } else { + let ch = src[idx..].chars().next().unwrap(); + result.push(ch); + idx += ch.len_utf8(); + } + } + + result +} + +// replace_seq replaces all sequences of the `old` character +// with the `new` substring in the `src` string +fn replace_seq(src: &str, old: char, new: &str) -> String { + let mut result = String::with_capacity(src.len()); + let mut match_len = 0; + + for ch in src.chars() { + if ch == old { + match_len += 1; + } else { + if match_len > 0 { + result.push_str(new); + match_len = 0; + } + result.push(ch); + } + } + + if match_len > 0 { + result.push_str(new); + } + + result +} + +// pad pads `src` string with trailing 1s +// up to the length of 10 characters +fn pad(src: &str) -> String { + let max_len = 10; + let mut result = String::with_capacity(max_len); + for ch in src.chars().take(max_len) { + result.push(ch); + } + while result.chars().count() < max_len { + result.push('1'); + } + + result +} + +// caverphone implements the Caverphone phonetic hashing algorithm +// https://en.wikipedia.org/wiki/Caverphone +fn caverphone(src: &str) -> String { + if src.is_empty() { + return String::new(); + } + + let mut res = remove_non_letters(src); + res = replace_end(&res, "e", ""); + res = replace_start(&res, "cough", "cou2f"); + res = replace_start(&res, "rough", "rou2f"); + res = replace_start(&res, "tough", "tou2f"); + res = replace_start(&res, "enough", "enou2f"); + res = replace_start(&res, "trough", "trou2f"); + + res = replace_start(&res, "gn", "2n"); + res = replace_end(&res, "mb", "m2"); + + res = replace(&res, "cq", "2q"); + res = replace(&res, "ci", "si"); + res = replace(&res, "ce", "se"); + res = replace(&res, "cy", "sy"); + res = replace(&res, "tch", "2ch"); + res = replace(&res, "c", "k"); + res = replace(&res, "q", "k"); + res = replace(&res, "x", "k"); + res = replace(&res, "v", "f"); + res = replace(&res, "dg", "2g"); + res = replace(&res, "tio", "sio"); + res = replace(&res, "tia", "sia"); + res = replace(&res, "d", "t"); + res = replace(&res, "ph", "fh");
replace(&res, "ph", "fh"); + res = replace(&res, "b", "p"); + res = replace(&res, "sh", "s2"); + res = replace(&res, "z", "s"); + + res = replace_start(&res, "a", "A"); + res = replace_start(&res, "e", "A"); + res = replace_start(&res, "i", "A"); + res = replace_start(&res, "o", "A"); + res = replace_start(&res, "u", "A"); + + res = replace(&res, "a", "3"); + res = replace(&res, "e", "3"); + res = replace(&res, "i", "3"); + res = replace(&res, "o", "3"); + res = replace(&res, "u", "3"); + + res = replace(&res, "j", "y"); + res = replace_start(&res, "y3", "Y3"); + res = replace_start(&res, "y", "A"); + res = replace(&res, "y", "3"); + + res = replace(&res, "3gh3", "3kh3"); + res = replace(&res, "gh", "22"); + res = replace(&res, "g", "k"); + + res = replace_seq(&res, 's', "S"); + res = replace_seq(&res, 't', "T"); + res = replace_seq(&res, 'p', "P"); + res = replace_seq(&res, 'k', "K"); + res = replace_seq(&res, 'f', "F"); + res = replace_seq(&res, 'm', "M"); + res = replace_seq(&res, 'n', "N"); + + res = replace(&res, "w3", "W3"); + res = replace(&res, "wh3", "Wh3"); + res = replace_end(&res, "w", "3"); + res = replace(&res, "w", "2"); + + res = replace_start(&res, "h", "A"); + res = replace(&res, "h", "2"); + + res = replace(&res, "r3", "R3"); + res = replace_end(&res, "r", "3"); + res = replace(&res, "r", "2"); + + res = replace(&res, "l3", "L3"); + res = replace_end(&res, "l", "3"); + res = replace(&res, "l", "2"); + + res = replace(&res, "2", ""); + res = replace_end(&res, "3", "A"); + res = replace(&res, "3", ""); + + res = pad(&res); + + res +} + +pub fn caver_str(input: Option<&str>) -> Option { + input.map(caverphone) +} diff --git a/extensions/fuzzy/src/lib.rs b/extensions/fuzzy/src/lib.rs index 3ede9faa5..64f65804d 100644 --- a/extensions/fuzzy/src/lib.rs +++ b/extensions/fuzzy/src/lib.rs @@ -1,13 +1,14 @@ // Adapted from sqlean fuzzy use std::cmp; use turso_ext::{register_extension, scalar, ResultCode, Value}; +mod caver; mod common; mod editdist; mod phonetic; mod soundex; register_extension! { - scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic}, + scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic, fuzzy_caver}, } /// Calculates and returns the Levenshtein distance of two non NULL strings. 
@@ -408,6 +409,16 @@ pub fn fuzzy_phonetic(args: &[Value]) { } } +#[scalar(name = "fuzzy_caver")] +pub fn fuzzy_caver(args: &[Value]) { + let arg1 = args[0].to_text(); + if let Some(txt) = caver::caver_str(arg1) { + Value::from_text(txt) + } else { + Value::null() + } +} + //tests adapted from sqlean fuzzy #[cfg(test)] mod tests { @@ -569,4 +580,23 @@ mod tests { ); } } + + #[test] + fn test_caver() { + let cases = vec![ + (None, None), + (Some(""), Some("".to_string())), + (Some("phonetics"), Some("FNTKS11111".to_string())), + (Some("is"), Some("AS11111111".to_string())), + (Some("awesome"), Some("AWSM111111".to_string())), + ]; + + for (input, expected) in cases { + let result = caver::caver_str(input); + assert_eq!( + result, expected, + "fuzzy_caver({input:?}) failed: expected {expected:?}, got {result:?}" + ); + } + } } diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index dbe142a60..6d8527a91 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -587,6 +587,9 @@ def validate_fuzzy_soundex(a): def validate_fuzzy_phonetic(a): return a == "ABACAMA" +def validate_fuzzy_caver(a): + return a == "AWSM111111" + def test_fuzzy(): limbo = TestTursoShell() ext_path = "./target/debug/liblimbo_fuzzy" @@ -636,6 +639,11 @@ def test_fuzzy(): validate_fuzzy_phonetic, "fuzzy phonetic function works", ) + limbo.run_test_fn( + "SELECT fuzzy_caver('awesome');", + validate_fuzzy_caver, + "fuzzy caver function works", + ) def test_vfs(): limbo = TestTursoShell() From 189caa5d5df4b652cc1b5239182a1c1c50a0ca5b Mon Sep 17 00:00:00 2001 From: danawan Date: Thu, 25 Sep 2025 16:10:34 +0700 Subject: [PATCH 07/65] rsoundex --- extensions/fuzzy/src/lib.rs | 31 +++++++++++++++++++- extensions/fuzzy/src/rsoundex.rs | 49 ++++++++++++++++++++++++++++++++ testing/cli_tests/extensions.py | 8 ++++++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 extensions/fuzzy/src/rsoundex.rs diff --git a/extensions/fuzzy/src/lib.rs b/extensions/fuzzy/src/lib.rs index 64f65804d..aeb5dad9f 100644 --- a/extensions/fuzzy/src/lib.rs +++ b/extensions/fuzzy/src/lib.rs @@ -5,10 +5,11 @@ mod caver; mod common; mod editdist; mod phonetic; +mod rsoundex; mod soundex; register_extension! { - scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic, fuzzy_caver}, + scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic, fuzzy_caver, fuzzy_rsoundex}, } /// Calculates and returns the Levenshtein distance of two non NULL strings. 
@@ -419,6 +420,16 @@ pub fn fuzzy_caver(args: &[Value]) { } } +#[scalar(name = "fuzzy_rsoundex")] +pub fn fuzzy_rsoundex(args: &[Value]) { + let arg1 = args[0].to_text(); + if let Some(txt) = rsoundex::rsoundex(arg1) { + Value::from_text(txt) + } else { + Value::null() + } +} + //tests adapted from sqlean fuzzy #[cfg(test)] mod tests { @@ -599,4 +610,22 @@ mod tests { ); } } + #[test] + fn test_rsoundex() { + let cases = vec![ + (None, None), + (Some(""), Some("".to_string())), + (Some("phonetics"), Some("P1080603".to_string())), + (Some("is"), Some("I03".to_string())), + (Some("awesome"), Some("A03080".to_string())), + ]; + + for (input, expected) in cases { + let result = rsoundex::rsoundex(input); + assert_eq!( + result, expected, + "fuzzy_rsoundex({input:?}) failed: expected {expected:?}, got {result:?}" + ); + } + } } diff --git a/extensions/fuzzy/src/rsoundex.rs b/extensions/fuzzy/src/rsoundex.rs new file mode 100644 index 000000000..2163d0cce --- /dev/null +++ b/extensions/fuzzy/src/rsoundex.rs @@ -0,0 +1,49 @@ +/// Computes and returns the refined soundex representation of a given non NULL string. +/// More information about the algorithm can be found here: +/// http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html +pub fn rsoundex(input: Option<&str>) -> Option<String> { + if let Some(s) = input { + if s.is_empty() { + return Some("".to_string()); + } + + let str_bytes = s.as_bytes(); + let str_len = str_bytes.len(); + + let mut code = String::with_capacity(str_len + 1); + code.push(str_bytes[0].to_ascii_uppercase() as char); + + let mut buf: Vec<char> = Vec::with_capacity(str_len); + for &b in str_bytes { + buf.push(refined_soundex_encode(b as char)); + } + + let mut prev: Option<char> = None; + for c in buf { + if Some(c) != prev { + code.push(c); + prev = Some(c); + } + } + + Some(code) + } else { + None + } +} + +// Helper function +fn refined_soundex_encode(c: char) -> char { + match c.to_ascii_lowercase() { + 'b' | 'p' => '1', + 'f' | 'v' => '2', + 'c' | 'k' | 's' => '3', + 'g' | 'j' => '4', + 'q' | 'x' | 'z' => '5', + 'd' | 't' => '6', + 'l' => '7', + 'm' | 'n' => '8', + 'r' => '9', + _ => '0', + } +} diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index 6d8527a91..a6c6cb89c 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -590,6 +590,9 @@ def validate_fuzzy_phonetic(a): def validate_fuzzy_caver(a): return a == "AWSM111111" +def validate_fuzzy_rsoundex(a): + return a == "A03080" + def test_fuzzy(): limbo = TestTursoShell() ext_path = "./target/debug/liblimbo_fuzzy" @@ -644,6 +647,11 @@ def test_fuzzy(): validate_fuzzy_caver, "fuzzy caver function works", ) + limbo.run_test_fn( + "SELECT fuzzy_rsoundex('awesome');", + validate_fuzzy_rsoundex, + "fuzzy rsoundex function works", + ) def test_vfs(): limbo = TestTursoShell() From 468046c654569f020205b32392d67093e874901f Mon Sep 17 00:00:00 2001 From: danawan Date: Fri, 26 Sep 2025 15:20:27 +0700 Subject: [PATCH 08/65] translit and script --- extensions/fuzzy/src/common.rs | 6 + extensions/fuzzy/src/lib.rs | 47 ++- extensions/fuzzy/src/translit.rs | 577 +++++++++++++++++++++++++++++++ testing/cli_tests/extensions.py | 24 ++ 4 files changed, 648 insertions(+), 6 deletions(-) create mode 100644 extensions/fuzzy/src/translit.rs diff --git a/extensions/fuzzy/src/common.rs b/extensions/fuzzy/src/common.rs index 9fc5a3a4d..dee8e7d60 100644 --- a/extensions/fuzzy/src/common.rs +++ b/extensions/fuzzy/src/common.rs @@ -46,3 +46,9 @@ pub const CLASS_NAME: [u8; 13] = [ b' ', // CCLASS_SPACE (11) -> 
space b'?', // CCLASS_OTHER (12) -> ? ]; + +pub const SCRIPT_LATIN: u32 = 0x0001; +pub const SCRIPT_CYRILLIC: u32 = 0x0002; +pub const SCRIPT_GREEK: u32 = 0x0004; +pub const SCRIPT_HEBREW: u32 = 0x0008; +pub const SCRIPT_ARABIC: u32 = 0x0010; diff --git a/extensions/fuzzy/src/lib.rs b/extensions/fuzzy/src/lib.rs index aeb5dad9f..d43985ad5 100644 --- a/extensions/fuzzy/src/lib.rs +++ b/extensions/fuzzy/src/lib.rs @@ -7,9 +7,10 @@ mod editdist; mod phonetic; mod rsoundex; mod soundex; +mod translit; register_extension! { - scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic, fuzzy_caver, fuzzy_rsoundex}, + scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic, fuzzy_caver, fuzzy_rsoundex, fuzzy_translit, fuzzy_script} } /// Calculates and returns the Levenshtein distance of two non NULL strings. @@ -167,7 +168,7 @@ fn damlev(s1: &str, s2: &str) -> i64 { // cost. // #[scalar(name = "fuzzy_editdist")] -pub fn edit_distance(args: &[Value]) { +fn edit_distance(args: &[Value]) { let Some(arg1) = args[0].to_text() else { return Value::error(ResultCode::InvalidArgs); }; @@ -311,7 +312,7 @@ fn jaro(s1: &str, s2: &str) -> f64 { /// Computes and returns the Optimal String Alignment distance for two non NULL #[scalar(name = "fuzzy_osadist")] -pub fn osadist(args: &[Value]) { +fn osadist(args: &[Value]) { let Some(arg1) = args[0].to_text() else { return Value::error(ResultCode::InvalidArgs); }; @@ -391,7 +392,7 @@ fn optimal_string_alignment(s1: &str, s2: &str) -> usize { } #[scalar(name = "fuzzy_soundex")] -pub fn fuzzy_soundex(args: &[Value]) { +fn fuzzy_soundex(args: &[Value]) { let arg1 = args[0].to_text(); if let Some(txt) = soundex::soundex(arg1) { Value::from_text(txt) @@ -401,7 +402,7 @@ pub fn fuzzy_soundex(args: &[Value]) { } #[scalar(name = "fuzzy_phonetic")] -pub fn fuzzy_phonetic(args: &[Value]) { +fn fuzzy_phonetic(args: &[Value]) { let arg1 = args[0].to_text(); if let Some(txt) = phonetic::phonetic_hash_str(arg1) { Value::from_text(txt) @@ -411,7 +412,7 @@ pub fn fuzzy_phonetic(args: &[Value]) { } #[scalar(name = "fuzzy_caver")] -pub fn fuzzy_caver(args: &[Value]) { +fn fuzzy_caver(args: &[Value]) { let arg1 = args[0].to_text(); if let Some(txt) = caver::caver_str(arg1) { Value::from_text(txt) @@ -430,6 +431,40 @@ pub fn fuzzy_rsoundex(args: &[Value]) { } } +//Convert a string that contains non-ASCII Roman characters into +//pure ASCII. +#[scalar(name = "fuzzy_translit")] +fn fuzzy_translit(args: &[Value]) { + let Some(arg) = args[0].to_text() else { + return Value::error(ResultCode::InvalidArgs); + }; + let dist = translit::transliterate_str(arg); + return Value::from_text(dist); +} + +// Try to determine the dominant script used by the word X and return +// its ISO 15924 numeric code. +// +// The current implementation only understands the following scripts: +// +// 125 (Hebrew) +// 160 (Arabic) +// 200 (Greek) +// 215 (Latin) +// 220 (Cyrillic) +// +// This routine will return 998 if the input X contains characters from +// two or more of the above scripts or 999 if X contains no characters +// from any of the above scripts. 
+#[scalar(name = "fuzzy_script")]
+pub fn fuzzy_script(args: &[Value]) {
+    let Some(arg) = args[0].to_text() else {
+        return Value::error(ResultCode::InvalidArgs);
+    };
+    let dist = translit::script_code(arg.as_bytes());
+    return Value::from_integer(dist as i64);
+}
+
 //tests adapted from sqlean fuzzy
 #[cfg(test)]
 mod tests {
diff --git a/extensions/fuzzy/src/translit.rs b/extensions/fuzzy/src/translit.rs
new file mode 100644
index 000000000..fe1132f51
--- /dev/null
+++ b/extensions/fuzzy/src/translit.rs
@@ -0,0 +1,577 @@
+use crate::common::*;
+
+static TRANSLIT_UTF8_LOOKUP: [u8; 64] = [
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+];
+
+#[derive(Copy, Clone, Debug)]
+struct Transliteration {
+    c_from: u16,
+    c_to0: u8,
+    c_to1: u8,
+    c_to2: u8,
+    c_to3: u8,
+}
+
+impl Transliteration {
+    const fn new(c_from: u16, c_to0: u8, c_to1: u8, c_to2: u8, c_to3: u8) -> Self {
+        Self {
+            c_from,
+            c_to0,
+            c_to1,
+            c_to2,
+            c_to3,
+        }
+    }
+}
+
+static TRANSLIT: [Transliteration; 389] = [
+    Transliteration::new(0x00A0, b' ', 0x00, 0x00, 0x00), /* nbsp to space */
+    Transliteration::new(0x00B5, b'u', 0x00, 0x00, 0x00), /* µ to u */
+    Transliteration::new(0x00C0, b'A', 0x00, 0x00, 0x00), /* À to A */
+    Transliteration::new(0x00C1, b'A', 0x00, 0x00, 0x00), /* Á to A */
+    Transliteration::new(0x00C2, b'A', 0x00, 0x00, 0x00), /* Â to A */
+    Transliteration::new(0x00C3, b'A', 0x00, 0x00, 0x00), /* Ã to A */
+    Transliteration::new(0x00C4, b'A', b'e', 0x00, 0x00), /* Ä to Ae */
+    Transliteration::new(0x00C5, b'A', b'a', 0x00, 0x00), /* Å to Aa */
+    Transliteration::new(0x00C6, b'A', b'E', 0x00, 0x00), /* Æ to AE */
+    Transliteration::new(0x00C7, b'C', 0x00, 0x00, 0x00), /* Ç to C */
+    Transliteration::new(0x00C8, b'E', 0x00, 0x00, 0x00), /* È to E */
+    Transliteration::new(0x00C9, b'E', 0x00, 0x00, 0x00), /* É to E */
+    Transliteration::new(0x00CA, b'E', 0x00, 0x00, 0x00), /* Ê to E */
+    Transliteration::new(0x00CB, b'E', 0x00, 0x00, 0x00), /* Ë to E */
+    Transliteration::new(0x00CC, b'I', 0x00, 0x00, 0x00), /* Ì to I */
+    Transliteration::new(0x00CD, b'I', 0x00, 0x00, 0x00), /* Í to I */
+    Transliteration::new(0x00CE, b'I', 0x00, 0x00, 0x00), /* Î to I */
+    Transliteration::new(0x00CF, b'I', 0x00, 0x00, 0x00), /* Ï to I */
+    Transliteration::new(0x00D0, b'D', 0x00, 0x00, 0x00), /* Ð to D */
+    Transliteration::new(0x00D1, b'N', 0x00, 0x00, 0x00), /* Ñ to N */
+    Transliteration::new(0x00D2, b'O', 0x00, 0x00, 0x00), /* Ò to O */
+    Transliteration::new(0x00D3, b'O', 0x00, 0x00, 0x00), /* Ó to O */
+    Transliteration::new(0x00D4, b'O', 0x00, 0x00, 0x00), /* Ô to O */
+    Transliteration::new(0x00D5, b'O', 0x00, 0x00, 0x00), /* Õ to O */
+    Transliteration::new(0x00D6, b'O', b'e', 0x00, 0x00), /* Ö to Oe */
+    Transliteration::new(0x00D7, b'x', 0x00, 0x00, 0x00), /* × to x */
+    Transliteration::new(0x00D8, b'O', 0x00, 0x00, 0x00), /* Ø to O */
+    Transliteration::new(0x00D9, b'U', 0x00, 0x00, 0x00), /* Ù to U */
+    Transliteration::new(0x00DA, b'U', 0x00, 0x00, 0x00), /* Ú to U */
+    Transliteration::new(0x00DB, b'U', 0x00, 0x00, 0x00), /* Û to U */
+    Transliteration::new(0x00DC, b'U', b'e', 0x00, 0x00), /* Ü to Ue */
+    Transliteration::new(0x00DD, b'Y', 0x00, 0x00, 0x00), /* Ý to Y */
+    
Transliteration::new(0x00DE, b'T', b'h', 0x00, 0x00), /* Þ to Th */ + Transliteration::new(0x00DF, b's', b's', 0x00, 0x00), /* ß to ss */ + Transliteration::new(0x00E0, b'a', 0x00, 0x00, 0x00), /* à to a */ + Transliteration::new(0x00E1, b'a', 0x00, 0x00, 0x00), /* á to a */ + Transliteration::new(0x00E2, b'a', 0x00, 0x00, 0x00), /* â to a */ + Transliteration::new(0x00E3, b'a', 0x00, 0x00, 0x00), /* ã to a */ + Transliteration::new(0x00E4, b'a', b'e', 0x00, 0x00), /* ä to ae */ + Transliteration::new(0x00E5, b'a', b'a', 0x00, 0x00), /* å to aa */ + Transliteration::new(0x00E6, b'a', b'e', 0x00, 0x00), /* æ to ae */ + Transliteration::new(0x00E7, b'c', 0x00, 0x00, 0x00), /* ç to c */ + Transliteration::new(0x00E8, b'e', 0x00, 0x00, 0x00), /* è to e */ + Transliteration::new(0x00E9, b'e', 0x00, 0x00, 0x00), /* é to e */ + Transliteration::new(0x00EA, b'e', 0x00, 0x00, 0x00), /* ê to e */ + Transliteration::new(0x00EB, b'e', 0x00, 0x00, 0x00), /* ë to e */ + Transliteration::new(0x00EC, b'i', 0x00, 0x00, 0x00), /* ì to i */ + Transliteration::new(0x00ED, b'i', 0x00, 0x00, 0x00), /* í to i */ + Transliteration::new(0x00EE, b'i', 0x00, 0x00, 0x00), /* î to i */ + Transliteration::new(0x00EF, b'i', 0x00, 0x00, 0x00), /* ï to i */ + Transliteration::new(0x00F0, b'd', 0x00, 0x00, 0x00), /* ð to d */ + Transliteration::new(0x00F1, b'n', 0x00, 0x00, 0x00), /* ñ to n */ + Transliteration::new(0x00F2, b'o', 0x00, 0x00, 0x00), /* ò to o */ + Transliteration::new(0x00F3, b'o', 0x00, 0x00, 0x00), /* ó to o */ + Transliteration::new(0x00F4, b'o', 0x00, 0x00, 0x00), /* ô to o */ + Transliteration::new(0x00F5, b'o', 0x00, 0x00, 0x00), /* õ to o */ + Transliteration::new(0x00F6, b'o', b'e', 0x00, 0x00), /* ö to oe */ + Transliteration::new(0x00F7, b':', 0x00, 0x00, 0x00), /* ÷ to : */ + Transliteration::new(0x00F8, b'o', 0x00, 0x00, 0x00), /* ø to o */ + Transliteration::new(0x00F9, b'u', 0x00, 0x00, 0x00), /* ù to u */ + Transliteration::new(0x00FA, b'u', 0x00, 0x00, 0x00), /* ú to u */ + Transliteration::new(0x00FB, b'u', 0x00, 0x00, 0x00), /* û to u */ + Transliteration::new(0x00FC, b'u', b'e', 0x00, 0x00), /* ü to ue */ + Transliteration::new(0x00FD, b'y', 0x00, 0x00, 0x00), /* ý to y */ + Transliteration::new(0x00FE, b't', b'h', 0x00, 0x00), /* þ to th */ + Transliteration::new(0x00FF, b'y', 0x00, 0x00, 0x00), /* ÿ to y */ + Transliteration::new(0x0100, b'A', 0x00, 0x00, 0x00), /* Ā to A */ + Transliteration::new(0x0101, b'a', 0x00, 0x00, 0x00), /* ā to a */ + Transliteration::new(0x0102, b'A', 0x00, 0x00, 0x00), /* Ă to A */ + Transliteration::new(0x0103, b'a', 0x00, 0x00, 0x00), /* ă to a */ + Transliteration::new(0x0104, b'A', 0x00, 0x00, 0x00), /* Ą to A */ + Transliteration::new(0x0105, b'a', 0x00, 0x00, 0x00), /* ą to a */ + Transliteration::new(0x0106, b'C', 0x00, 0x00, 0x00), /* Ć to C */ + Transliteration::new(0x0107, b'c', 0x00, 0x00, 0x00), /* ć to c */ + Transliteration::new(0x0108, b'C', b'h', 0x00, 0x00), /* Ĉ to Ch */ + Transliteration::new(0x0109, b'c', b'h', 0x00, 0x00), /* ĉ to ch */ + Transliteration::new(0x010A, b'C', 0x00, 0x00, 0x00), /* Ċ to C */ + Transliteration::new(0x010B, b'c', 0x00, 0x00, 0x00), /* ċ to c */ + Transliteration::new(0x010C, b'C', 0x00, 0x00, 0x00), /* Č to C */ + Transliteration::new(0x010D, b'c', 0x00, 0x00, 0x00), /* č to c */ + Transliteration::new(0x010E, b'D', 0x00, 0x00, 0x00), /* Ď to D */ + Transliteration::new(0x010F, b'd', 0x00, 0x00, 0x00), /* ď to d */ + Transliteration::new(0x0110, b'D', 0x00, 0x00, 0x00), /* Đ to D */ + 
Transliteration::new(0x0111, b'd', 0x00, 0x00, 0x00), /* đ to d */ + Transliteration::new(0x0112, b'E', 0x00, 0x00, 0x00), /* Ē to E */ + Transliteration::new(0x0113, b'e', 0x00, 0x00, 0x00), /* ē to e */ + Transliteration::new(0x0114, b'E', 0x00, 0x00, 0x00), /* Ĕ to E */ + Transliteration::new(0x0115, b'e', 0x00, 0x00, 0x00), /* ĕ to e */ + Transliteration::new(0x0116, b'E', 0x00, 0x00, 0x00), /* Ė to E */ + Transliteration::new(0x0117, b'e', 0x00, 0x00, 0x00), /* ė to e */ + Transliteration::new(0x0118, b'E', 0x00, 0x00, 0x00), /* Ę to E */ + Transliteration::new(0x0119, b'e', 0x00, 0x00, 0x00), /* ę to e */ + Transliteration::new(0x011A, b'E', 0x00, 0x00, 0x00), /* Ě to E */ + Transliteration::new(0x011B, b'e', 0x00, 0x00, 0x00), /* ě to e */ + Transliteration::new(0x011C, b'G', b'h', 0x00, 0x00), /* Ĝ to Gh */ + Transliteration::new(0x011D, b'g', b'h', 0x00, 0x00), /* ĝ to gh */ + Transliteration::new(0x011E, b'G', 0x00, 0x00, 0x00), /* Ğ to G */ + Transliteration::new(0x011F, b'g', 0x00, 0x00, 0x00), /* ğ to g */ + Transliteration::new(0x0120, b'G', 0x00, 0x00, 0x00), /* Ġ to G */ + Transliteration::new(0x0121, b'g', 0x00, 0x00, 0x00), /* ġ to g */ + Transliteration::new(0x0122, b'G', 0x00, 0x00, 0x00), /* Ģ to G */ + Transliteration::new(0x0123, b'g', 0x00, 0x00, 0x00), /* ģ to g */ + Transliteration::new(0x0124, b'H', b'h', 0x00, 0x00), /* Ĥ to Hh */ + Transliteration::new(0x0125, b'h', b'h', 0x00, 0x00), /* ĥ to hh */ + Transliteration::new(0x0126, b'H', 0x00, 0x00, 0x00), /* Ħ to H */ + Transliteration::new(0x0127, b'h', 0x00, 0x00, 0x00), /* ħ to h */ + Transliteration::new(0x0128, b'I', 0x00, 0x00, 0x00), /* Ĩ to I */ + Transliteration::new(0x0129, b'i', 0x00, 0x00, 0x00), /* ĩ to i */ + Transliteration::new(0x012A, b'I', 0x00, 0x00, 0x00), /* Ī to I */ + Transliteration::new(0x012B, b'i', 0x00, 0x00, 0x00), /* ī to i */ + Transliteration::new(0x012C, b'I', 0x00, 0x00, 0x00), /* Ĭ to I */ + Transliteration::new(0x012D, b'i', 0x00, 0x00, 0x00), /* ĭ to i */ + Transliteration::new(0x012E, b'I', 0x00, 0x00, 0x00), /* Į to I */ + Transliteration::new(0x012F, b'i', 0x00, 0x00, 0x00), /* į to i */ + Transliteration::new(0x0130, b'I', 0x00, 0x00, 0x00), /* İ to I */ + Transliteration::new(0x0131, b'i', 0x00, 0x00, 0x00), /* ı to i */ + Transliteration::new(0x0132, b'I', b'J', 0x00, 0x00), /* IJ to IJ */ + Transliteration::new(0x0133, b'i', b'j', 0x00, 0x00), /* ij to ij */ + Transliteration::new(0x0134, b'J', b'h', 0x00, 0x00), /* Ĵ to Jh */ + Transliteration::new(0x0135, b'j', b'h', 0x00, 0x00), /* ĵ to jh */ + Transliteration::new(0x0136, b'K', 0x00, 0x00, 0x00), /* Ķ to K */ + Transliteration::new(0x0137, b'k', 0x00, 0x00, 0x00), /* ķ to k */ + Transliteration::new(0x0138, b'k', 0x00, 0x00, 0x00), /* ĸ to k */ + Transliteration::new(0x0139, b'L', 0x00, 0x00, 0x00), /* Ĺ to L */ + Transliteration::new(0x013A, b'l', 0x00, 0x00, 0x00), /* ĺ to l */ + Transliteration::new(0x013B, b'L', 0x00, 0x00, 0x00), /* Ļ to L */ + Transliteration::new(0x013C, b'l', 0x00, 0x00, 0x00), /* ļ to l */ + Transliteration::new(0x013D, b'L', 0x00, 0x00, 0x00), /* Ľ to L */ + Transliteration::new(0x013E, b'l', 0x00, 0x00, 0x00), /* ľ to l */ + Transliteration::new(0x013F, b'L', b'.', 0x00, 0x00), /* Ŀ to L. */ + Transliteration::new(0x0140, b'l', b'.', 0x00, 0x00), /* ŀ to l. 
*/ + Transliteration::new(0x0141, b'L', 0x00, 0x00, 0x00), /* Ł to L */ + Transliteration::new(0x0142, b'l', 0x00, 0x00, 0x00), /* ł to l */ + Transliteration::new(0x0143, b'N', 0x00, 0x00, 0x00), /* Ń to N */ + Transliteration::new(0x0144, b'n', 0x00, 0x00, 0x00), /* ń to n */ + Transliteration::new(0x0145, b'N', 0x00, 0x00, 0x00), /* Ņ to N */ + Transliteration::new(0x0146, b'n', 0x00, 0x00, 0x00), /* ņ to n */ + Transliteration::new(0x0147, b'N', 0x00, 0x00, 0x00), /* Ň to N */ + Transliteration::new(0x0148, b'n', 0x00, 0x00, 0x00), /* ň to n */ + Transliteration::new(0x0149, b'\'', b'n', 0x00, 0x00), /* ʼn to 'n */ + Transliteration::new(0x014A, b'N', b'G', 0x00, 0x00), /* Ŋ to NG */ + Transliteration::new(0x014B, b'n', b'g', 0x00, 0x00), /* ŋ to ng */ + Transliteration::new(0x014C, b'O', 0x00, 0x00, 0x00), /* Ō to O */ + Transliteration::new(0x014D, b'o', 0x00, 0x00, 0x00), /* ō to o */ + Transliteration::new(0x014E, b'O', 0x00, 0x00, 0x00), /* Ŏ to O */ + Transliteration::new(0x014F, b'o', 0x00, 0x00, 0x00), /* ŏ to o */ + Transliteration::new(0x0150, b'O', 0x00, 0x00, 0x00), /* Ő to O */ + Transliteration::new(0x0151, b'o', 0x00, 0x00, 0x00), /* ő to o */ + Transliteration::new(0x0152, b'O', b'E', 0x00, 0x00), /* Œ to OE */ + Transliteration::new(0x0153, b'o', b'e', 0x00, 0x00), /* œ to oe */ + Transliteration::new(0x0154, b'R', 0x00, 0x00, 0x00), /* Ŕ to R */ + Transliteration::new(0x0155, b'r', 0x00, 0x00, 0x00), /* ŕ to r */ + Transliteration::new(0x0156, b'R', 0x00, 0x00, 0x00), /* Ŗ to R */ + Transliteration::new(0x0157, b'r', 0x00, 0x00, 0x00), /* ŗ to r */ + Transliteration::new(0x0158, b'R', 0x00, 0x00, 0x00), /* Ř to R */ + Transliteration::new(0x0159, b'r', 0x00, 0x00, 0x00), /* ř to r */ + Transliteration::new(0x015A, b'S', 0x00, 0x00, 0x00), /* Ś to S */ + Transliteration::new(0x015B, b's', 0x00, 0x00, 0x00), /* ś to s */ + Transliteration::new(0x015C, b'S', b'h', 0x00, 0x00), /* Ŝ to Sh */ + Transliteration::new(0x015D, b's', b'h', 0x00, 0x00), /* ŝ to sh */ + Transliteration::new(0x015E, b'S', 0x00, 0x00, 0x00), /* Ş to S */ + Transliteration::new(0x015F, b's', 0x00, 0x00, 0x00), /* ş to s */ + Transliteration::new(0x0160, b'S', 0x00, 0x00, 0x00), /* Š to S */ + Transliteration::new(0x0161, b's', 0x00, 0x00, 0x00), /* š to s */ + Transliteration::new(0x0162, b'T', 0x00, 0x00, 0x00), /* Ţ to T */ + Transliteration::new(0x0163, b't', 0x00, 0x00, 0x00), /* ţ to t */ + Transliteration::new(0x0164, b'T', 0x00, 0x00, 0x00), /* Ť to T */ + Transliteration::new(0x0165, b't', 0x00, 0x00, 0x00), /* ť to t */ + Transliteration::new(0x0166, b'T', 0x00, 0x00, 0x00), /* Ŧ to T */ + Transliteration::new(0x0167, b't', 0x00, 0x00, 0x00), /* ŧ to t */ + Transliteration::new(0x0168, b'U', 0x00, 0x00, 0x00), /* Ũ to U */ + Transliteration::new(0x0169, b'u', 0x00, 0x00, 0x00), /* ũ to u */ + Transliteration::new(0x016A, b'U', 0x00, 0x00, 0x00), /* Ū to U */ + Transliteration::new(0x016B, b'u', 0x00, 0x00, 0x00), /* ū to u */ + Transliteration::new(0x016C, b'U', 0x00, 0x00, 0x00), /* Ŭ to U */ + Transliteration::new(0x016D, b'u', 0x00, 0x00, 0x00), /* ŭ to u */ + Transliteration::new(0x016E, b'U', 0x00, 0x00, 0x00), /* Ů to U */ + Transliteration::new(0x016F, b'u', 0x00, 0x00, 0x00), /* ů to u */ + Transliteration::new(0x0170, b'U', 0x00, 0x00, 0x00), /* Ű to U */ + Transliteration::new(0x0171, b'u', 0x00, 0x00, 0x00), /* ű to u */ + Transliteration::new(0x0172, b'U', 0x00, 0x00, 0x00), /* Ų to U */ + Transliteration::new(0x0173, b'u', 0x00, 0x00, 0x00), /* ų to u */ + 
Transliteration::new(0x0174, b'W', 0x00, 0x00, 0x00), /* Ŵ to W */ + Transliteration::new(0x0175, b'w', 0x00, 0x00, 0x00), /* ŵ to w */ + Transliteration::new(0x0176, b'Y', 0x00, 0x00, 0x00), /* Ŷ to Y */ + Transliteration::new(0x0177, b'y', 0x00, 0x00, 0x00), /* ŷ to y */ + Transliteration::new(0x0178, b'Y', 0x00, 0x00, 0x00), /* Ÿ to Y */ + Transliteration::new(0x0179, b'Z', 0x00, 0x00, 0x00), /* Ź to Z */ + Transliteration::new(0x017A, b'z', 0x00, 0x00, 0x00), /* ź to z */ + Transliteration::new(0x017B, b'Z', 0x00, 0x00, 0x00), /* Ż to Z */ + Transliteration::new(0x017C, b'z', 0x00, 0x00, 0x00), /* ż to z */ + Transliteration::new(0x017D, b'Z', 0x00, 0x00, 0x00), /* Ž to Z */ + Transliteration::new(0x017E, b'z', 0x00, 0x00, 0x00), /* ž to z */ + Transliteration::new(0x017F, b's', 0x00, 0x00, 0x00), /* ſ to s */ + Transliteration::new(0x0192, b'f', 0x00, 0x00, 0x00), /* ƒ to f */ + Transliteration::new(0x0218, b'S', 0x00, 0x00, 0x00), /* Ș to S */ + Transliteration::new(0x0219, b's', 0x00, 0x00, 0x00), /* ș to s */ + Transliteration::new(0x021A, b'T', 0x00, 0x00, 0x00), /* Ț to T */ + Transliteration::new(0x021B, b't', 0x00, 0x00, 0x00), /* ț to t */ + Transliteration::new(0x0386, b'A', 0x00, 0x00, 0x00), /* Ά to A */ + Transliteration::new(0x0388, b'E', 0x00, 0x00, 0x00), /* Έ to E */ + Transliteration::new(0x0389, b'I', 0x00, 0x00, 0x00), /* Ή to I */ + Transliteration::new(0x038A, b'I', 0x00, 0x00, 0x00), /* Ί to I */ + Transliteration::new(0x038C, b'O', 0x00, 0x00, 0x00), /* Ό to O */ + Transliteration::new(0x038E, b'Y', 0x00, 0x00, 0x00), /* Ύ to Y */ + Transliteration::new(0x038F, b'O', 0x00, 0x00, 0x00), /* Ώ to O */ + Transliteration::new(0x0390, b'i', 0x00, 0x00, 0x00), /* ΐ to i */ + Transliteration::new(0x0391, b'A', 0x00, 0x00, 0x00), /* Α to A */ + Transliteration::new(0x0392, b'B', 0x00, 0x00, 0x00), /* Β to B */ + Transliteration::new(0x0393, b'G', 0x00, 0x00, 0x00), /* Γ to G */ + Transliteration::new(0x0394, b'D', 0x00, 0x00, 0x00), /* Δ to D */ + Transliteration::new(0x0395, b'E', 0x00, 0x00, 0x00), /* Ε to E */ + Transliteration::new(0x0396, b'Z', 0x00, 0x00, 0x00), /* Ζ to Z */ + Transliteration::new(0x0397, b'I', 0x00, 0x00, 0x00), /* Η to I */ + Transliteration::new(0x0398, b'T', b'h', 0x00, 0x00), /* Θ to Th */ + Transliteration::new(0x0399, b'I', 0x00, 0x00, 0x00), /* Ι to I */ + Transliteration::new(0x039A, b'K', 0x00, 0x00, 0x00), /* Κ to K */ + Transliteration::new(0x039B, b'L', 0x00, 0x00, 0x00), /* Λ to L */ + Transliteration::new(0x039C, b'M', 0x00, 0x00, 0x00), /* Μ to M */ + Transliteration::new(0x039D, b'N', 0x00, 0x00, 0x00), /* Ν to N */ + Transliteration::new(0x039E, b'X', 0x00, 0x00, 0x00), /* Ξ to X */ + Transliteration::new(0x039F, b'O', 0x00, 0x00, 0x00), /* Ο to O */ + Transliteration::new(0x03A0, b'P', 0x00, 0x00, 0x00), /* Π to P */ + Transliteration::new(0x03A1, b'R', 0x00, 0x00, 0x00), /* Ρ to R */ + Transliteration::new(0x03A3, b'S', 0x00, 0x00, 0x00), /* Σ to S */ + Transliteration::new(0x03A4, b'T', 0x00, 0x00, 0x00), /* Τ to T */ + Transliteration::new(0x03A5, b'Y', 0x00, 0x00, 0x00), /* Υ to Y */ + Transliteration::new(0x03A6, b'F', 0x00, 0x00, 0x00), /* Φ to F */ + Transliteration::new(0x03A7, b'C', b'h', 0x00, 0x00), /* Χ to Ch */ + Transliteration::new(0x03A8, b'P', b's', 0x00, 0x00), /* Ψ to Ps */ + Transliteration::new(0x03A9, b'O', 0x00, 0x00, 0x00), /* Ω to O */ + Transliteration::new(0x03AA, b'I', 0x00, 0x00, 0x00), /* Ϊ to I */ + Transliteration::new(0x03AB, b'Y', 0x00, 0x00, 0x00), /* Ϋ to Y */ + Transliteration::new(0x03AC, 
b'a', 0x00, 0x00, 0x00), /* ά to a */ + Transliteration::new(0x03AD, b'e', 0x00, 0x00, 0x00), /* έ to e */ + Transliteration::new(0x03AE, b'i', 0x00, 0x00, 0x00), /* ή to i */ + Transliteration::new(0x03AF, b'i', 0x00, 0x00, 0x00), /* ί to i */ + Transliteration::new(0x03B1, b'a', 0x00, 0x00, 0x00), /* α to a */ + Transliteration::new(0x03B2, b'b', 0x00, 0x00, 0x00), /* β to b */ + Transliteration::new(0x03B3, b'g', 0x00, 0x00, 0x00), /* γ to g */ + Transliteration::new(0x03B4, b'd', 0x00, 0x00, 0x00), /* δ to d */ + Transliteration::new(0x03B5, b'e', 0x00, 0x00, 0x00), /* ε to e */ + Transliteration::new(0x03B6, b'z', 0x00, 0x00, 0x00), /* ζ to z */ + Transliteration::new(0x03B7, b'i', 0x00, 0x00, 0x00), /* η to i */ + Transliteration::new(0x03B8, b't', b'h', 0x00, 0x00), /* θ to th */ + Transliteration::new(0x03B9, b'i', 0x00, 0x00, 0x00), /* ι to i */ + Transliteration::new(0x03BA, b'k', 0x00, 0x00, 0x00), /* κ to k */ + Transliteration::new(0x03BB, b'l', 0x00, 0x00, 0x00), /* λ to l */ + Transliteration::new(0x03BC, b'm', 0x00, 0x00, 0x00), /* μ to m */ + Transliteration::new(0x03BD, b'n', 0x00, 0x00, 0x00), /* ν to n */ + Transliteration::new(0x03BE, b'x', 0x00, 0x00, 0x00), /* ξ to x */ + Transliteration::new(0x03BF, b'o', 0x00, 0x00, 0x00), /* ο to o */ + Transliteration::new(0x03C0, b'p', 0x00, 0x00, 0x00), /* π to p */ + Transliteration::new(0x03C1, b'r', 0x00, 0x00, 0x00), /* ρ to r */ + Transliteration::new(0x03C3, b's', 0x00, 0x00, 0x00), /* σ to s */ + Transliteration::new(0x03C4, b't', 0x00, 0x00, 0x00), /* τ to t */ + Transliteration::new(0x03C5, b'y', 0x00, 0x00, 0x00), /* υ to y */ + Transliteration::new(0x03C6, b'f', 0x00, 0x00, 0x00), /* φ to f */ + Transliteration::new(0x03C7, b'c', b'h', 0x00, 0x00), /* χ to ch */ + Transliteration::new(0x03C8, b'p', b's', 0x00, 0x00), /* ψ to ps */ + Transliteration::new(0x03C9, b'o', 0x00, 0x00, 0x00), /* ω to o */ + Transliteration::new(0x03CA, b'i', 0x00, 0x00, 0x00), /* ϊ to i */ + Transliteration::new(0x03CB, b'y', 0x00, 0x00, 0x00), /* ϋ to y */ + Transliteration::new(0x03CC, b'o', 0x00, 0x00, 0x00), /* ό to o */ + Transliteration::new(0x03CD, b'y', 0x00, 0x00, 0x00), /* ύ to y */ + Transliteration::new(0x03CE, b'i', 0x00, 0x00, 0x00), /* ώ to i */ + Transliteration::new(0x0400, b'E', 0x00, 0x00, 0x00), /* Ѐ to E */ + Transliteration::new(0x0401, b'E', 0x00, 0x00, 0x00), /* Ё to E */ + Transliteration::new(0x0402, b'D', 0x00, 0x00, 0x00), /* Ђ to D */ + Transliteration::new(0x0403, b'G', 0x00, 0x00, 0x00), /* Ѓ to G */ + Transliteration::new(0x0404, b'E', 0x00, 0x00, 0x00), /* Є to E */ + Transliteration::new(0x0405, b'Z', 0x00, 0x00, 0x00), /* Ѕ to Z */ + Transliteration::new(0x0406, b'I', 0x00, 0x00, 0x00), /* І to I */ + Transliteration::new(0x0407, b'I', 0x00, 0x00, 0x00), /* Ї to I */ + Transliteration::new(0x0408, b'J', 0x00, 0x00, 0x00), /* Ј to J */ + Transliteration::new(0x0409, b'I', 0x00, 0x00, 0x00), /* Љ to I */ + Transliteration::new(0x040A, b'N', 0x00, 0x00, 0x00), /* Њ to N */ + Transliteration::new(0x040B, b'D', 0x00, 0x00, 0x00), /* Ћ to D */ + Transliteration::new(0x040C, b'K', 0x00, 0x00, 0x00), /* Ќ to K */ + Transliteration::new(0x040D, b'I', 0x00, 0x00, 0x00), /* Ѝ to I */ + Transliteration::new(0x040E, b'U', 0x00, 0x00, 0x00), /* Ў to U */ + Transliteration::new(0x040F, b'D', 0x00, 0x00, 0x00), /* Џ to D */ + Transliteration::new(0x0410, b'A', 0x00, 0x00, 0x00), /* А to A */ + Transliteration::new(0x0411, b'B', 0x00, 0x00, 0x00), /* Б to B */ + Transliteration::new(0x0412, b'V', 0x00, 0x00, 0x00), /* В to 
V */
+    Transliteration::new(0x0413, b'G', 0x00, 0x00, 0x00), /* Г to G */
+    Transliteration::new(0x0414, b'D', 0x00, 0x00, 0x00), /* Д to D */
+    Transliteration::new(0x0415, b'E', 0x00, 0x00, 0x00), /* Е to E */
+    Transliteration::new(0x0416, b'Z', b'h', 0x00, 0x00), /* Ж to Zh */
+    Transliteration::new(0x0417, b'Z', 0x00, 0x00, 0x00), /* З to Z */
+    Transliteration::new(0x0418, b'I', 0x00, 0x00, 0x00), /* И to I */
+    Transliteration::new(0x0419, b'I', 0x00, 0x00, 0x00), /* Й to I */
+    Transliteration::new(0x041A, b'K', 0x00, 0x00, 0x00), /* К to K */
+    Transliteration::new(0x041B, b'L', 0x00, 0x00, 0x00), /* Л to L */
+    Transliteration::new(0x041C, b'M', 0x00, 0x00, 0x00), /* М to M */
+    Transliteration::new(0x041D, b'N', 0x00, 0x00, 0x00), /* Н to N */
+    Transliteration::new(0x041E, b'O', 0x00, 0x00, 0x00), /* О to O */
+    Transliteration::new(0x041F, b'P', 0x00, 0x00, 0x00), /* П to P */
+    Transliteration::new(0x0420, b'R', 0x00, 0x00, 0x00), /* Р to R */
+    Transliteration::new(0x0421, b'S', 0x00, 0x00, 0x00), /* С to S */
+    Transliteration::new(0x0422, b'T', 0x00, 0x00, 0x00), /* Т to T */
+    Transliteration::new(0x0423, b'U', 0x00, 0x00, 0x00), /* У to U */
+    Transliteration::new(0x0424, b'F', 0x00, 0x00, 0x00), /* Ф to F */
+    Transliteration::new(0x0425, b'K', b'h', 0x00, 0x00), /* Х to Kh */
+    Transliteration::new(0x0426, b'T', b'c', 0x00, 0x00), /* Ц to Tc */
+    Transliteration::new(0x0427, b'C', b'h', 0x00, 0x00), /* Ч to Ch */
+    Transliteration::new(0x0428, b'S', b'h', 0x00, 0x00), /* Ш to Sh */
+    Transliteration::new(0x0429, b'S', b'h', b'c', b'h'), /* Щ to Shch */
+    Transliteration::new(0x042A, b'a', 0x00, 0x00, 0x00), /* Ъ to A */
+    Transliteration::new(0x042B, b'Y', 0x00, 0x00, 0x00), /* Ы to Y */
+    Transliteration::new(0x042C, b'Y', 0x00, 0x00, 0x00), /* Ь to Y */
+    Transliteration::new(0x042D, b'E', 0x00, 0x00, 0x00), /* Э to E */
+    Transliteration::new(0x042E, b'I', b'u', 0x00, 0x00), /* Ю to Iu */
+    Transliteration::new(0x042F, b'I', b'a', 0x00, 0x00), /* Я to Ia */
+    Transliteration::new(0x0430, b'a', 0x00, 0x00, 0x00), /* а to a */
+    Transliteration::new(0x0431, b'b', 0x00, 0x00, 0x00), /* б to b */
+    Transliteration::new(0x0432, b'v', 0x00, 0x00, 0x00), /* в to v */
+    Transliteration::new(0x0433, b'g', 0x00, 0x00, 0x00), /* г to g */
+    Transliteration::new(0x0434, b'd', 0x00, 0x00, 0x00), /* д to d */
+    Transliteration::new(0x0435, b'e', 0x00, 0x00, 0x00), /* е to e */
+    Transliteration::new(0x0436, b'z', b'h', 0x00, 0x00), /* ж to zh */
+    Transliteration::new(0x0437, b'z', 0x00, 0x00, 0x00), /* з to z */
+    Transliteration::new(0x0438, b'i', 0x00, 0x00, 0x00), /* и to i */
+    Transliteration::new(0x0439, b'i', 0x00, 0x00, 0x00), /* й to i */
+    Transliteration::new(0x043A, b'k', 0x00, 0x00, 0x00), /* к to k */
+    Transliteration::new(0x043B, b'l', 0x00, 0x00, 0x00), /* л to l */
+    Transliteration::new(0x043C, b'm', 0x00, 0x00, 0x00), /* м to m */
+    Transliteration::new(0x043D, b'n', 0x00, 0x00, 0x00), /* н to n */
+    Transliteration::new(0x043E, b'o', 0x00, 0x00, 0x00), /* о to o */
+    Transliteration::new(0x043F, b'p', 0x00, 0x00, 0x00), /* п to p */
+    Transliteration::new(0x0440, b'r', 0x00, 0x00, 0x00), /* р to r */
+    Transliteration::new(0x0441, b's', 0x00, 0x00, 0x00), /* с to s */
+    Transliteration::new(0x0442, b't', 0x00, 0x00, 0x00), /* т to t */
+    Transliteration::new(0x0443, b'u', 0x00, 0x00, 0x00), /* у to u */
+    Transliteration::new(0x0444, b'f', 0x00, 0x00, 0x00), /* ф to f */
+    Transliteration::new(0x0445, b'k', b'h', 0x00, 0x00), /* х to kh */
+    Transliteration::new(0x0446, b't', b'c', 0x00, 0x00), /* ц to tc */
+    Transliteration::new(0x0447, b'c', b'h', 0x00, 0x00), /* ч to ch */
+    Transliteration::new(0x0448, b's', b'h', 0x00, 0x00), /* ш to sh */
+    Transliteration::new(0x0449, b's', b'h', b'c', b'h'), /* щ to shch */
+    Transliteration::new(0x044A, b'a', 0x00, 0x00, 0x00), /* ъ to a */
+    Transliteration::new(0x044B, b'y', 0x00, 0x00, 0x00), /* ы to y */
+    Transliteration::new(0x044C, b'y', 0x00, 0x00, 0x00), /* ь to y */
+    Transliteration::new(0x044D, b'e', 0x00, 0x00, 0x00), /* э to e */
+    Transliteration::new(0x044E, b'i', b'u', 0x00, 0x00), /* ю to iu */
+    Transliteration::new(0x044F, b'i', b'a', 0x00, 0x00), /* я to ia */
+    Transliteration::new(0x0450, b'e', 0x00, 0x00, 0x00), /* ѐ to e */
+    Transliteration::new(0x0451, b'e', 0x00, 0x00, 0x00), /* ё to e */
+    Transliteration::new(0x0452, b'd', 0x00, 0x00, 0x00), /* ђ to d */
+    Transliteration::new(0x0453, b'g', 0x00, 0x00, 0x00), /* ѓ to g */
+    Transliteration::new(0x0454, b'e', 0x00, 0x00, 0x00), /* є to e */
+    Transliteration::new(0x0455, b'z', 0x00, 0x00, 0x00), /* ѕ to z */
+    Transliteration::new(0x0456, b'i', 0x00, 0x00, 0x00), /* і to i */
+    Transliteration::new(0x0457, b'i', 0x00, 0x00, 0x00), /* ї to i */
+    Transliteration::new(0x0458, b'j', 0x00, 0x00, 0x00), /* ј to j */
+    Transliteration::new(0x0459, b'i', 0x00, 0x00, 0x00), /* љ to i */
+    Transliteration::new(0x045A, b'n', 0x00, 0x00, 0x00), /* њ to n */
+    Transliteration::new(0x045B, b'd', 0x00, 0x00, 0x00), /* ћ to d */
+    Transliteration::new(0x045C, b'k', 0x00, 0x00, 0x00), /* ќ to k */
+    Transliteration::new(0x045D, b'i', 0x00, 0x00, 0x00), /* ѝ to i */
+    Transliteration::new(0x045E, b'u', 0x00, 0x00, 0x00), /* ў to u */
+    Transliteration::new(0x045F, b'd', 0x00, 0x00, 0x00), /* џ to d */
+    Transliteration::new(0x1E02, b'B', 0x00, 0x00, 0x00), /* Ḃ to B */
+    Transliteration::new(0x1E03, b'b', 0x00, 0x00, 0x00), /* ḃ to b */
+    Transliteration::new(0x1E0A, b'D', 0x00, 0x00, 0x00), /* Ḋ to D */
+    Transliteration::new(0x1E0B, b'd', 0x00, 0x00, 0x00), /* ḋ to d */
+    Transliteration::new(0x1E1E, b'F', 0x00, 0x00, 0x00), /* Ḟ to F */
+    Transliteration::new(0x1E1F, b'f', 0x00, 0x00, 0x00), /* ḟ to f */
+    Transliteration::new(0x1E40, b'M', 0x00, 0x00, 0x00), /* Ṁ to M */
+    Transliteration::new(0x1E41, b'm', 0x00, 0x00, 0x00), /* ṁ to m */
+    Transliteration::new(0x1E56, b'P', 0x00, 0x00, 0x00), /* Ṗ to P */
+    Transliteration::new(0x1E57, b'p', 0x00, 0x00, 0x00), /* ṗ to p */
+    Transliteration::new(0x1E60, b'S', 0x00, 0x00, 0x00), /* Ṡ to S */
+    Transliteration::new(0x1E61, b's', 0x00, 0x00, 0x00), /* ṡ to s */
+    Transliteration::new(0x1E6A, b'T', 0x00, 0x00, 0x00), /* Ṫ to T */
+    Transliteration::new(0x1E6B, b't', 0x00, 0x00, 0x00), /* ṫ to t */
+    Transliteration::new(0x1E80, b'W', 0x00, 0x00, 0x00), /* Ẁ to W */
+    Transliteration::new(0x1E81, b'w', 0x00, 0x00, 0x00), /* ẁ to w */
+    Transliteration::new(0x1E82, b'W', 0x00, 0x00, 0x00), /* Ẃ to W */
+    Transliteration::new(0x1E83, b'w', 0x00, 0x00, 0x00), /* ẃ to w */
+    Transliteration::new(0x1E84, b'W', 0x00, 0x00, 0x00), /* Ẅ to W */
+    Transliteration::new(0x1E85, b'w', 0x00, 0x00, 0x00), /* ẅ to w */
+    Transliteration::new(0x1EF2, b'Y', 0x00, 0x00, 0x00), /* Ỳ to Y */
+    Transliteration::new(0x1EF3, b'y', 0x00, 0x00, 0x00), /* ỳ to y */
+    Transliteration::new(0xFB00, b'f', b'f', 0x00, 0x00), /* ff to ff */
+    Transliteration::new(0xFB01, b'f', b'i', 0x00, 0x00), /* fi to fi */
+    Transliteration::new(0xFB02, b'f', b'l', 0x00, 0x00), /* fl to fl */
+    Transliteration::new(0xFB05, b's', b't', 0x00, 0x00), /* ſt to st */
+    Transliteration::new(0xFB06, b's', b't', 0x00, 0x00), /* st to st */
+];
+
+/// Return the value of the first UTF-8 character in the string
+fn utf8_read(z: &[u8]) -> (u32, usize) {
+    if z.is_empty() {
+        return (0, 0);
+    }
+
+    let first_byte = z[0];
+    if first_byte < 0x80 {
+        (first_byte as u32, 1)
+    } else {
+        let lookup_index = (first_byte - 0xc0) as usize;
+        if lookup_index >= TRANSLIT_UTF8_LOOKUP.len() {
+            return (first_byte as u32, 1);
+        }
+
+        let mut c = TRANSLIT_UTF8_LOOKUP[lookup_index] as u32;
+        let mut i = 1;
+
+        while i < z.len() && (z[i] & 0xc0) == 0x80 {
+            c = (c << 6) + ((z[i] & 0x3f) as u32);
+            i += 1;
+        }
+
+        (c, i)
+    }
+}
+
+/// Find transliteration entry for a given Unicode character using binary search
+fn find_translit(c: u32) -> Option<&'static Transliteration> {
+    let c = c as u16; // Cast to u16 since our table uses u16
+    TRANSLIT
+        .binary_search_by_key(&c, |t| t.c_from)
+        .ok()
+        .map(|idx| &TRANSLIT[idx])
+}
+
+/// Convert the input string from UTF-8 into pure ASCII by converting
+/// all non-ASCII characters to some combination of characters in the ASCII subset.
+pub fn transliterate(input: &[u8]) -> Vec<u8> {
+    let mut output = Vec::with_capacity(input.len() * 4);
+    let mut pos = 0;
+
+    while pos < input.len() {
+        let (c, size) = utf8_read(&input[pos..]);
+        pos += size;
+
+        if c <= 127 {
+            output.push(c as u8);
+        } else if let Some(translit) = find_translit(c) {
+            output.push(translit.c_to0);
+            if translit.c_to1 != 0 {
+                output.push(translit.c_to1);
+                if translit.c_to2 != 0 {
+                    output.push(translit.c_to2);
+                    if translit.c_to3 != 0 {
+                        output.push(translit.c_to3);
+                    }
+                }
+            }
+        } else {
+            output.push(b'?');
+        }
+    }
+
+    output
+}
+
+pub fn transliterate_str(input: &str) -> String {
+    let result = transliterate(input.as_bytes());
+    String::from_utf8(result).unwrap_or_else(|_| "?".to_string())
+}
+
+pub fn script_code(input: &[u8]) -> i32 {
+    let mut pos = 0;
+    let mut script_mask = 0;
+    let mut seen_digit = false;
+
+    while pos < input.len() {
+        let (c, size) = utf8_read(&input[pos..]);
+        pos += size;
+
+        if c < 0x02af {
+            if c >= 0x80 {
+                script_mask |= SCRIPT_LATIN;
+            } else if (c as u8).is_ascii_digit() {
+                seen_digit = true;
+            } else {
+                script_mask |= SCRIPT_LATIN;
+            }
+        } else if (0x0400..=0x04ff).contains(&c) {
+            script_mask |= SCRIPT_CYRILLIC;
+        } else if (0x0386..=0x03ce).contains(&c) {
+            script_mask |= SCRIPT_GREEK;
+        } else if (0x0590..=0x05ff).contains(&c) {
+            script_mask |= SCRIPT_HEBREW;
+        } else if (0x0600..=0x06ff).contains(&c) {
+            script_mask |= SCRIPT_ARABIC;
+        }
+    }
+
+    if script_mask == 0 && seen_digit {
+        script_mask = SCRIPT_LATIN;
+    }
+
+    match script_mask {
+        0 => 999,
+        SCRIPT_LATIN => 215,
+        SCRIPT_CYRILLIC => 220,
+        SCRIPT_GREEK => 200,
+        SCRIPT_HEBREW => 125,
+        SCRIPT_ARABIC => 160,
+        _ => 998,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_utf8_read() {
+        let input = "Café".as_bytes();
+        let (c, size) = utf8_read(&input[0..]);
+        assert_eq!(c, b'C' as u32);
+        assert_eq!(size, 1);
+        let (c, size) = utf8_read(&input[3..]);
+        assert_eq!(c, 0x00E9); // é
+        assert_eq!(size, 2);
+    }
+
+    #[test]
+    fn test_transliterate_basic() {
+        let result = transliterate_str("Café");
+        assert_eq!(result, "Cafe");
+        let result = transliterate_str("Naïve");
+        assert_eq!(result, "Naive");
+    }
+
+    #[test]
+    fn test_transliterate_german() {
+        let result = transliterate_str("Müller");
+        assert_eq!(result, "Mueller");
+        let result = transliterate_str("Größe");
+        assert_eq!(result, "Groesse");
+    }
+
+    #[test]
+    fn test_script_code() {
+        assert_eq!(script_code("Hello".as_bytes()), 215);
+        assert_eq!(script_code("123".as_bytes()), 215);
+        assert_eq!(script_code("привет".as_bytes()), 220);
+        assert_eq!(script_code("γειά".as_bytes()), 200);
+        assert_eq!(script_code("helloпривет".as_bytes()), 998);
+    }
+}
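
Restating the unit tests above in SQL terms may help before the CLI wiring that follows; assuming the extension is loaded, the same expectations read as:

    SELECT fuzzy_translit('Müller');  -- 'Mueller', per test_transliterate_german
    SELECT fuzzy_script('привет');    -- 220, the ISO 15924 numeric code for Cyrillic, per test_script_code
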
diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py
index a6c6cb89c..fb1279d45 100755
--- a/testing/cli_tests/extensions.py
+++ b/testing/cli_tests/extensions.py
@@ -593,6 +593,15 @@ def validate_fuzzy_caver(a):
 def validate_fuzzy_rsoundex(a):
     return a == "A03080"
 
+def validate_fuzzy_translit1(a):
+    return a == "oh my ?"
+
+def validate_fuzzy_translit2(a):
+    return a == "privet"
+
+def validate_fuzzy_script(a):
+    return a == "160"
+
 def test_fuzzy():
     limbo = TestTursoShell()
     ext_path = "./target/debug/liblimbo_fuzzy"
@@ -652,6 +661,21 @@ def test_fuzzy():
         validate_fuzzy_rsoundex,
         "fuzzy rsoundex function works",
     )
+    limbo.run_test_fn(
+        "SELECT fuzzy_translit('oh my 😅');",
+        validate_fuzzy_translit1,
+        "fuzzy translit1 function works",
+    )
+    limbo.run_test_fn(
+        "SELECT fuzzy_translit('привет');",
+        validate_fuzzy_translit2,
+        "fuzzy translit2 function works",
+    )
+    limbo.run_test_fn(
+        "SELECT fuzzy_script('داناوانب');",
+        validate_fuzzy_script,
+        "fuzzy script function works",
+    )
 
 def test_vfs():
     limbo = TestTursoShell()

From 255e357547bb8e15b709334e24a42d2280f165e8 Mon Sep 17 00:00:00 2001
From: Nikita Sivukhin
Date: Fri, 26 Sep 2025 13:12:46 +0400
Subject: [PATCH 09/65] resolve column alias after rewriting column access in
 the expression in returning insert clause

---
 core/translate/expr.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/translate/expr.rs b/core/translate/expr.rs
index dddfec85e..dd8f8d117 100644
--- a/core/translate/expr.rs
+++ b/core/translate/expr.rs
@@ -4093,8 +4093,6 @@ pub fn process_returning_clause(
     for rc in returning.iter_mut() {
         match rc {
             ast::ResultColumn::Expr(expr, alias) => {
-                let column_alias = determine_column_alias(expr, alias, table);
-
                 bind_and_rewrite_expr(
                     expr,
                     Some(&mut table_references),
@@ -4104,6 +4102,8 @@
                     BindingBehavior::TryResultColumnsFirst,
                 )?;
 
+                let column_alias = determine_column_alias(expr, alias, table);
+
                 result_columns.push(ResultSetColumn {
                     expr: *expr.clone(),
                     alias: column_alias,

From fdabbed539d5dfaefb2f93b750ccc9f5b240cc4c Mon Sep 17 00:00:00 2001
From: Pavan-Nambi
Date: Fri, 26 Sep 2025 15:07:33 +0530
Subject: [PATCH 10/65] length shall not count when it sees null char

---
 core/vdbe/execute.rs          | 8 ++++++--
 testing/scalar-functions.test | 5 +++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 3f3fb3c15..b4ce9e425 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -8012,8 +8012,12 @@ impl Value {
     pub fn exec_length(&self) -> Self {
         match self {
             Value::Text(t) => {
-                // Count Unicode scalar values (characters)
-                Value::Integer(t.as_str().chars().count() as i64)
+                let s = t.as_str();
+                let len_before_null = s.find('\0').map_or_else(
+                    || s.chars().count(),
+                    |null_pos| s[..null_pos].chars().count(),
+                );
+                Value::Integer(len_before_null as i64)
             }
             Value::Integer(_) | Value::Float(_) => {
                 // For numbers, SQLite returns the length of the string representation
diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test
index 9ef2f74b3..46133cd14 100755
--- a/testing/scalar-functions.test
+++ b/testing/scalar-functions.test
@@ -939,6 +939,11 @@ do_execsql_test parse-large-integral-numeric-string-as-number {
     SELECT (-104614899632619 || 45597) > CAST(0 AS NUMERIC);
 } {0}
 
+# https://github.com/tursodatabase/turso/issues/3317
+do_execsql_test length-999123 {
+    select length('a' || char(0) || 'b');
+} {1}
+
 # TODO: sqlite seems not enable soundex() by default unless build it with SQLITE_SOUNDEX enabled.
 # do_execsql_test soundex-text {
 #     select soundex('Pfister'), soundex('husobee'), soundex('Tymczak'), soundex('Ashcraft'), soundex('Robert'), soundex('Rupert'), soundex('Rubin'), soundex('Kant'), soundex('Knuth'), soundex('x'), soundex('');
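
For reference, the new behavior matches SQLite's documented length() semantics for text values: characters are counted only up to the first NUL character. The regression test above pins exactly that:

    select length('a' || char(0) || 'b');
    -- 1: only the 'a' before the embedded NUL is counted,
    -- even though three characters were concatenated
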
From 83d8a7c7754be9960632b7c7d0cece8683156ddd Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Fri, 26 Sep 2025 12:47:31 +0200
Subject: [PATCH 11/65] core/mvcc/logical-log: lock recover logical log
 process

---
 core/mvcc/database/mod.rs                   | 55 ++++++++++++++++
 core/mvcc/persistent_storage/logical_log.rs | 69 +++++++--------------
 core/mvcc/persistent_storage/mod.rs         | 16 ++++-
 3 files changed, 93 insertions(+), 47 deletions(-)

diff --git a/core/mvcc/database/mod.rs b/core/mvcc/database/mod.rs
index 06c935bc4..0383182de 100644
--- a/core/mvcc/database/mod.rs
+++ b/core/mvcc/database/mod.rs
@@ -14,6 +14,7 @@ use crate::types::IOResult;
 use crate::types::ImmutableRecord;
 use crate::types::SeekResult;
 use crate::Completion;
+use crate::File;
 use crate::IOExt;
 use crate::LimboError;
 use crate::Result;
@@ -32,6 +33,9 @@ use tracing::Level;
 pub mod checkpoint_state_machine;
 pub use checkpoint_state_machine::{CheckpointState, CheckpointStateMachine};
 
+use super::persistent_storage::logical_log::StreamingLogicalLogReader;
+use super::persistent_storage::logical_log::StreamingResult;
+
 #[cfg(test)]
 pub mod tests;
 
@@ -855,6 +859,10 @@ pub struct MvStore {
     /// If there are two concurrent BEGIN (non-CONCURRENT) transactions, and one tries to promote
     /// to exclusive, it will abort if another transaction committed after its begin timestamp.
     last_committed_tx_ts: AtomicU64,
+
+    /// Lock used while recovering a logical log file. We don't want multiple connections trying to
+    /// load the file.
+    recover_lock: RwLock<()>,
 }
 
 impl MvStore {
@@ -878,6 +886,7 @@ impl MvStore {
             checkpointed_txid_max: AtomicU64::new(0),
             last_committed_schema_change_ts: AtomicU64::new(0),
             last_committed_tx_ts: AtomicU64::new(0),
+            recover_lock: RwLock::new(()),
         }
     }
 
@@ -1739,6 +1748,52 @@ impl MvStore {
             .unwrap_or(None);
         last_rowid
     }
+
+    pub fn needs_recover(&self) -> bool {
+        self.storage.needs_recover()
+    }
+
+    pub fn mark_recovered(&self) {
+        self.storage.mark_recovered();
+    }
+
+    pub fn get_logical_log_file(&self) -> Arc<dyn File> {
+        self.storage.get_logical_log_file()
+    }
+
+    pub fn recover_logical_log(&self, io: &Arc<dyn IO>, pager: &Arc<Pager>) -> Result<()> {
+        // Get the lock; if we don't get it we will wait until recovery finishes in another connection
+        // and then return.
+        let _recover_guard = self.recover_lock.write();
+        if !self.storage.needs_recover() {
+            // another connection completed recovery
+            return Ok(());
+        }
+        let file = self.get_logical_log_file();
+        let mut reader = StreamingLogicalLogReader::new(file.clone());
+
+        let c = reader.read_header()?;
+        io.wait_for_completion(c)?;
+        let tx_id = 0;
+        self.begin_load_tx(pager.clone())?;
+        loop {
+            match reader.next_record(io).unwrap() {
+                StreamingResult::InsertRow { row, rowid } => {
+                    tracing::trace!("read {rowid:?}");
+                    self.insert(tx_id, row)?;
+                }
+                StreamingResult::DeleteRow { rowid } => {
+                    self.delete(tx_id, rowid)?;
+                }
+                StreamingResult::Eof => {
+                    break;
+                }
+            }
+        }
+        self.commit_load_tx(tx_id);
+        self.mark_recovered();
+        Ok(())
+    }
 }
 
 /// A write-write conflict happens when transaction T_current attempts to update a
diff --git a/core/mvcc/persistent_storage/logical_log.rs b/core/mvcc/persistent_storage/logical_log.rs
index b64cb5acc..987477aa5 100644
--- a/core/mvcc/persistent_storage/logical_log.rs
+++ b/core/mvcc/persistent_storage/logical_log.rs
@@ -2,22 +2,20 @@
 #![allow(dead_code)]
 use crate::{
     io::ReadComplete,
-    mvcc::{
-        database::{LogRecord, Row, RowID, RowVersion},
-        LocalClock, MvStore,
-    },
+    mvcc::database::{LogRecord, Row, RowID, RowVersion},
     storage::sqlite3_ondisk::{read_varint, write_varint_to_vec},
     turso_assert,
    types::{IOCompletions, ImmutableRecord},
-    Buffer, Completion, CompletionError, LimboError, Pager, Result,
+    Buffer, Completion, CompletionError, LimboError, Result,
 };
 use std::{cell::RefCell, sync::Arc};
 
 use crate::{types::IOResult, File};
 
 pub struct LogicalLog {
-    file: Arc<dyn File>,
+    pub file: Arc<dyn File>,
     offset: u64,
+    recover: bool,
 }
 
 /// Log's Header, this will be the 64 bytes in any logical log file.
@@ -138,7 +136,12 @@ impl LogRecordType {
 
 impl LogicalLog {
     pub fn new(file: Arc<dyn File>) -> Self {
-        Self { file, offset: 0 }
+        let recover = file.size().unwrap() > 0;
+        Self {
+            file,
+            offset: 0,
+            recover,
+        }
     }
 
     pub fn log_tx(&mut self, tx: &LogRecord) -> Result<Completion> {
@@ -213,9 +216,17 @@ impl LogicalLog {
         self.offset = 0;
         Ok(IOResult::IO(IOCompletions::Single(c)))
     }
+
+    pub fn needs_recover(&self) -> bool {
+        self.recover
+    }
+
+    pub fn mark_recovered(&mut self) {
+        self.recover = false;
+    }
 }
 
-enum StreamingResult {
+pub enum StreamingResult {
     InsertRow { row: Row, rowid: RowID },
     DeleteRow { rowid: RowID },
     Eof,
@@ -230,7 +241,7 @@ enum StreamingState {
     },
 }
 
-struct StreamingLogicalLogReader {
+pub struct StreamingLogicalLogReader {
     file: Arc<dyn File>,
     /// Offset to read from file
     offset: usize,
@@ -436,38 +447,6 @@ impl StreamingLogicalLogReader {
     }
 }
 
-pub fn load_logical_log(
-    mv_store: &Arc<MvStore<LocalClock>>,
-    file: Arc<dyn File>,
-    io: &Arc<dyn IO>,
-    pager: &Arc<Pager>,
-) -> Result<LogicalLog> {
-    let mut reader = StreamingLogicalLogReader::new(file.clone());
-
-    let c = reader.read_header()?;
-    io.wait_for_completion(c)?;
-    let tx_id = 0;
-    mv_store.begin_load_tx(pager.clone())?;
-    loop {
-        match reader.next_record(io).unwrap() {
-            StreamingResult::InsertRow { row, rowid } => {
-                tracing::trace!("read {rowid:?}");
-                mv_store.insert(tx_id, row)?;
-            }
-            StreamingResult::DeleteRow { rowid } => {
-                mv_store.delete(tx_id, rowid)?;
-            }
-            StreamingResult::Eof => {
-                break;
-            }
-        }
-    }
-    mv_store.commit_load_tx(tx_id);
-
-    let logical_log = LogicalLog::new(file);
-    Ok(logical_log)
-}
-
 #[cfg(test)]
 mod tests {
     use std::{collections::HashSet, sync::Arc};
@@ -491,7 +470,7 @@
         OpenFlags, RefValue,
     };
 
-    use super::{load_logical_log, LogRecordType};
+    use super::LogRecordType;
 
     #[test]
     fn test_logical_log_read() {
@@ -517,7 +496,7 @@ mod tests {
         let file = io.open_file(log_file, OpenFlags::ReadOnly, false).unwrap();
         let mvcc_store = Arc::new(MvStore::new(LocalClock::new(), Storage::new(file.clone())));
 
-        load_logical_log(&mvcc_store, file, &io, &pager).unwrap();
+        mvcc_store.recover_logical_log(&io, &pager).unwrap();
         let tx = mvcc_store.begin_tx(pager.clone()).unwrap();
         let row = mvcc_store.read(tx, RowID::new(1, 1)).unwrap().unwrap();
         let record = ImmutableRecord::from_bin_record(row.data.clone());
@@ -559,7 +538,7 @@ mod tests {
         let file = io.open_file(log_file, OpenFlags::ReadOnly, false).unwrap();
         let mvcc_store = Arc::new(MvStore::new(LocalClock::new(), Storage::new(file.clone())));
 
-        load_logical_log(&mvcc_store, file, &io, &pager).unwrap();
+        mvcc_store.recover_logical_log(&io, &pager).unwrap();
         for (rowid, value) in &values {
             let tx = mvcc_store.begin_tx(pager.clone()).unwrap();
             let row = mvcc_store.read(tx, *rowid).unwrap().unwrap();
@@ -650,7 +629,7 @@ mod tests {
 
         let file = io.open_file(log_file, OpenFlags::ReadOnly, false).unwrap();
         let mvcc_store = Arc::new(MvStore::new(LocalClock::new(), Storage::new(file.clone())));
-        load_logical_log(&mvcc_store, file, &io, &pager).unwrap();
+        mvcc_store.recover_logical_log(&io, &pager).unwrap();
 
         // Check rowids that weren't deleted
         let tx = mvcc_store.begin_tx(pager.clone()).unwrap();
diff --git a/core/mvcc/persistent_storage/mod.rs b/core/mvcc/persistent_storage/mod.rs
index 58af1b849..0d5d2967f 100644
--- a/core/mvcc/persistent_storage/mod.rs
+++ b/core/mvcc/persistent_storage/mod.rs
@@ -1,14 +1,14 @@
 use std::fmt::Debug;
 use std::sync::{Arc, RwLock};
 
-mod logical_log;
+pub mod logical_log;
 use crate::mvcc::database::LogRecord;
 use crate::mvcc::persistent_storage::logical_log::LogicalLog;
 use crate::types::IOResult;
 use crate::{File, Result};
 
 pub struct Storage {
-    logical_log: RwLock<LogicalLog>,
+    pub logical_log: RwLock<LogicalLog>,
 }
 
 impl Storage {
@@ -35,6 +35,18 @@ impl Storage {
     pub fn truncate(&self) -> Result<IOResult<()>> {
         self.logical_log.write().unwrap().truncate()
     }
+
+    pub fn needs_recover(&self) -> bool {
+        self.logical_log.read().unwrap().needs_recover()
+    }
+
+    pub fn mark_recovered(&self) {
+        self.logical_log.write().unwrap().mark_recovered();
+    }
+
+    pub fn get_logical_log_file(&self) -> Arc<dyn File> {
+        self.logical_log.write().unwrap().file.clone()
+    }
 }
 
 impl Debug for Storage {
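
The recovery path added above is a double-checked pattern: check needs_recover() cheaply, take the write lock, then re-check before doing any work, so a connection that blocked on the lock while another connection recovered simply returns. A minimal self-contained sketch of the idiom (the names below are illustrative, not the patch's actual types; the patch keeps its flag behind the logical log's own RwLock rather than in an atomic):

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::RwLock;

    struct RecoverOnce {
        needed: AtomicBool,
        lock: RwLock<()>,
    }

    impl RecoverOnce {
        fn run(&self, work: impl FnOnce()) {
            if !self.needed.load(Ordering::Acquire) {
                return; // fast path: nothing to recover
            }
            let _guard = self.lock.write().unwrap(); // serialize would-be recoverers
            if !self.needed.load(Ordering::Acquire) {
                return; // another thread finished recovery while we waited
            }
            work();
            self.needed.store(false, Ordering::Release); // analogous to mark_recovered()
        }
    }
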
From 2a7abd82f77e49639ea90be5061a78a2b2fe52b8 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Fri, 26 Sep 2025 12:47:52 +0200
Subject: [PATCH 12/65] core/lib: recover mvcc logical log if needed on
 connect

---
 core/lib.rs | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/core/lib.rs b/core/lib.rs
index 55c9310d2..790a3940b 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -403,7 +403,8 @@ impl Database {
         let mv_store = if opts.enable_mvcc {
             let file = io.open_file(&format!("{path}-lg"), OpenFlags::default(), false)?;
             let storage = mvcc::persistent_storage::Storage::new(file);
-            Some(Arc::new(MvStore::new(mvcc::LocalClock::new(), storage)))
+            let mv_store = MvStore::new(mvcc::LocalClock::new(), storage);
+            Some(Arc::new(mv_store))
         } else {
             None
         };
@@ -482,6 +483,11 @@ impl Database {
     #[instrument(skip_all, level = Level::INFO)]
     pub fn connect(self: &Arc<Self>) -> Result<Arc<Connection>> {
         let pager = self.init_pager(None)?;
+        let pager = Arc::new(pager);
+
+        if self.mv_store.is_some() {
+            self.maybe_recover_logical_log(pager.clone())?;
+        }
 
         let page_size = pager.get_page_size_unchecked();
 
@@ -492,7 +498,7 @@
             .get();
         let conn = Arc::new(Connection {
             db: self.clone(),
-            pager: RwLock::new(Arc::new(pager)),
+            pager: RwLock::new(pager),
             schema: RwLock::new(
                 self.schema
                     .lock()
@@ -532,6 +538,18 @@
         Ok(conn)
     }
 
+    pub fn maybe_recover_logical_log(self: &Arc<Self>, pager: Arc<Pager>) -> Result<()> {
+        let Some(mv_store) = self.mv_store.clone() else {
+            panic!("trying to recover logical log without mvcc");
+
+        };
+        if !mv_store.needs_recover() {
+            return Ok(());
+        }
+
+        mv_store.recover_logical_log(&self.io, &pager)
+    }
+
     pub fn is_readonly(&self) -> bool {
         self.open_flags.contains(OpenFlags::ReadOnly)
     }

From ae994146af639e7874526d514652108b0d0b92a0 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Fri, 26 Sep 2025 12:56:43 +0200
Subject: [PATCH 13/65] core/mvcc/logical-log: on mvcc restart clear
 DATABASE_MANAGER

---
 core/mvcc/database/tests.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/core/mvcc/database/tests.rs b/core/mvcc/database/tests.rs
index 54476eb04..ea2761240 100644
--- a/core/mvcc/database/tests.rs
+++ b/core/mvcc/database/tests.rs
@@ -71,6 +71,14 @@ impl MvccTestDbNoConn {
 
     /// Restarts the database, make sure there is no connection to the database open before calling this!
     pub fn restart(&mut self) {
+        // First let's clear any entries in database manager in order to force restart.
+        // If not, we will load the same database instance again.
+        {
+            let mut manager = DATABASE_MANAGER.lock().unwrap();
+            manager.clear();
+        }
+
+        // Now open again.
         let io = Arc::new(PlatformIO::new().unwrap());
         let path = self.path.as_ref().unwrap();
         let db = Database::open_file(io.clone(), path, true, true).unwrap();
@@ -705,10 +713,10 @@ fn test_future_row() {
 use crate::mvcc::cursor::MvccLazyCursor;
 use crate::mvcc::database::{MvStore, Row, RowID};
 use crate::types::Text;
-use crate::RefValue;
 use crate::Value;
 use crate::{Database, StepResult};
 use crate::{MemoryIO, Statement};
+use crate::{RefValue, DATABASE_MANAGER};
 
 
 // Simple atomic clock implementation for testing

From 4cdf293a2b655e7042c5120c5917b523fd38cf38 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Fri, 26 Sep 2025 12:56:58 +0200
Subject: [PATCH 14/65] core/mvcc/logical-log: fuzz test recover use
 db.restart

---
 core/mvcc/persistent_storage/logical_log.rs | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/core/mvcc/persistent_storage/logical_log.rs b/core/mvcc/persistent_storage/logical_log.rs
index 987477aa5..41e29ab21 100644
--- a/core/mvcc/persistent_storage/logical_log.rs
+++ b/core/mvcc/persistent_storage/logical_log.rs
@@ -596,7 +596,7 @@ mod tests {
             txns.push(ops);
         }
         // let's not drop db as we don't want files to be removed
-        let db = MvccTestDbNoConn::new_with_random_db();
+        let mut db = MvccTestDbNoConn::new_with_random_db();
         let (io, pager) = {
             let conn = db.connect();
             let pager = conn.pager.read().clone();
@@ -624,12 +624,10 @@
             (db.io.clone(), pager)
         };
 
-        // Now try to read it back
-        let log_file = db.get_log_path();
-
-        let file = io.open_file(log_file, OpenFlags::ReadOnly, false).unwrap();
-        let mvcc_store = Arc::new(MvStore::new(LocalClock::new(), Storage::new(file.clone())));
-        mvcc_store.recover_logical_log(&io, &pager).unwrap();
+        db.restart();
+        // connect after restart should recover log.
+        let conn = db.connect();
+        let mvcc_store = db.get_mvcc_store();
 
         // Check rowids that weren't deleted
         let tx = mvcc_store.begin_tx(pager.clone()).unwrap();
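
With the pieces above in place, the fuzz test no longer rebuilds an MvStore by hand; recovery rides on the normal open path. The flow the test now relies on, annotated (lines taken from the test itself, commentary added):

    db.restart();            // patch 13: DATABASE_MANAGER is cleared, so a fresh Database is built
    let conn = db.connect(); // patch 12: connect() sees needs_recover() and replays the logical log
    let mvcc_store = db.get_mvcc_store(); // the replayed rows are visible through the store
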
From 9c1d94a3559710e7d7fa7ecd66642051daa2a007 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Fri, 26 Sep 2025 12:59:13 +0200
Subject: [PATCH 15/65] core/mvcc/logical-log: assert we don't call
 begin_load_tx twice

---
 core/mvcc/database/mod.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/core/mvcc/database/mod.rs b/core/mvcc/database/mod.rs
index 0383182de..447956358 100644
--- a/core/mvcc/database/mod.rs
+++ b/core/mvcc/database/mod.rs
@@ -1250,6 +1250,10 @@ impl MvStore {
         let header = self.get_new_transaction_database_header(&pager);
         let tx = Transaction::new(tx_id, begin_ts, header);
         tracing::trace!("begin_load_tx(tx_id={tx_id})");
+        assert!(
+            !self.txs.contains_key(&tx_id),
+            "somehow we tried to call begin_load_tx twice"
+        );
         self.txs.insert(tx_id, tx);
 
         Ok(())

From 59d3e37b9fdb2b50c4a80822ce8ffa2c924a366e Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Fri, 26 Sep 2025 13:01:13 +0200
Subject: [PATCH 16/65] fmt

---
 core/lib.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/lib.rs b/core/lib.rs
index 790a3940b..ffa8c967c 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -541,7 +541,6 @@ impl Database {
     pub fn maybe_recover_logical_log(self: &Arc<Self>, pager: Arc<Pager>) -> Result<()> {
         let Some(mv_store) = self.mv_store.clone() else {
             panic!("trying to recover logical log without mvcc");
-
         };
         if !mv_store.needs_recover() {
             return Ok(());

From 96accef06ce59e58efe2130ed2f98b0ed7b563a4 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Fri, 26 Sep 2025 14:02:12 +0300
Subject: [PATCH 17/65] core/mvcc: Wrap header with RwLock

---
 core/mvcc/persistent_storage/logical_log.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/mvcc/persistent_storage/logical_log.rs b/core/mvcc/persistent_storage/logical_log.rs
index b64cb5acc..9c436ecb0 100644
--- a/core/mvcc/persistent_storage/logical_log.rs
+++ b/core/mvcc/persistent_storage/logical_log.rs
@@ -260,10 +260,10 @@ impl StreamingLogicalLogReader {
 
     pub fn read_header(&mut self) -> Result<Completion> {
         let header_buf = Arc::new(Buffer::new_temporary(LOG_HEADER_MAX_SIZE));
-        let header = Arc::new(RefCell::new(LogHeader::default()));
+        let header = Arc::new(RwLock::new(LogHeader::default()));
         let completion: Box<ReadComplete> = Box::new(move |res| {
             let header = header.clone();
-            let mut header = header.borrow_mut();
+            let mut header = header.write().unwrap();
             let Ok((buf, bytes_read)) = res else {
                 tracing::error!("couldn't read log err={:?}", res,);
                 return;

From 1402e9841e9844db07f129a8dbc0fc373d44d1d1 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Fri, 26 Sep 2025 14:17:51 +0300
Subject: [PATCH 18/65] core/mvcc: Wrap StreamingLogicalLogReader::buffer with
 RwLock

---
 core/mvcc/persistent_storage/logical_log.rs | 26 +++++++++++----------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/core/mvcc/persistent_storage/logical_log.rs b/core/mvcc/persistent_storage/logical_log.rs
index 9c436ecb0..89ac6d053 100644
--- a/core/mvcc/persistent_storage/logical_log.rs
+++ b/core/mvcc/persistent_storage/logical_log.rs
@@ -11,7 +11,7 @@ use crate::{
     types::{IOCompletions, ImmutableRecord},
     Buffer, Completion, CompletionError, LimboError, Pager, Result,
 };
-use std::{cell::RefCell, sync::Arc};
+use std::sync::{Arc, RwLock};
 
 use crate::{types::IOResult, File};
 
@@ -237,7 +237,7 @@ struct StreamingLogicalLogReader {
     /// Log Header
     header: Option<LogHeader>,
     /// Cached buffer after io read
-    buffer: Arc<RefCell<Vec<u8>>>,
+    buffer: Arc<RwLock<Vec<u8>>>,
     /// Position to read from loaded buffer
     buffer_offset: usize,
     file_size: usize,
@@ -251,7 +251,7 @@ impl StreamingLogicalLogReader {
             file,
             offset: 0,
             header: None,
-            buffer: Arc::new(RefCell::new(Vec::with_capacity(4096))),
+            buffer: Arc::new(RwLock::new(Vec::with_capacity(4096))),
             buffer_offset: 0,
             file_size,
             state: StreamingState::NeedTransactionStart,
@@ -366,7 +366,7 @@ impl StreamingLogicalLogReader {
 
     fn consume_u8(&mut self, io: &Arc<dyn IO>) -> Result<u8> {
         self.read_more_data(io, 1)?;
-        let r = self.buffer.borrow()[self.buffer_offset];
+        let r = self.buffer.read().unwrap()[self.buffer_offset];
         self.buffer_offset += 1;
         Ok(r)
     }
@@ -374,7 +374,7 @@ impl StreamingLogicalLogReader {
     fn consume_u64(&mut self, io: &Arc<dyn IO>) -> Result<u64> {
         self.read_more_data(io, 8)?;
         let r = u64::from_be_bytes(
-            self.buffer.borrow()[self.buffer_offset..self.buffer_offset + 8]
+            self.buffer.read().unwrap()[self.buffer_offset..self.buffer_offset + 8]
                 .try_into()
                 .unwrap(),
         );
@@ -384,7 +384,8 @@
 
     fn consume_varint(&mut self, io: &Arc<dyn IO>) -> Result<(u64, usize)> {
         self.read_more_data(io, 9)?;
-        let buffer = &self.buffer.borrow()[self.buffer_offset..];
+        let buffer_guard = self.buffer.read().unwrap();
+        let buffer = &buffer_guard[self.buffer_offset..];
         let (v, n) = read_varint(buffer)?;
         self.buffer_offset += n;
         Ok((v, n))
@@ -392,13 +393,14 @@
 
     fn consume_buffer(&mut self, io: &Arc<dyn IO>, amount: usize) -> Result<Vec<u8>> {
         self.read_more_data(io, amount)?;
-        let buffer = self.buffer.borrow()[self.buffer_offset..self.buffer_offset + amount].to_vec();
+        let buffer =
+            self.buffer.read().unwrap()[self.buffer_offset..self.buffer_offset + amount].to_vec();
         self.buffer_offset += amount;
         Ok(buffer)
     }
 
-    fn get_buffer(&self) -> std::cell::Ref<'_, Vec<u8>> {
-        self.buffer.borrow()
+    fn get_buffer(&self) -> std::sync::RwLockReadGuard<'_, Vec<u8>> {
+        self.buffer.read().unwrap()
     }
 
     pub fn read_more_data(&mut self, io: &Arc<dyn IO>, need: usize) -> Result<()> {
@@ -412,7 +414,7 @@
         let buffer = self.buffer.clone();
         let completion: Box<ReadComplete> = Box::new(move |res| {
             let buffer = buffer.clone();
-            let mut buffer = buffer.borrow_mut();
+            let mut buffer = buffer.write().unwrap();
             let Ok((buf, _bytes_read)) = res else {
                 tracing::trace!("couldn't read log err={:?}", res,);
                 return;
@@ -426,13 +428,13 @@
         self.offset += to_read;
         // cleanup consumed bytes
         // this could be better for sure
-        let _ = self.buffer.borrow_mut().drain(0..self.buffer_offset);
+        let _ = self.buffer.write().unwrap().drain(0..self.buffer_offset);
         self.buffer_offset = 0;
         Ok(())
     }
 
     fn bytes_can_read(&self) -> usize {
-        self.buffer.borrow().len() - self.buffer_offset
+        self.buffer.read().unwrap().len() - self.buffer_offset
     }
 }
 

From 9e47cc3700fa2098b5492eeb8c6e236dc1be37e8 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Fri, 26 Sep 2025 14:16:11 +0200
Subject: [PATCH 19/65] clippy

---
 core/mvcc/persistent_storage/logical_log.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/core/mvcc/persistent_storage/logical_log.rs b/core/mvcc/persistent_storage/logical_log.rs
index 41e29ab21..2789acff4 100644
--- a/core/mvcc/persistent_storage/logical_log.rs
+++ b/core/mvcc/persistent_storage/logical_log.rs
@@ -597,7 +597,7 @@ mod tests {
         }
         // let's not drop db as we don't want files to be removed
         let mut db = MvccTestDbNoConn::new_with_random_db();
-        let (io, pager) = {
+        let pager = {
             let conn = db.connect();
             let pager = conn.pager.read().clone();
            let 
mvcc_store = db.get_mvcc_store(); @@ -620,13 +620,13 @@ } conn.close().unwrap(); - let db = db.get_db(); - (db.io.clone(), pager) + pager }; db.restart(); + // connect after restart should recover log. - let conn = db.connect(); + let _conn = db.connect(); let mvcc_store = db.get_mvcc_store(); // Check rowids that weren't deleted From a783f8247009572d6430422cfba7954c4f5c7f00 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 26 Sep 2025 15:46:16 +0300 Subject: [PATCH 20/65] bench/tpch: remove "cast('yyyy-mm-dd' as datetime)" this causes sqlite and tursodb to interpret the value as just 'yyyy', e.g. '1995-01-01' becomes '1995', causing a lot of the queries not to return any results, which is not what we want.
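A minimal SQL sketch of the pitfall (table and literals assumed for illustration): SQLite does not recognize 'datetime' as a type name, so the cast falls back to NUMERIC affinity and keeps only the longest numeric prefix of the text.

select cast('1995-01-01' as datetime);
-- 1995 (an integer, not a date)
select '1995-06-15' between '1995-01-01' and '1995-12-31';
-- 1 (plain text comparison, as used after this change, orders ISO dates correctly)

--- perf/tpc-h/queries/1.sql | 2 +- perf/tpc-h/queries/10.sql | 4 ++-- perf/tpc-h/queries/12.sql | 4 ++-- perf/tpc-h/queries/14.sql | 4 ++-- perf/tpc-h/queries/15.sql | 4 ++-- perf/tpc-h/queries/20.sql | 4 ++-- perf/tpc-h/queries/3.sql | 4 ++-- perf/tpc-h/queries/4.sql | 4 ++-- perf/tpc-h/queries/5.sql | 4 ++-- perf/tpc-h/queries/6.sql | 4 ++-- perf/tpc-h/queries/7.sql | 2 +- perf/tpc-h/queries/8.sql | 2 +- 12 files changed, 21 insertions(+), 21 deletions(-) diff --git a/perf/tpc-h/queries/1.sql b/perf/tpc-h/queries/1.sql index 9a7fbc6c4..7708b593e 100644 --- a/perf/tpc-h/queries/1.sql +++ b/perf/tpc-h/queries/1.sql @@ -12,7 +12,7 @@ select from lineitem where - l_shipdate <= cast('1998-12-01' as datetime) -- modified not to include cast({'day': 71} as interval) + l_shipdate <= '1998-12-01' -- modified not to include cast({'day': 71} as interval) group by l_returnflag, l_linestatus diff --git a/perf/tpc-h/queries/10.sql b/perf/tpc-h/queries/10.sql index a9e16acee..adc6eac61 100644 --- a/perf/tpc-h/queries/10.sql +++ b/perf/tpc-h/queries/10.sql @@ -15,8 +15,8 @@ from where c_custkey = o_custkey and l_orderkey = o_orderkey - and o_orderdate >= cast('1994-01-01' as datetime) - and o_orderdate < cast('1994-04-01' as datetime) -- modified not to include cast({'month': 3} as interval) + and o_orderdate >= '1994-01-01' + and o_orderdate < '1994-04-01' -- modified not to include cast({'month': 3} as interval) and l_returnflag = 'R' and c_nationkey = n_nationkey group by diff --git a/perf/tpc-h/queries/12.sql b/perf/tpc-h/queries/12.sql index 038af1d66..8324c0583 100644 --- a/perf/tpc-h/queries/12.sql +++ b/perf/tpc-h/queries/12.sql @@ -20,8 +20,8 @@ where and l_shipmode in ('FOB', 'SHIP') and l_commitdate < l_receiptdate and l_shipdate < l_commitdate - and l_receiptdate >= cast('1994-01-01' as datetime) - and l_receiptdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval) + and l_receiptdate >= '1994-01-01' + and l_receiptdate < '1995-01-01' -- modified not to include cast({'year': 1} as interval) group by l_shipmode order by diff --git a/perf/tpc-h/queries/14.sql b/perf/tpc-h/queries/14.sql index 439042916..b875be0eb 100644 --- a/perf/tpc-h/queries/14.sql +++ b/perf/tpc-h/queries/14.sql @@ -9,5 +9,5 @@ from part where l_partkey = p_partkey - and l_shipdate >= cast('1994-03-01' as datetime) - and l_shipdate < cast('1994-04-01' as datetime); -- modified not to include cast({'month': 1} as interval) + and l_shipdate >= '1994-03-01' + and l_shipdate < '1994-04-01'; -- modified not to include cast({'month': 1} as interval) diff --git a/perf/tpc-h/queries/15.sql b/perf/tpc-h/queries/15.sql index c4d750c97..5fb6bf750 100644 --- a/perf/tpc-h/queries/15.sql +++ b/perf/tpc-h/queries/15.sql @@ -7,8 +7,8 @@ create view revenue0 (supplier_no,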
total_revenue) as from lineitem where - l_shipdate >= cast('1993-01-01' as datetime) - and l_shipdate < cast('1993-04-01' as datetime) -- modified not to include cast({'month': 3} as interval) + l_shipdate >= '1993-01-01' + and l_shipdate < '1993-04-01' -- modified not to include cast({'month': 3} as interval) group by l_suppkey; diff --git a/perf/tpc-h/queries/20.sql b/perf/tpc-h/queries/20.sql index cfcf520a6..c7fe1545d 100644 --- a/perf/tpc-h/queries/20.sql +++ b/perf/tpc-h/queries/20.sql @@ -30,8 +30,8 @@ where where l_partkey = ps_partkey and l_suppkey = ps_suppkey - and l_shipdate >= cast('1994-01-01' as datetime) - and l_shipdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval) + and l_shipdate >= '1994-01-01' + and l_shipdate < '1995-01-01' -- modified not to include cast({'year': 1} as interval) ) ) and s_nationkey = n_nationkey diff --git a/perf/tpc-h/queries/3.sql b/perf/tpc-h/queries/3.sql index cbea08906..5364073b4 100644 --- a/perf/tpc-h/queries/3.sql +++ b/perf/tpc-h/queries/3.sql @@ -11,8 +11,8 @@ where c_mktsegment = 'FURNITURE' and c_custkey = o_custkey and l_orderkey = o_orderkey - and o_orderdate < cast('1995-03-29' as datetime) - and l_shipdate > cast('1995-03-29' as datetime) + and o_orderdate < '1995-03-29' + and l_shipdate > '1995-03-29' group by l_orderkey, o_orderdate, diff --git a/perf/tpc-h/queries/4.sql b/perf/tpc-h/queries/4.sql index 649d55eef..b461356fe 100644 --- a/perf/tpc-h/queries/4.sql +++ b/perf/tpc-h/queries/4.sql @@ -7,8 +7,8 @@ select from orders where - o_orderdate >= cast('1997-06-01' as datetime) - and o_orderdate < cast('1997-09-01' as datetime) -- modified not to include cast({'month': 3} as interval) + o_orderdate >= '1997-06-01' + and o_orderdate < '1997-09-01' -- modified not to include cast({'month': 3} as interval) and exists ( select * diff --git a/perf/tpc-h/queries/5.sql b/perf/tpc-h/queries/5.sql index 01bcc19a5..3c6922643 100644 --- a/perf/tpc-h/queries/5.sql +++ b/perf/tpc-h/queries/5.sql @@ -16,8 +16,8 @@ where and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'MIDDLE EAST' - and o_orderdate >= cast('1994-01-01' as datetime) - and o_orderdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval) + and o_orderdate >= '1994-01-01' + and o_orderdate < '1995-01-01' -- modified not to include cast({'year': 1} as interval) group by n_name order by diff --git a/perf/tpc-h/queries/6.sql b/perf/tpc-h/queries/6.sql index 9114d1f40..a6368732c 100644 --- a/perf/tpc-h/queries/6.sql +++ b/perf/tpc-h/queries/6.sql @@ -3,7 +3,7 @@ select from lineitem where - l_shipdate >= cast('1994-01-01' as datetime) - and l_shipdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval) + l_shipdate >= '1994-01-01' + and l_shipdate < '1995-01-01' -- modified not to include cast({'year': 1} as interval) and l_discount between 0.08 - 0.01 and 0.08 + 0.01 and l_quantity < 24; diff --git a/perf/tpc-h/queries/7.sql b/perf/tpc-h/queries/7.sql index 509548d03..bb202026d 100644 --- a/perf/tpc-h/queries/7.sql +++ b/perf/tpc-h/queries/7.sql @@ -28,7 +28,7 @@ from or (n1.n_name = 'INDIA' and n2.n_name = 'ROMANIA') ) and l_shipdate between - cast('1995-01-01' as datetime) and cast('1996-12-31' as datetime) + '1995-01-01' and '1996-12-31' ) as shipping group by supp_nation, diff --git a/perf/tpc-h/queries/8.sql b/perf/tpc-h/queries/8.sql index ba7fa2f73..ebec3e611 100644 --- a/perf/tpc-h/queries/8.sql +++ b/perf/tpc-h/queries/8.sql @@ -29,7 
+29,7 @@ from and r_name = 'ASIA' and s_nationkey = n2.n_nationkey and o_orderdate between - cast('1995-01-01' as datetime) and cast('1996-12-31' as datetime) + '1995-01-01' and '1996-12-31' and p_type = 'PROMO BRUSHED COPPER' ) as all_nations group by From 045b11b2552e01c1ef0a0b03ff0c243452cea20a Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 26 Sep 2025 16:02:37 +0300 Subject: [PATCH 21/65] bench/tpc-h: don't fail build if query 1 has output difference (known floating point precision issue) --- perf/tpc-h/run.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/perf/tpc-h/run.sh b/perf/tpc-h/run.sh index 1d572f388..df4cb020a 100755 --- a/perf/tpc-h/run.sh +++ b/perf/tpc-h/run.sh @@ -87,7 +87,12 @@ for query_file in $(ls "$QUERIES_DIR"/*.sql | sort -V); do if [ -n "$output_diff" ]; then echo "Output difference:" echo "$output_diff" - exit_code=1 + # Ignore differences for query 1 due to floating point precision incompatibility + if [ "$query_file" = "$QUERIES_DIR/1.sql" ]; then + echo "Ignoring output difference for query 1 (known floating point precision incompatibility)" + else + exit_code=1 + fi else echo "No output difference" fi From 52f3216211c6c8bc24e3fa26ac52258da624ada3 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Fri, 26 Sep 2025 17:11:06 +0400 Subject: [PATCH 22/65] fix avg aggregation - ignore NULL rows as SQLite does - emit NULL instead of NaN when no rows were aggregated - adjust agg column alias name
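For reference, a minimal SQL sketch of the intended semantics (throwaway table and values, following the SQLite documentation for avg()): NULL inputs are skipped entirely, and aggregating zero rows yields NULL rather than NaN.

create table t(x);
insert into t values (1), (2), (null);
select avg(x) from t;
-- 1.5: the NULL row is neither summed nor counted
select avg(x) from t where 0;
-- NULL: no rows aggregated, so no 0/0 division

--- .../packages/native/promise.test.ts | 31 +++++++++++++++++ core/translate/select.rs | 14 +++++--- core/vdbe/execute.rs | 32 ++++++++++------- .../query_processing/test_read_path.rs | 34 +++++++++++++++++++ 4 files changed, 95 insertions(+), 16 deletions(-) diff --git a/bindings/javascript/packages/native/promise.test.ts b/bindings/javascript/packages/native/promise.test.ts index 82d9e1064..422a6f56a 100644 --- a/bindings/javascript/packages/native/promise.test.ts +++ b/bindings/javascript/packages/native/promise.test.ts @@ -72,6 +72,37 @@ test('explicit connect', async () => { expect(await db.prepare("SELECT 1 as x").all()).toEqual([{ x: 1 }]); }) +test('avg-bug', async () => { + const db = await connect(':memory:'); + const create = db.prepare(`create table "aggregate_table" ( + "id" integer primary key autoincrement not null, + "name" text not null, + "a" integer, + "b" integer, + "c" integer, + "null_only" integer + );`); + + await create.run(); + const insert = db.prepare( + `insert into "aggregate_table" ("id", "name", "a", "b", "c", "null_only") values (null, ?, ?, ?, ?, null), (null, ?, ?, ?, ?, null), (null, ?, ?, ?, ?, null), (null, ?, ?, ?, ?, null), (null, ?, ?, ?, ?, null), (null, ?, ?, ?, ?, null), (null, ?, ?, ?, ?, null);`, + ); + + await insert.run( + 'value 1', 5, 10, 20, + 'value 1', 5, 20, 30, + 'value 2', 10, 50, 60, + 'value 3', 20, 20, null, + 'value 4', null, 90, 120, + 'value 5', 80, 10, null, + 'value 6', null, null, 150, + ); + + expect(await db.prepare(`select avg("a") from "aggregate_table";`).get()).toEqual({ 'avg ("a")': 24 }); + expect(await db.prepare(`select avg("null_only") from "aggregate_table";`).get()).toEqual({ 'avg ("null_only")': null }); + expect(await db.prepare(`select avg(distinct "b") from "aggregate_table";`).get()).toEqual({ 'avg (DISTINCT "b")': 42.5 }); +}) + test('on-disk db', async () => { const path = `test-${(Math.random() * 10000) | 0}.db`; try { diff --git a/core/translate/select.rs b/core/translate/select.rs index 3b305ba12..bd6d1a2b8 100644 --- a/core/translate/select.rs +++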
b/core/translate/select.rs @@ -369,6 +369,15 @@ fn prepare_one_select_plan( } } ResultColumn::Expr(ref mut expr, maybe_alias) => { + let alias = if let Some(alias) = maybe_alias { + match alias { + ast::As::Elided(alias) => alias.as_str().to_string(), + ast::As::As(alias) => alias.as_str().to_string(), + } + } else { + // we always emit alias - otherwise user will see very confusing column name (e.g. avg(t0.c1)) + expr.as_ref().to_string() + }; bind_and_rewrite_expr( expr, Some(&mut plan.table_references), @@ -385,10 +394,7 @@ fn prepare_one_select_plan( Some(&mut windows), )?; plan.result_columns.push(ResultSetColumn { - alias: maybe_alias.as_ref().map(|alias| match alias { - ast::As::Elided(alias) => alias.as_str().to_string(), - ast::As::As(alias) => alias.as_str().to_string(), - }), + alias: Some(alias), expr: expr.as_ref().clone(), contains_aggregates, }); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 4b1bdd4d1..fc55eee0e 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3637,17 +3637,21 @@ pub fn op_agg_step( match func { AggFunc::Avg => { let col = state.registers[*col].clone(); - let Register::Aggregate(agg) = state.registers[*acc_reg].borrow_mut() else { - panic!( - "Unexpected value {:?} in AggStep at register {}", - state.registers[*acc_reg], *acc_reg - ); - }; - let AggContext::Avg(acc, count) = agg.borrow_mut() else { - unreachable!(); - }; - *acc = acc.exec_add(col.get_value()); - *count += 1; + // > The avg() function returns the average value of all non-NULL X within a group + // https://sqlite.org/lang_aggfunc.html#avg + if !col.is_null() { + let Register::Aggregate(agg) = state.registers[*acc_reg].borrow_mut() else { + panic!( + "Unexpected value {:?} in AggStep at register {}", + state.registers[*acc_reg], *acc_reg + ); + }; + let AggContext::Avg(acc, count) = agg.borrow_mut() else { + unreachable!(); + }; + *acc = acc.exec_add(col.get_value()); + *count += 1; + } } AggFunc::Sum | AggFunc::Total => { let col = state.registers[*col].clone(); @@ -3915,7 +3919,11 @@ pub fn op_agg_final( let AggContext::Avg(acc, count) = agg else { unreachable!(); }; - let acc = acc.clone() / count.clone(); + let acc = if count.as_int() == Some(0) { + Value::Null + } else { + acc.clone() / count.clone() + }; state.registers[dest_reg] = Register::Value(acc); } AggFunc::Sum => { diff --git a/tests/integration/query_processing/test_read_path.rs b/tests/integration/query_processing/test_read_path.rs index 2285b33f8..3a1b3e52a 100644 --- a/tests/integration/query_processing/test_read_path.rs +++ b/tests/integration/query_processing/test_read_path.rs @@ -750,3 +750,37 @@ fn test_cte_alias() -> anyhow::Result<()> { } Ok(()) } + +#[test] +fn test_avg_agg() -> anyhow::Result<()> { + let tmp_db = TempDatabase::new_with_rusqlite("create table t (x, y);", false); + let conn = tmp_db.connect_limbo(); + conn.execute("insert into t values (1, null), (2, null), (3, null), (null, null), (4, null)")?; + let mut rows = Vec::new(); + let mut stmt = conn.prepare("select avg(x), avg(y) from t")?; + loop { + match stmt.step()? 
{ + StepResult::Row => { + let row = stmt.row().unwrap(); + rows.push(row.get_values().cloned().collect::<Vec<_>>()); + } + StepResult::Done => break, + StepResult::IO => stmt.run_once()?, + _ => panic!("Unexpected step result"), + } + } + + assert_eq!(stmt.num_columns(), 2); + assert_eq!(stmt.get_column_name(0), "avg (x)"); + assert_eq!(stmt.get_column_name(1), "avg (y)"); + + assert_eq!( + rows, + vec![vec![ + turso_core::Value::Float((1.0 + 2.0 + 3.0 + 4.0) / (4.0)), + turso_core::Value::Null + ]] + ); + + Ok(()) +} From 5b5379d0788e93e5564fe968b4a5d9cc18233a3a Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Fri, 26 Sep 2025 17:40:41 +0400 Subject: [PATCH 23/65] propagate context to stringifier to properly derive column names --- core/lib.rs | 8 +++++++- core/translate/select.rs | 14 ++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index b3e1c6a0d..8784f8aee 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -40,6 +40,7 @@ pub mod numeric; mod numeric; use crate::storage::checksum::CHECKSUM_REQUIRED_RESERVED_BYTES; +use crate::translate::display::PlanContext; use crate::translate::pragma::TURSO_CDC_DEFAULT_TABLE_NAME; #[cfg(all(feature = "fs", feature = "conn_raw_api"))] use crate::types::{WalFrameInfo, WalState}; @@ -91,6 +92,7 @@ pub use storage::{ }; use tracing::{instrument, Level}; use turso_macros::match_ignore_ascii_case; +use turso_parser::ast::fmt::ToTokens; use turso_parser::{ast, ast::Cmd, parser::Parser}; use types::IOResult; pub use types::RefValue; @@ -2562,7 +2564,11 @@ impl Statement { let column = &self.program.result_columns.get(idx).expect("No column"); match column.name(&self.program.table_references) { Some(name) => Cow::Borrowed(name), - None => Cow::Owned(column.expr.to_string()), + None => { + let tables = [&self.program.table_references]; + let ctx = PlanContext(&tables); + Cow::Owned(column.expr.displayer(&ctx).to_string()) + } } } QueryMode::Explain => Cow::Borrowed(EXPLAIN_COLUMNS[idx]), diff --git a/core/translate/select.rs b/core/translate/select.rs index bd6d1a2b8..3b305ba12 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -369,15 +369,6 @@ fn prepare_one_select_plan( } } ResultColumn::Expr(ref mut expr, maybe_alias) => { - let alias = if let Some(alias) = maybe_alias { - match alias { - ast::As::Elided(alias) => alias.as_str().to_string(), - ast::As::As(alias) => alias.as_str().to_string(), - } - } else { - // we always emit alias - otherwise user will see very confusing column name (e.g.
avg(t0.c1)) - expr.as_ref().to_string() - }; bind_and_rewrite_expr( expr, Some(&mut plan.table_references), @@ -394,7 +385,10 @@ fn prepare_one_select_plan( Some(&mut windows), )?; plan.result_columns.push(ResultSetColumn { - alias: Some(alias), + alias: maybe_alias.as_ref().map(|alias| match alias { + ast::As::Elided(alias) => alias.as_str().to_string(), + ast::As::As(alias) => alias.as_str().to_string(), + }), expr: expr.as_ref().clone(), contains_aggregates, }); From a0c47b98b880409a77daf6d428cdd45da7f816bb Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Fri, 26 Sep 2025 17:41:13 +0400 Subject: [PATCH 24/65] fix test --- bindings/javascript/packages/native/promise.test.ts | 6 +++--- tests/integration/query_processing/test_read_path.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bindings/javascript/packages/native/promise.test.ts b/bindings/javascript/packages/native/promise.test.ts index 422a6f56a..190fa23c5 100644 --- a/bindings/javascript/packages/native/promise.test.ts +++ b/bindings/javascript/packages/native/promise.test.ts @@ -98,9 +98,9 @@ test('avg-bug', async () => { 'value 6', null, null, 150, ); - expect(await db.prepare(`select avg("a") from "aggregate_table";`).get()).toEqual({ 'avg ("a")': 24 }); - expect(await db.prepare(`select avg("null_only") from "aggregate_table";`).get()).toEqual({ 'avg ("null_only")': null }); - expect(await db.prepare(`select avg(distinct "b") from "aggregate_table";`).get()).toEqual({ 'avg (DISTINCT "b")': 42.5 }); + expect(await db.prepare(`select avg("a") from "aggregate_table";`).get()).toEqual({ 'avg (aggregate_table.a)': 24 }); + expect(await db.prepare(`select avg("null_only") from "aggregate_table";`).get()).toEqual({ 'avg (aggregate_table.null_only)': null }); + expect(await db.prepare(`select avg(distinct "b") from "aggregate_table";`).get()).toEqual({ 'avg (DISTINCT aggregate_table.b)': 42.5 }); }) test('on-disk db', async () => { diff --git a/tests/integration/query_processing/test_read_path.rs b/tests/integration/query_processing/test_read_path.rs index 3a1b3e52a..452ca1c85 100644 --- a/tests/integration/query_processing/test_read_path.rs +++ b/tests/integration/query_processing/test_read_path.rs @@ -771,8 +771,8 @@ fn test_avg_agg() -> anyhow::Result<()> { } assert_eq!(stmt.num_columns(), 2); - assert_eq!(stmt.get_column_name(0), "avg (x)"); - assert_eq!(stmt.get_column_name(1), "avg (y)"); + assert_eq!(stmt.get_column_name(0), "avg (t.x)"); + assert_eq!(stmt.get_column_name(1), "avg (t.y)"); assert_eq!( rows, From 63a9fa8c281db84bb39b59aa55c693d035e86c8e Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Fri, 26 Sep 2025 15:02:55 +0400 Subject: [PATCH 25/65] fix handling of offset parameter set through variable - before the fix db generated following plan: turso> EXPLAIN SELECT * FROM users LIMIT ? 
OFFSET ?; addr opcode p1 p2 p3 p4 p5 comment ---- ----------------- ---- ---- ---- ------------- -- ------- 0 Init 0 16 0 0 Start at 16 1 Variable 1 1 0 0 r[1]=parameter(1); OFFSET expr 2 MustBeInt 1 0 0 0 3 Variable 2 2 0 0 r[2]=parameter(2); OFFSET expr 4 MustBeInt 2 0 0 0 5 OffsetLimit 1 3 2 0 if r[1]>0 then r[3]=r[1]+max(0,r[2]) else r[3]=(-1) 6 OpenRead 0 2 0 0 table=users, root=2, iDb=0 7 Rewind 0 15 0 0 Rewind table users 8 Variable 2 2 0 0 r[2]=parameter(2); OFFSET expr 9 MustBeInt 2 0 0 0 10 IfPos 2 14 1 0 r[2]>0 -> r[2]-=1, goto 14 11 Column 0 0 4 0 r[4]=users.x 12 ResultRow 4 1 0 0 output=r[4] 13 DecrJumpZero 1 15 0 0 if (--r[1]==0) goto 15 14 Next 0 8 0 0 15 Halt 0 0 0 0 16 Transaction 0 1 1 0 iDb=0 tx_mode=Read 17 Goto 0 1 0 0 - the problem here is that the Variable value is re-read at step 8, inside the row loop, so the OFFSET countdown is reset on every iteration - which is wrong
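To pin down the expected behavior, a small SQL sketch with assumed data (this is what the fix and the tests that follow guarantee): the OFFSET parameter is consumed once, up front, rather than re-evaluated for every row.

create table users(x);
insert into users values (1), (2), (3);
select x from users limit 1 offset 1;
-- skips exactly one row and returns 2

--- core/translate/order_by.rs | 8 +------- core/translate/result_row.rs | 35 +++++------------------------------ core/translate/values.rs | 4 ++-- 3 files changed, 8 insertions(+), 39 deletions(-) diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 868a26117..35f90cb9a 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -154,13 +154,7 @@ pub fn emit_order_by( }); program.preassign_label_to_next_insn(sort_loop_start_label); - emit_offset( - program, - plan, - sort_loop_next_label, - t_ctx.reg_offset, - &t_ctx.resolver, - ); + emit_offset(program, sort_loop_next_label, t_ctx.reg_offset); program.emit_insn(Insn::SorterData { cursor_id: sort_cursor, diff --git a/core/translate/result_row.rs b/core/translate/result_row.rs index 04b7454f5..4c17d4946 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -32,7 +32,7 @@ pub fn emit_select_result( limit_ctx: Option<LimitCtx>, ) -> Result<()> { if let (Some(jump_to), Some(_)) = (offset_jump_to, label_on_limit_reached) { - emit_offset(program, plan, jump_to, reg_offset, resolver); + emit_offset(program, jump_to, reg_offset); } let start_reg = reg_result_cols_start; @@ -162,39 +162,14 @@ pub fn emit_result_row_and_limit( Ok(()) } -pub fn emit_offset( - program: &mut ProgramBuilder, - plan: &SelectPlan, - jump_to: BranchOffset, - reg_offset: Option<usize>, - resolver: &Resolver, -) { - let Some(offset_expr) = &plan.offset else { +pub fn emit_offset(program: &mut ProgramBuilder, jump_to: BranchOffset, reg_offset: Option<usize>) { + let Some(reg_offset) = &reg_offset else { return; }; - if let Some(val) = try_fold_expr_to_i64(offset_expr) { - if val > 0 { - program.add_comment(program.offset(), "OFFSET const"); - program.emit_insn(Insn::IfPos { - reg: reg_offset.expect("reg_offset must be Some"), - target_pc: jump_to, - decrement_by: 1, - }); - } - return; - } - - let r = reg_offset.expect("reg_offset must be Some"); - - program.add_comment(program.offset(), "OFFSET expr"); - - _ = translate_expr(program, None, offset_expr, r, resolver); - - program.emit_insn(Insn::MustBeInt { reg: r }); - + program.add_comment(program.offset(), "OFFSET const"); program.emit_insn(Insn::IfPos { - reg: r, + reg: *reg_offset, target_pc: jump_to, decrement_by: 1, }); diff --git a/core/translate/values.rs b/core/translate/values.rs index 869a63f31..b74483f7f 100644 --- a/core/translate/values.rs +++ b/core/translate/values.rs @@ -34,7 +34,7 @@ fn emit_values_when_single_row( t_ctx: &TranslateCtx, ) -> Result { let end_label = program.allocate_label(); - emit_offset(program, plan, end_label, t_ctx.reg_offset, &t_ctx.resolver); + emit_offset(program, end_label, t_ctx.reg_offset); let first_row = &plan.values[0]; let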
row_len = first_row.len(); let start_reg = program.alloc_registers(row_len); @@ -87,7 +87,7 @@ fn emit_toplevel_values( }); let goto_label = program.allocate_label(); - emit_offset(program, plan, goto_label, t_ctx.reg_offset, &t_ctx.resolver); + emit_offset(program, goto_label, t_ctx.reg_offset); let row_len = plan.values[0].len(); let copy_start_reg = program.alloc_registers(row_len); for i in 0..row_len { From 5bf69350b38f6db761206ccb686cd85c208a19a3 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Fri, 26 Sep 2025 15:08:40 +0400 Subject: [PATCH 26/65] add simple tests for offset/limit binding --- .../packages/native/promise.test.ts | 14 ++++++++ .../query_processing/test_read_path.rs | 34 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/bindings/javascript/packages/native/promise.test.ts b/bindings/javascript/packages/native/promise.test.ts index 190fa23c5..0d05a4b11 100644 --- a/bindings/javascript/packages/native/promise.test.ts +++ b/bindings/javascript/packages/native/promise.test.ts @@ -103,6 +103,20 @@ test('avg-bug', async () => { expect(await db.prepare(`select avg(distinct "b") from "aggregate_table";`).get()).toEqual({ 'avg (DISTINCT aggregate_table.b)': 42.5 }); }) +test('offset-bug', async () => { + const db = await connect(":memory:"); + await db.exec(`CREATE TABLE users ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + verified integer not null default 0 + );`); + const insert = db.prepare(`INSERT INTO users (name) VALUES (?),(?);`); + await insert.run('John', 'John1'); + + const stmt = db.prepare(`SELECT * FROM users LIMIT ? OFFSET ?;`); + expect(await stmt.all(1, 1)).toEqual([{ id: 2, name: 'John1', verified: 0 }]) +}) + test('on-disk db', async () => { const path = `test-${(Math.random() * 10000) | 0}.db`; try { diff --git a/tests/integration/query_processing/test_read_path.rs b/tests/integration/query_processing/test_read_path.rs index 452ca1c85..2319aada6 100644 --- a/tests/integration/query_processing/test_read_path.rs +++ b/tests/integration/query_processing/test_read_path.rs @@ -784,3 +784,37 @@ fn test_avg_agg() -> anyhow::Result<()> { Ok(()) } + +#[test] +fn test_offset_limit_bind() -> anyhow::Result<()> { + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE test (i INTEGER);", false); + let conn = tmp_db.connect_limbo(); + + conn.execute("INSERT INTO test VALUES (5), (4), (3), (2), (1)")?; + + let mut stmt = conn.prepare("SELECT * FROM test LIMIT ? OFFSET ?")?; + stmt.bind_at(1.try_into()?, Value::Integer(2)); + stmt.bind_at(2.try_into()?, Value::Integer(1)); + + let mut rows = Vec::new(); + loop { + match stmt.step()? 
{ + StepResult::Row => { + let row = stmt.row().unwrap(); + rows.push(row.get_values().cloned().collect::<Vec<_>>()); + } + StepResult::IO => stmt.run_once()?, + _ => break, + } + } + + assert_eq!( + rows, + vec![ + vec![turso_core::Value::Integer(4)], + vec![turso_core::Value::Integer(3)] + ] + ); + + Ok(()) +} From f80650586a79e5ea934716fc02856b79fa5cd9b1 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Fri, 26 Sep 2025 16:03:29 +0400 Subject: [PATCH 27/65] remove misleading comment --- core/translate/result_row.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/translate/result_row.rs b/core/translate/result_row.rs index 4c17d4946..ffcef6f99 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -166,8 +166,6 @@ pub fn emit_offset(program: &mut ProgramBuilder, jump_to: BranchOffset, reg_offs let Some(reg_offset) = &reg_offset else { return; }; - - program.add_comment(program.offset(), "OFFSET const"); program.emit_insn(Insn::IfPos { reg: *reg_offset, target_pc: jump_to, From f7bf60e85633ec2d966a2fe1e98370b21b9d80b6 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 26 Sep 2025 17:20:28 +0300 Subject: [PATCH 28/65] github: Add 30 minute timeout to all jobs We're getting hit by macOS runner concurrency limits whenever some jobs get stuck (for example, because of a deadlock). --- .github/workflows/build-sim.yml | 1 + .github/workflows/c-compat.yml | 1 + .github/workflows/dart.yml | 3 +++ .github/workflows/java.yml | 1 + .github/workflows/labeler.yml | 1 + .github/workflows/long_fuzz_tests_btree.yml | 4 ++-- .github/workflows/napi.yml | 3 +++ .github/workflows/python.yml | 7 +++++++ .github/workflows/rust.yml | 4 ++++ .github/workflows/rust_perf.yml | 5 +++++ .github/workflows/stale.yml | 1 + 11 files changed, 29 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-sim.yml b/.github/workflows/build-sim.yml index 5871edf8c..27af28684 100644 --- a/.github/workflows/build-sim.yml +++ b/.github/workflows/build-sim.yml @@ -25,6 +25,7 @@ env: jobs: deploy: runs-on: blacksmith + timeout-minutes: 30 steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/c-compat.yml b/.github/workflows/c-compat.yml index 7eb2fd874..01066f83e 100644 --- a/.github/workflows/c-compat.yml +++ b/.github/workflows/c-compat.yml @@ -13,6 +13,7 @@ on: jobs: test: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - name: Checkout code diff --git a/.github/workflows/dart.yml b/.github/workflows/dart.yml index 407c0d869..81945920b 100644 --- a/.github/workflows/dart.yml +++ b/.github/workflows/dart.yml @@ -12,6 +12,7 @@ env: working-directory: bindings/dart jobs: test: + timeout-minutes: 30 defaults: run: working-directory: ${{ env.working-directory }} @@ -39,6 +40,7 @@ run: flutter test precompile: if: ${{ false && startsWith(github.ref, 'refs/tags/') }} + timeout-minutes: 30 defaults: run: working-directory: ${{ env.working-directory }} @@ -84,6 +86,7 @@ PRIVATE_KEY: ${{ secrets.CARGOKIT_PRIVATE_KEY }} publish: if: ${{ false && startsWith(github.ref, 'refs/tags/') }} + timeout-minutes: 30 defaults: run: working-directory: ${{ env.working-directory }} diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index e1451d121..089998711 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -16,6 +16,7 @@ env: jobs: test: runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 defaults: run: diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 71e0f9b5d..8fcc0d8f2 100644 ---
a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -4,6 +4,7 @@ on: jobs: labeler: + timeout-minutes: 30 permissions: contents: read issues: write diff --git a/.github/workflows/long_fuzz_tests_btree.yml b/.github/workflows/long_fuzz_tests_btree.yml index 0f38f67bf..a84755749 100644 --- a/.github/workflows/long_fuzz_tests_btree.yml +++ b/.github/workflows/long_fuzz_tests_btree.yml @@ -11,7 +11,7 @@ on: jobs: run-long-tests: runs-on: blacksmith-4vcpu-ubuntu-2404 - timeout-minutes: 0 + timeout-minutes: 30 steps: - uses: actions/checkout@v3 @@ -31,7 +31,7 @@ jobs: simple-stress-test: runs-on: blacksmith-4vcpu-ubuntu-2404 - timeout-minutes: 0 + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - uses: useblacksmith/rust-cache@v3 diff --git a/.github/workflows/napi.yml b/.github/workflows/napi.yml index 30b87ae7c..d46bfed0e 100644 --- a/.github/workflows/napi.yml +++ b/.github/workflows/napi.yml @@ -161,6 +161,7 @@ jobs: if-no-files-found: error test-db-linux-x64-gnu-binding: name: Test DB bindings on Linux-x64-gnu - node@${{ matrix.node }} + timeout-minutes: 30 needs: - build strategy: @@ -192,6 +193,7 @@ jobs: run: docker run --rm -v $(pwd):/build -w /build node:${{ matrix.node }}-slim yarn workspace @tursodatabase/database test test-db-browser-binding: name: Test DB bindings on browser@${{ matrix.node }} + timeout-minutes: 30 needs: - build strategy: @@ -228,6 +230,7 @@ jobs: publish: name: Publish runs-on: ubuntu-latest + timeout-minutes: 30 permissions: contents: read id-token: write diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index cc83a2ae0..c100dd519 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -17,6 +17,7 @@ env: jobs: configure-strategy: runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 outputs: python-versions: ${{ steps.gen-matrix.outputs.python-versions }} steps: @@ -26,6 +27,7 @@ jobs: test: needs: configure-strategy + timeout-minutes: 30 strategy: matrix: os: @@ -64,6 +66,7 @@ jobs: lint: runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 steps: - name: Checkout code @@ -85,6 +88,7 @@ jobs: linux: runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 defaults: run: working-directory: ${{ env.working-directory }} @@ -112,6 +116,7 @@ jobs: macos-arm64: runs-on: macos-14 + timeout-minutes: 30 defaults: run: working-directory: ${{ env.working-directory }} @@ -141,6 +146,7 @@ jobs: sdist: runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 defaults: run: working-directory: ${{ env.working-directory }} @@ -161,6 +167,7 @@ jobs: release: name: Release runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 if: "startsWith(github.ref, 'refs/tags/')" needs: [linux, macos-arm64, sdist] steps: diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index d2a201aba..32ec162c8 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -12,6 +12,7 @@ env: jobs: cargo-fmt-check: runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - uses: dtolnay/rust-toolchain@stable @@ -21,6 +22,7 @@ jobs: run: cd fuzz && cargo fmt --check build-native: + timeout-minutes: 30 strategy: matrix: os: [blacksmith-4vcpu-ubuntu-2404, macos-latest, windows-latest] @@ -54,6 +56,7 @@ jobs: clippy: runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - name: Clippy @@ -97,6 +100,7 @@ jobs: timeout-minutes: 20 test-sqlite: runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 
steps: - uses: actions/checkout@v3 - uses: "./.github/shared/install_sqlite" diff --git a/.github/workflows/rust_perf.yml b/.github/workflows/rust_perf.yml index eecd59c20..f93051b08 100644 --- a/.github/workflows/rust_perf.yml +++ b/.github/workflows/rust_perf.yml @@ -12,6 +12,7 @@ env: jobs: bench: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - uses: useblacksmith/setup-node@v5 @@ -55,6 +56,7 @@ jobs: clickbench: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - uses: useblacksmith/setup-node@v5 @@ -102,6 +104,7 @@ jobs: tpc-h-criterion: runs-on: ubuntu-latest + timeout-minutes: 30 env: DB_FILE: "perf/tpc-h/TPC-H.db" steps: @@ -156,6 +159,7 @@ jobs: tpc-h: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - name: TPC-H @@ -163,6 +167,7 @@ jobs: vfs-bench-compile: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - uses: useblacksmith/rust-cache@v3 diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 426cf58a0..9826e9777 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -11,6 +11,7 @@ permissions: jobs: stale: runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 30 steps: - name: Close stale pull requests uses: actions/stale@v6 From 99adf731680d7103e398d7a379e176c6ae0c0df0 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Fri, 26 Sep 2025 16:59:57 +0200 Subject: [PATCH 29/65] core/mvcc/logical-log: rename to needs_recovery --- core/mvcc/persistent_storage/logical_log.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/mvcc/persistent_storage/logical_log.rs b/core/mvcc/persistent_storage/logical_log.rs index 2789acff4..c5946d536 100644 --- a/core/mvcc/persistent_storage/logical_log.rs +++ b/core/mvcc/persistent_storage/logical_log.rs @@ -15,7 +15,7 @@ use crate::{types::IOResult, File}; pub struct LogicalLog { pub file: Arc, offset: u64, - recover: bool, + needs_recovery: bool, } /// Log's Header, this will be the 64 bytes in any logical log file. @@ -140,7 +140,7 @@ impl LogicalLog { Self { file, offset: 0, - recover, + needs_recovery: recover, } } @@ -218,11 +218,11 @@ impl LogicalLog { } pub fn needs_recover(&self) -> bool { - self.recover + self.needs_recovery } pub fn mark_recovered(&mut self) { - self.recover = false; + self.needs_recovery = false; } } From 931cf2658e9ff36e015330e8027463349279d5ae Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 26 Sep 2025 18:23:14 +0300 Subject: [PATCH 30/65] core/storage: Display page category for rowid integrity check failure Let's add more hints to hunt down the reason for #2896. --- core/storage/btree.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 5b27381c9..0115e3ce9 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -5766,9 +5766,10 @@ pub enum IntegrityCheckError { content_area: usize, usable_space: usize, }, - #[error("Page {page_id} cell {cell_idx} has rowid={rowid} in wrong order. Parent cell has parent_rowid={max_intkey} and next_rowid={next_rowid}")] + #[error("Page {page_id} ({page_category:?}) cell {cell_idx} has rowid={rowid} in wrong order. 
Parent cell has parent_rowid={max_intkey} and next_rowid={next_rowid}")] CellRowidOutOfRange { page_id: usize, + page_category: PageCategory, cell_idx: usize, rowid: i64, max_intkey: i64, @@ -6059,6 +6060,7 @@ pub fn integrity_check( if rowid > max_intkey || rowid > next_rowid { errors.push(IntegrityCheckError::CellRowidOutOfRange { page_id: page.get().id, + page_category, cell_idx, rowid, max_intkey, @@ -6084,6 +6086,7 @@ pub fn integrity_check( if rowid > max_intkey || rowid > next_rowid { errors.push(IntegrityCheckError::CellRowidOutOfRange { page_id: page.get().id, + page_category, cell_idx, rowid, max_intkey, From fe4bfb7c88516a1eb5aae2b56de708b1c73c1e33 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Fri, 26 Sep 2025 19:29:37 +0400 Subject: [PATCH 31/65] fix encryption config in the sync-client --- .../sync/packages/browser/promise-bundle.ts | 22 +++-- .../sync/packages/browser/promise-default.ts | 22 +++-- .../browser/promise-turbopack-hack.ts | 22 +++-- .../packages/browser/promise-vite-dev-hack.ts | 22 +++-- .../javascript/sync/packages/common/index.ts | 3 +- .../javascript/sync/packages/common/types.ts | 11 ++- .../sync/packages/native/index.d.ts | 6 ++ .../javascript/sync/packages/native/index.js | 95 ++++++++++--------- .../sync/packages/native/promise.ts | 23 +++-- 9 files changed, 136 insertions(+), 90 deletions(-) diff --git a/bindings/javascript/sync/packages/browser/promise-bundle.ts b/bindings/javascript/sync/packages/browser/promise-bundle.ts index 1a1051fb6..e53cd9c72 100644 --- a/bindings/javascript/sync/packages/browser/promise-bundle.ts +++ b/bindings/javascript/sync/packages/browser/promise-bundle.ts @@ -1,6 +1,6 @@ import { registerFileAtWorker, unregisterFileAtWorker } from "@tursodatabase/database-browser-common" import { DatabasePromise } from "@tursodatabase/database-common" -import { ProtocolIo, run, DatabaseOpts as SyncDatabaseOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; +import { ProtocolIo, run, DatabaseOpts, EncryptionOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; import { SyncEngine, SyncEngineProtocolVersion, initThreadPool, MainWorker } from "./index-bundle.js"; let BrowserIO: ProtocolIo = { @@ -44,7 +44,7 @@ class Database extends DatabasePromise { #io: ProtocolIo; #guards: SyncEngineGuards; #worker: Worker | null; - constructor(opts: SyncDatabaseOpts) { + constructor(opts: DatabaseOpts) { const engine = new SyncEngine({ path: opts.path, clientName: opts.clientName, @@ -58,10 +58,16 @@ class Database extends DatabasePromise { let headers = typeof opts.authToken === "function" ? 
() => ({ ...(opts.authToken != null && { "Authorization": `Bearer ${(opts.authToken as any)()}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }) : { ...(opts.authToken != null && { "Authorization": `Bearer ${opts.authToken}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }; this.#runOpts = { url: opts.url, @@ -91,7 +97,7 @@ class Database extends DatabasePromise { } /** * pull new changes from the remote database - * if {@link SyncDatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. + * if {@link DatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. * @returns true if new changes were pulled from the remote */ async pull() { @@ -104,7 +110,7 @@ class Database extends DatabasePromise { } /** * push new local changes to the remote database - * if {@link SyncDatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote + * if {@link DatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote */ async push() { await this.#guards.push(async () => await run(this.#runOpts, this.#io, this.#engine, this.#engine.push())); @@ -145,11 +151,11 @@ class Database extends DatabasePromise { * @param {Object} opts - Options for database behavior. * @returns {Promise} - A promise that resolves to a Database instance. 
*/ -async function connect(opts: SyncDatabaseOpts): Promise { +async function connect(opts: DatabaseOpts): Promise { const db = new Database(opts); await db.connect(); return db; } export { connect, Database } -export type { DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } +export type { DatabaseOpts, EncryptionOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } diff --git a/bindings/javascript/sync/packages/browser/promise-default.ts b/bindings/javascript/sync/packages/browser/promise-default.ts index 6f2cb4d60..8d55219ac 100644 --- a/bindings/javascript/sync/packages/browser/promise-default.ts +++ b/bindings/javascript/sync/packages/browser/promise-default.ts @@ -1,6 +1,6 @@ import { registerFileAtWorker, unregisterFileAtWorker } from "@tursodatabase/database-browser-common" import { DatabasePromise } from "@tursodatabase/database-common" -import { ProtocolIo, run, DatabaseOpts as SyncDatabaseOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; +import { ProtocolIo, run, DatabaseOpts, EncryptionOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; import { SyncEngine, SyncEngineProtocolVersion, initThreadPool, MainWorker } from "./index-default.js"; let BrowserIO: ProtocolIo = { @@ -44,7 +44,7 @@ class Database extends DatabasePromise { #io: ProtocolIo; #guards: SyncEngineGuards; #worker: Worker | null; - constructor(opts: SyncDatabaseOpts) { + constructor(opts: DatabaseOpts) { const engine = new SyncEngine({ path: opts.path, clientName: opts.clientName, @@ -58,10 +58,16 @@ class Database extends DatabasePromise { let headers = typeof opts.authToken === "function" ? () => ({ ...(opts.authToken != null && { "Authorization": `Bearer ${(opts.authToken as any)()}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }) : { ...(opts.authToken != null && { "Authorization": `Bearer ${opts.authToken}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }; this.#runOpts = { url: opts.url, @@ -91,7 +97,7 @@ class Database extends DatabasePromise { } /** * pull new changes from the remote database - * if {@link SyncDatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. + * if {@link DatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. 
* @returns true if new changes were pulled from the remote */ async pull() { @@ -104,7 +110,7 @@ class Database extends DatabasePromise { } /** * push new local changes to the remote database - * if {@link SyncDatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote + * if {@link DatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote */ async push() { await this.#guards.push(async () => await run(this.#runOpts, this.#io, this.#engine, this.#engine.push())); @@ -145,11 +151,11 @@ class Database extends DatabasePromise { * @param {Object} opts - Options for database behavior. * @returns {Promise} - A promise that resolves to a Database instance. */ -async function connect(opts: SyncDatabaseOpts): Promise { +async function connect(opts: DatabaseOpts): Promise { const db = new Database(opts); await db.connect(); return db; } export { connect, Database } -export type { DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } +export type { DatabaseOpts, EncryptionOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } diff --git a/bindings/javascript/sync/packages/browser/promise-turbopack-hack.ts b/bindings/javascript/sync/packages/browser/promise-turbopack-hack.ts index bc886b755..5507f81fa 100644 --- a/bindings/javascript/sync/packages/browser/promise-turbopack-hack.ts +++ b/bindings/javascript/sync/packages/browser/promise-turbopack-hack.ts @@ -1,6 +1,6 @@ import { registerFileAtWorker, unregisterFileAtWorker } from "@tursodatabase/database-browser-common" import { DatabasePromise } from "@tursodatabase/database-common" -import { ProtocolIo, run, DatabaseOpts as SyncDatabaseOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; +import { ProtocolIo, run, DatabaseOpts, EncryptionOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; import { SyncEngine, SyncEngineProtocolVersion, initThreadPool, MainWorker } from "./index-turbopack-hack.js"; let BrowserIO: ProtocolIo = { @@ -44,7 +44,7 @@ class Database extends DatabasePromise { #io: ProtocolIo; #guards: SyncEngineGuards; #worker: Worker | null; - constructor(opts: SyncDatabaseOpts) { + constructor(opts: DatabaseOpts) { const engine = new SyncEngine({ path: opts.path, clientName: opts.clientName, @@ -58,10 +58,16 @@ class Database extends DatabasePromise { let headers = typeof opts.authToken === "function" ? 
() => ({ ...(opts.authToken != null && { "Authorization": `Bearer ${(opts.authToken as any)()}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }) : { ...(opts.authToken != null && { "Authorization": `Bearer ${opts.authToken}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }; this.#runOpts = { url: opts.url, @@ -91,7 +97,7 @@ class Database extends DatabasePromise { } /** * pull new changes from the remote database - * if {@link SyncDatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. + * if {@link DatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. * @returns true if new changes were pulled from the remote */ async pull() { @@ -104,7 +110,7 @@ class Database extends DatabasePromise { } /** * push new local changes to the remote database - * if {@link SyncDatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote + * if {@link DatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote */ async push() { await this.#guards.push(async () => await run(this.#runOpts, this.#io, this.#engine, this.#engine.push())); @@ -145,11 +151,11 @@ class Database extends DatabasePromise { * @param {Object} opts - Options for database behavior. * @returns {Promise} - A promise that resolves to a Database instance. 
*/ -async function connect(opts: SyncDatabaseOpts): Promise { +async function connect(opts: DatabaseOpts): Promise { const db = new Database(opts); await db.connect(); return db; } export { connect, Database } -export type { DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } +export type { DatabaseOpts, EncryptionOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } \ No newline at end of file diff --git a/bindings/javascript/sync/packages/browser/promise-vite-dev-hack.ts b/bindings/javascript/sync/packages/browser/promise-vite-dev-hack.ts index c084f4410..ca755e805 100644 --- a/bindings/javascript/sync/packages/browser/promise-vite-dev-hack.ts +++ b/bindings/javascript/sync/packages/browser/promise-vite-dev-hack.ts @@ -1,6 +1,6 @@ import { registerFileAtWorker, unregisterFileAtWorker } from "@tursodatabase/database-browser-common" import { DatabasePromise } from "@tursodatabase/database-common" -import { ProtocolIo, run, DatabaseOpts as SyncDatabaseOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; +import { ProtocolIo, run, DatabaseOpts, EncryptionOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; import { SyncEngine, SyncEngineProtocolVersion, initThreadPool, MainWorker } from "./index-vite-dev-hack.js"; let BrowserIO: ProtocolIo = { @@ -44,7 +44,7 @@ class Database extends DatabasePromise { #io: ProtocolIo; #guards: SyncEngineGuards; #worker: Worker | null; - constructor(opts: SyncDatabaseOpts) { + constructor(opts: DatabaseOpts) { const engine = new SyncEngine({ path: opts.path, clientName: opts.clientName, @@ -58,10 +58,16 @@ class Database extends DatabasePromise { let headers = typeof opts.authToken === "function" ? () => ({ ...(opts.authToken != null && { "Authorization": `Bearer ${(opts.authToken as any)()}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }) : { ...(opts.authToken != null && { "Authorization": `Bearer ${opts.authToken}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }; this.#runOpts = { url: opts.url, @@ -91,7 +97,7 @@ class Database extends DatabasePromise { } /** * pull new changes from the remote database - * if {@link SyncDatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. + * if {@link DatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. 
* @returns true if new changes were pulled from the remote */ async pull() { @@ -104,7 +110,7 @@ class Database extends DatabasePromise { } /** * push new local changes to the remote database - * if {@link SyncDatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote + * if {@link DatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote */ async push() { await this.#guards.push(async () => await run(this.#runOpts, this.#io, this.#engine, this.#engine.push())); @@ -145,11 +151,11 @@ class Database extends DatabasePromise { * @param {Object} opts - Options for database behavior. * @returns {Promise} - A promise that resolves to a Database instance. */ -async function connect(opts: SyncDatabaseOpts): Promise { +async function connect(opts: DatabaseOpts): Promise { const db = new Database(opts); await db.connect(); return db; } export { connect, Database } -export type { DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } +export type { DatabaseOpts, EncryptionOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } diff --git a/bindings/javascript/sync/packages/common/index.ts b/bindings/javascript/sync/packages/common/index.ts index 03db7e853..5facb2dc9 100644 --- a/bindings/javascript/sync/packages/common/index.ts +++ b/bindings/javascript/sync/packages/common/index.ts @@ -1,5 +1,5 @@ import { run, memoryIO, SyncEngineGuards } from "./run.js" -import { DatabaseOpts, ProtocolIo, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, DatabaseChangeType } from "./types.js" +import { DatabaseOpts, ProtocolIo, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, DatabaseChangeType, EncryptionOpts } from "./types.js" export { run, memoryIO, SyncEngineGuards } export type { @@ -9,6 +9,7 @@ export type { DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, + EncryptionOpts, ProtocolIo, RunOpts, diff --git a/bindings/javascript/sync/packages/common/types.ts b/bindings/javascript/sync/packages/common/types.ts index 3a8ddb02a..ee3e77a87 100644 --- a/bindings/javascript/sync/packages/common/types.ts +++ b/bindings/javascript/sync/packages/common/types.ts @@ -56,6 +56,13 @@ export interface DatabaseRowMutation { } export type Transform = (arg: DatabaseRowMutation) => DatabaseRowTransformResult; + +export interface EncryptionOpts { + // base64 encoded encryption key (must be either 16 or 32 bytes depending on the cipher) + key: string, + // encryption cipher algorithm + cipher: 'aes256gcm' | 'aes128gcm' | 'chacha20poly1305' +} export interface DatabaseOpts { /** * local path where to store all synced database files (e.g. 
local.db) @@ -79,9 +86,9 @@ export interface DatabaseOpts { */ clientName?: string; /** - * optional key if cloud database were encrypted by default + * optional encryption parameters if cloud database were encrypted by default */ - encryptionKey?: string; + encryption?: EncryptionOpts; /** * optional callback which will be called for every mutation before sending it to the remote * this callback can transform the update in order to support complex conflict resolution strategy diff --git a/bindings/javascript/sync/packages/native/index.d.ts b/bindings/javascript/sync/packages/native/index.d.ts index bc3560c19..aeb10e8ee 100644 --- a/bindings/javascript/sync/packages/native/index.d.ts +++ b/bindings/javascript/sync/packages/native/index.d.ts @@ -1,5 +1,10 @@ /* auto-generated by NAPI-RS */ /* eslint-disable */ +export declare class BatchExecutor { + stepSync(): number + reset(): void +} + /** A database connection. */ export declare class Database { /** @@ -39,6 +44,7 @@ export declare class Database { * A `Statement` instance. */ prepare(sql: string): Statement + executor(sql: string): BatchExecutor /** * Returns the rowid of the last row inserted. * diff --git a/bindings/javascript/sync/packages/native/index.js b/bindings/javascript/sync/packages/native/index.js index 9887adb36..75efff739 100644 --- a/bindings/javascript/sync/packages/native/index.js +++ b/bindings/javascript/sync/packages/native/index.js @@ -81,8 +81,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-android-arm64') const bindingPackageVersion = require('@tursodatabase/sync-android-arm64/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -97,8 +97,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-android-arm-eabi') const bindingPackageVersion = require('@tursodatabase/sync-android-arm-eabi/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -117,8 +117,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-win32-x64-msvc') const bindingPackageVersion = require('@tursodatabase/sync-win32-x64-msvc/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -133,8 +133,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-win32-ia32-msvc') const bindingPackageVersion = require('@tursodatabase/sync-win32-ia32-msvc/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -149,8 +149,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-win32-arm64-msvc') const bindingPackageVersion = require('@tursodatabase/sync-win32-arm64-msvc/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -168,8 +168,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-darwin-universal') const bindingPackageVersion = require('@tursodatabase/sync-darwin-universal/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -184,8 +184,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-darwin-x64') const bindingPackageVersion = require('@tursodatabase/sync-darwin-x64/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -200,8 +200,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-darwin-arm64') const bindingPackageVersion = require('@tursodatabase/sync-darwin-arm64/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -220,8 +220,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-freebsd-x64') const bindingPackageVersion = require('@tursodatabase/sync-freebsd-x64/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -236,8 +236,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-freebsd-arm64') const bindingPackageVersion = require('@tursodatabase/sync-freebsd-arm64/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -257,8 +257,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-x64-musl') const bindingPackageVersion = require('@tursodatabase/sync-linux-x64-musl/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -273,8 +273,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-x64-gnu') const bindingPackageVersion = require('@tursodatabase/sync-linux-x64-gnu/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -291,8 +291,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-arm64-musl') const bindingPackageVersion = require('@tursodatabase/sync-linux-arm64-musl/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -307,8 +307,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-arm64-gnu') const bindingPackageVersion = require('@tursodatabase/sync-linux-arm64-gnu/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -325,8 +325,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-arm-musleabihf') const bindingPackageVersion = require('@tursodatabase/sync-linux-arm-musleabihf/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -341,8 +341,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-arm-gnueabihf') const bindingPackageVersion = require('@tursodatabase/sync-linux-arm-gnueabihf/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -359,8 +359,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-riscv64-musl') const bindingPackageVersion = require('@tursodatabase/sync-linux-riscv64-musl/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -375,8 +375,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-riscv64-gnu') const bindingPackageVersion = require('@tursodatabase/sync-linux-riscv64-gnu/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -392,8 +392,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-ppc64-gnu') const bindingPackageVersion = require('@tursodatabase/sync-linux-ppc64-gnu/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -408,8 +408,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-linux-s390x-gnu') const bindingPackageVersion = require('@tursodatabase/sync-linux-s390x-gnu/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -428,8 +428,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-openharmony-arm64') const bindingPackageVersion = require('@tursodatabase/sync-openharmony-arm64/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -444,8 +444,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-openharmony-x64') const bindingPackageVersion = require('@tursodatabase/sync-openharmony-x64/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -460,8 +460,8 @@ function requireNative() { try { const binding = require('@tursodatabase/sync-openharmony-arm') const bindingPackageVersion = require('@tursodatabase/sync-openharmony-arm/package.json').version - if (bindingPackageVersion !== '0.2.0-pre.7' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.7 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.0-pre.8' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.0-pre.8 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -508,7 +508,8 @@ if (!nativeBinding) { throw new Error(`Failed to load native binding`) } -const { Database, Statement, GeneratorHolder, JsDataCompletion, JsProtocolIo, JsProtocolRequestBytes, SyncEngine, SyncEngineChanges, DatabaseChangeTypeJs, SyncEngineProtocolVersion } = nativeBinding +const { BatchExecutor, Database, Statement, GeneratorHolder, JsDataCompletion, JsProtocolIo, JsProtocolRequestBytes, SyncEngine, SyncEngineChanges, DatabaseChangeTypeJs, SyncEngineProtocolVersion } = nativeBinding +export { BatchExecutor } export { Database } export { Statement } export { GeneratorHolder } diff --git a/bindings/javascript/sync/packages/native/promise.ts b/bindings/javascript/sync/packages/native/promise.ts index f6d0af579..85906b3c6 100644 --- a/bindings/javascript/sync/packages/native/promise.ts +++ b/bindings/javascript/sync/packages/native/promise.ts @@ -1,5 +1,5 @@ import { DatabasePromise } from "@tursodatabase/database-common" -import { ProtocolIo, run, DatabaseOpts as SyncDatabaseOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; +import { ProtocolIo, run, DatabaseOpts, EncryptionOpts, RunOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult, DatabaseStats, SyncEngineGuards } from "@tursodatabase/sync-common"; import { SyncEngine, SyncEngineProtocolVersion } from "#index"; import { promises } from "node:fs"; @@ -44,7 +44,7 @@ class Database extends DatabasePromise { #engine: any; #io: ProtocolIo; #guards: SyncEngineGuards - constructor(opts: SyncDatabaseOpts) { + constructor(opts: DatabaseOpts) { const engine = new SyncEngine({ path: opts.path, clientName: opts.clientName, @@ -58,10 +58,16 @@ class Database extends DatabasePromise { let headers = typeof opts.authToken === "function" ? 
() => ({ ...(opts.authToken != null && { "Authorization": `Bearer ${(opts.authToken as any)()}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }) : { ...(opts.authToken != null && { "Authorization": `Bearer ${opts.authToken}` }), - ...(opts.encryptionKey != null && { "x-turso-encryption-key": opts.encryptionKey }) + ...(opts.encryption != null && { + "x-turso-encryption-key": opts.encryption.key, + "x-turso-encryption-cipher": opts.encryption.cipher, + }) }; this.#runOpts = { url: opts.url, @@ -81,7 +87,7 @@ class Database extends DatabasePromise { } /** * pull new changes from the remote database - * if {@link SyncDatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. + * if {@link DatabaseOpts.longPollTimeoutMs} is set - then server will hold the connection open until either new changes will appear in the database or timeout occurs. * @returns true if new changes were pulled from the remote */ async pull() { @@ -94,7 +100,7 @@ class Database extends DatabasePromise { } /** * push new local changes to the remote database - * if {@link SyncDatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote + * if {@link DatabaseOpts.transform} is set - then provided callback will be called for every mutation before sending it to the remote */ async push() { await this.#guards.push(async () => await run(this.#runOpts, this.#io, this.#engine, this.#engine.push())); @@ -125,10 +131,11 @@ class Database extends DatabasePromise { * @param {Object} opts - Options for database behavior. * @returns {Promise} - A promise that resolves to a Database instance. 
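 * A minimal usage sketch, assuming a reachable sync endpoint; the URL, token
 * variable, and key value below are placeholders for illustration, not values
 * defined in this repository:
 * @example
 * // assumes `base64Key` holds a base64-encoded 32-byte key (for aes256gcm)
 * const db = await connect({
 *   path: "local.db",
 *   url: "https://example.turso.io",      // placeholder remote URL
 *   authToken: process.env.TURSO_TOKEN,   // placeholder token source
 *   encryption: { key: base64Key, cipher: "aes256gcm" },
 * });
 * await db.pull();  // fetch new remote changes, if any
 * await db.push();  // send pending local mutations to the remote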
*/ -async function connect(opts: SyncDatabaseOpts): Promise { +async function connect(opts: DatabaseOpts): Promise { const db = new Database(opts); await db.connect(); return db; } -export { connect, Database, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } +export { connect, Database } +export type { DatabaseOpts, EncryptionOpts, DatabaseRowMutation, DatabaseRowStatement, DatabaseRowTransformResult } \ No newline at end of file From 222ab125c18f9b7c2dcdad407d288b90c277d4be Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 26 Sep 2025 19:00:14 +0300 Subject: [PATCH 32/65] Turso 0.2.0-pre.9 --- Cargo.lock | 54 +++++++++---------- Cargo.toml | 34 ++++++------ bindings/javascript/package-lock.json | 36 ++++++------- bindings/javascript/package.json | 2 +- .../packages/browser-common/package.json | 2 +- .../javascript/packages/browser/package.json | 6 +-- .../javascript/packages/common/package.json | 2 +- .../javascript/packages/native/package.json | 4 +- .../sync/packages/browser/package.json | 8 +-- .../sync/packages/common/package.json | 4 +- .../sync/packages/native/package.json | 6 +-- bindings/javascript/yarn.lock | 24 ++++----- 12 files changed, 91 insertions(+), 91 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 29d269c46..91f456c74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -684,7 +684,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core_tester" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "anyhow", "assert_cmd", @@ -2264,7 +2264,7 @@ dependencies = [ [[package]] name = "limbo_completion" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "mimalloc", "turso_ext", @@ -2272,7 +2272,7 @@ dependencies = [ [[package]] name = "limbo_crypto" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "blake3", "data-encoding", @@ -2285,7 +2285,7 @@ dependencies = [ [[package]] name = "limbo_csv" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "csv", "mimalloc", @@ -2295,7 +2295,7 @@ dependencies = [ [[package]] name = "limbo_ipaddr" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "ipnetwork", "mimalloc", @@ -2304,7 +2304,7 @@ dependencies = [ [[package]] name = "limbo_percentile" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "mimalloc", "turso_ext", @@ -2312,7 +2312,7 @@ dependencies = [ [[package]] name = "limbo_regexp" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "mimalloc", "regex", @@ -2321,7 +2321,7 @@ dependencies = [ [[package]] name = "limbo_sim" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "anyhow", "chrono", @@ -2357,7 +2357,7 @@ dependencies = [ [[package]] name = "limbo_sqlite_test_ext" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "cc", ] @@ -3093,7 +3093,7 @@ dependencies = [ [[package]] name = "py-turso" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "anyhow", "pyo3", @@ -3820,7 +3820,7 @@ checksum = "d372029cb5195f9ab4e4b9aef550787dce78b124fcaee8d82519925defcd6f0d" [[package]] name = "sql_generation" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "anarchist-readable-name-generator-lib 0.2.0", "anyhow", @@ -4343,7 +4343,7 @@ dependencies = [ [[package]] name = "turso" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "rand 0.9.2", "rand_chacha 0.9.0", @@ -4355,7 +4355,7 @@ dependencies = [ [[package]] name = "turso-java" -version = "0.2.0-pre.8" +version = 
"0.2.0-pre.9" dependencies = [ "jni", "thiserror 2.0.16", @@ -4364,7 +4364,7 @@ dependencies = [ [[package]] name = "turso_cli" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "anyhow", "cfg-if", @@ -4400,7 +4400,7 @@ dependencies = [ [[package]] name = "turso_core" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "aegis", "aes", @@ -4458,7 +4458,7 @@ dependencies = [ [[package]] name = "turso_dart" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "flutter_rust_bridge", "turso_core", @@ -4466,7 +4466,7 @@ dependencies = [ [[package]] name = "turso_ext" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "chrono", "getrandom 0.3.2", @@ -4475,7 +4475,7 @@ dependencies = [ [[package]] name = "turso_ext_tests" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "env_logger 0.11.7", "lazy_static", @@ -4486,7 +4486,7 @@ dependencies = [ [[package]] name = "turso_macros" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "proc-macro2", "quote", @@ -4495,7 +4495,7 @@ dependencies = [ [[package]] name = "turso_node" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "chrono", "napi", @@ -4508,7 +4508,7 @@ dependencies = [ [[package]] name = "turso_parser" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "bitflags 2.9.4", "criterion", @@ -4524,7 +4524,7 @@ dependencies = [ [[package]] name = "turso_sqlite3" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "env_logger 0.11.7", "libc", @@ -4537,7 +4537,7 @@ dependencies = [ [[package]] name = "turso_sqlite3_parser" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "bitflags 2.9.4", "cc", @@ -4555,7 +4555,7 @@ dependencies = [ [[package]] name = "turso_stress" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "anarchist-readable-name-generator-lib 0.1.2", "antithesis_sdk", @@ -4571,7 +4571,7 @@ dependencies = [ [[package]] name = "turso_sync_engine" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "base64", "bytes", @@ -4597,7 +4597,7 @@ dependencies = [ [[package]] name = "turso_sync_js" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "genawaiter", "napi", @@ -4612,7 +4612,7 @@ dependencies = [ [[package]] name = "turso_whopper" -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index 53cf2a5eb..e94180bd7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,29 +39,29 @@ exclude = [ ] [workspace.package] -version = "0.2.0-pre.8" +version = "0.2.0-pre.9" authors = ["the Limbo authors"] edition = "2021" license = "MIT" repository = "https://github.com/tursodatabase/turso" [workspace.dependencies] -turso = { path = "bindings/rust", version = "0.2.0-pre.8" } -turso_node = { path = "bindings/javascript", version = "0.2.0-pre.8" } -limbo_completion = { path = "extensions/completion", version = "0.2.0-pre.8" } -turso_core = { path = "core", version = "0.2.0-pre.8" } -turso_sync_engine = { path = "sync/engine", version = "0.2.0-pre.8" } -limbo_crypto = { path = "extensions/crypto", version = "0.2.0-pre.8" } -limbo_csv = { path = "extensions/csv", version = "0.2.0-pre.8" } -turso_ext = { path = "extensions/core", version = "0.2.0-pre.8" } -turso_ext_tests = { path = "extensions/tests", version = "0.2.0-pre.8" } -limbo_ipaddr = { path = "extensions/ipaddr", version = "0.2.0-pre.8" } -turso_macros = { path = "macros", version = "0.2.0-pre.8" } -limbo_percentile = { path = 
"extensions/percentile", version = "0.2.0-pre.8" } -limbo_regexp = { path = "extensions/regexp", version = "0.2.0-pre.8" } -turso_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.2.0-pre.8" } -limbo_uuid = { path = "extensions/uuid", version = "0.2.0-pre.8" } -turso_parser = { path = "parser", version = "0.2.0-pre.8" } +turso = { path = "bindings/rust", version = "0.2.0-pre.9" } +turso_node = { path = "bindings/javascript", version = "0.2.0-pre.9" } +limbo_completion = { path = "extensions/completion", version = "0.2.0-pre.9" } +turso_core = { path = "core", version = "0.2.0-pre.9" } +turso_sync_engine = { path = "sync/engine", version = "0.2.0-pre.9" } +limbo_crypto = { path = "extensions/crypto", version = "0.2.0-pre.9" } +limbo_csv = { path = "extensions/csv", version = "0.2.0-pre.9" } +turso_ext = { path = "extensions/core", version = "0.2.0-pre.9" } +turso_ext_tests = { path = "extensions/tests", version = "0.2.0-pre.9" } +limbo_ipaddr = { path = "extensions/ipaddr", version = "0.2.0-pre.9" } +turso_macros = { path = "macros", version = "0.2.0-pre.9" } +limbo_percentile = { path = "extensions/percentile", version = "0.2.0-pre.9" } +limbo_regexp = { path = "extensions/regexp", version = "0.2.0-pre.9" } +turso_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.2.0-pre.9" } +limbo_uuid = { path = "extensions/uuid", version = "0.2.0-pre.9" } +turso_parser = { path = "parser", version = "0.2.0-pre.9" } sql_generation = { path = "sql_generation" } strum = { version = "0.26", features = ["derive"] } strum_macros = "0.26" diff --git a/bindings/javascript/package-lock.json b/bindings/javascript/package-lock.json index 100703c16..59393adbf 100644 --- a/bindings/javascript/package-lock.json +++ b/bindings/javascript/package-lock.json @@ -1,11 +1,11 @@ { "name": "javascript", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "lockfileVersion": 3, "requires": true, "packages": { "": { - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "workspaces": [ "packages/common", "packages/native", @@ -4592,11 +4592,11 @@ }, "packages/browser": { "name": "@tursodatabase/database-browser", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "license": "MIT", "dependencies": { - "@tursodatabase/database-browser-common": "^0.2.0-pre.8", - "@tursodatabase/database-common": "^0.2.0-pre.8" + "@tursodatabase/database-browser-common": "^0.2.0-pre.9", + "@tursodatabase/database-common": "^0.2.0-pre.9" }, "devDependencies": { "@napi-rs/cli": "^3.1.5", @@ -4609,7 +4609,7 @@ }, "packages/browser-common": { "name": "@tursodatabase/database-browser-common", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "license": "MIT", "dependencies": { "@napi-rs/wasm-runtime": "^1.0.5" @@ -4620,7 +4620,7 @@ }, "packages/common": { "name": "@tursodatabase/database-common", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "license": "MIT", "devDependencies": { "typescript": "^5.9.2" @@ -4628,10 +4628,10 @@ }, "packages/native": { "name": "@tursodatabase/database", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "license": "MIT", "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.8" + "@tursodatabase/database-common": "^0.2.0-pre.9" }, "devDependencies": { "@napi-rs/cli": "^3.1.5", @@ -4645,12 +4645,12 @@ }, "sync/packages/browser": { "name": "@tursodatabase/sync-browser", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "license": "MIT", "dependencies": { - "@tursodatabase/database-browser-common": "^0.2.0-pre.8", - 
"@tursodatabase/database-common": "^0.2.0-pre.8", - "@tursodatabase/sync-common": "^0.2.0-pre.8" + "@tursodatabase/database-browser-common": "^0.2.0-pre.9", + "@tursodatabase/database-common": "^0.2.0-pre.9", + "@tursodatabase/sync-common": "^0.2.0-pre.9" }, "devDependencies": { "@napi-rs/cli": "^3.1.5", @@ -4663,10 +4663,10 @@ }, "sync/packages/common": { "name": "@tursodatabase/sync-common", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "license": "MIT", "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.8" + "@tursodatabase/database-common": "^0.2.0-pre.9" }, "devDependencies": { "typescript": "^5.9.2" @@ -4674,11 +4674,11 @@ }, "sync/packages/native": { "name": "@tursodatabase/sync", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "license": "MIT", "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.8", - "@tursodatabase/sync-common": "^0.2.0-pre.8" + "@tursodatabase/database-common": "^0.2.0-pre.9", + "@tursodatabase/sync-common": "^0.2.0-pre.9" }, "devDependencies": { "@napi-rs/cli": "^3.1.5", diff --git a/bindings/javascript/package.json b/bindings/javascript/package.json index 4427e2b0a..849f25785 100644 --- a/bindings/javascript/package.json +++ b/bindings/javascript/package.json @@ -14,5 +14,5 @@ "sync/packages/native", "sync/packages/browser" ], - "version": "0.2.0-pre.8" + "version": "0.2.0-pre.9" } diff --git a/bindings/javascript/packages/browser-common/package.json b/bindings/javascript/packages/browser-common/package.json index d179e537c..e2a68d6fe 100644 --- a/bindings/javascript/packages/browser-common/package.json +++ b/bindings/javascript/packages/browser-common/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/database-browser-common", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" diff --git a/bindings/javascript/packages/browser/package.json b/bindings/javascript/packages/browser/package.json index 6731852db..19f592ff9 100644 --- a/bindings/javascript/packages/browser/package.json +++ b/bindings/javascript/packages/browser/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/database-browser", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -51,7 +51,7 @@ ] }, "dependencies": { - "@tursodatabase/database-browser-common": "^0.2.0-pre.8", - "@tursodatabase/database-common": "^0.2.0-pre.8" + "@tursodatabase/database-browser-common": "^0.2.0-pre.9", + "@tursodatabase/database-common": "^0.2.0-pre.9" } } diff --git a/bindings/javascript/packages/common/package.json b/bindings/javascript/packages/common/package.json index f7cd53b92..4916ece55 100644 --- a/bindings/javascript/packages/common/package.json +++ b/bindings/javascript/packages/common/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/database-common", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" diff --git a/bindings/javascript/packages/native/package.json b/bindings/javascript/packages/native/package.json index b20d01856..eed6f8886 100644 --- a/bindings/javascript/packages/native/package.json +++ b/bindings/javascript/packages/native/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/database", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -47,7 +47,7 @@ ] }, "dependencies": { - 
"@tursodatabase/database-common": "^0.2.0-pre.8" + "@tursodatabase/database-common": "^0.2.0-pre.9" }, "imports": { "#index": "./index.js" diff --git a/bindings/javascript/sync/packages/browser/package.json b/bindings/javascript/sync/packages/browser/package.json index a0d17d33c..8ef179b53 100644 --- a/bindings/javascript/sync/packages/browser/package.json +++ b/bindings/javascript/sync/packages/browser/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/sync-browser", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -54,8 +54,8 @@ "#index": "./index.js" }, "dependencies": { - "@tursodatabase/database-browser-common": "^0.2.0-pre.8", - "@tursodatabase/database-common": "^0.2.0-pre.8", - "@tursodatabase/sync-common": "^0.2.0-pre.8" + "@tursodatabase/database-browser-common": "^0.2.0-pre.9", + "@tursodatabase/database-common": "^0.2.0-pre.9", + "@tursodatabase/sync-common": "^0.2.0-pre.9" } } diff --git a/bindings/javascript/sync/packages/common/package.json b/bindings/javascript/sync/packages/common/package.json index 4dbffb94c..bdae31766 100644 --- a/bindings/javascript/sync/packages/common/package.json +++ b/bindings/javascript/sync/packages/common/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/sync-common", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -23,6 +23,6 @@ "test": "echo 'no tests'" }, "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.8" + "@tursodatabase/database-common": "^0.2.0-pre.9" } } diff --git a/bindings/javascript/sync/packages/native/package.json b/bindings/javascript/sync/packages/native/package.json index c7e6952c9..a649f068c 100644 --- a/bindings/javascript/sync/packages/native/package.json +++ b/bindings/javascript/sync/packages/native/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/sync", - "version": "0.2.0-pre.8", + "version": "0.2.0-pre.9", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -44,8 +44,8 @@ ] }, "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.8", - "@tursodatabase/sync-common": "^0.2.0-pre.8" + "@tursodatabase/database-common": "^0.2.0-pre.9", + "@tursodatabase/sync-common": "^0.2.0-pre.9" }, "imports": { "#index": "./index.js" diff --git a/bindings/javascript/yarn.lock b/bindings/javascript/yarn.lock index cd915df35..1ea9a8437 100644 --- a/bindings/javascript/yarn.lock +++ b/bindings/javascript/yarn.lock @@ -1575,7 +1575,7 @@ __metadata: languageName: node linkType: hard -"@tursodatabase/database-browser-common@npm:^0.2.0-pre.8, @tursodatabase/database-browser-common@workspace:packages/browser-common": +"@tursodatabase/database-browser-common@npm:^0.2.0-pre.9, @tursodatabase/database-browser-common@workspace:packages/browser-common": version: 0.0.0-use.local resolution: "@tursodatabase/database-browser-common@workspace:packages/browser-common" dependencies: @@ -1589,8 +1589,8 @@ __metadata: resolution: "@tursodatabase/database-browser@workspace:packages/browser" dependencies: "@napi-rs/cli": "npm:^3.1.5" - "@tursodatabase/database-browser-common": "npm:^0.2.0-pre.8" - "@tursodatabase/database-common": "npm:^0.2.0-pre.8" + "@tursodatabase/database-browser-common": "npm:^0.2.0-pre.9" + "@tursodatabase/database-common": "npm:^0.2.0-pre.9" "@vitest/browser": "npm:^3.2.4" playwright: "npm:^1.55.0" typescript: "npm:^5.9.2" @@ -1599,7 +1599,7 @@ __metadata: languageName: unknown linkType: 
soft -"@tursodatabase/database-common@npm:^0.2.0-pre.8, @tursodatabase/database-common@workspace:packages/common": +"@tursodatabase/database-common@npm:^0.2.0-pre.9, @tursodatabase/database-common@workspace:packages/common": version: 0.0.0-use.local resolution: "@tursodatabase/database-common@workspace:packages/common" dependencies: @@ -1612,7 +1612,7 @@ __metadata: resolution: "@tursodatabase/database@workspace:packages/native" dependencies: "@napi-rs/cli": "npm:^3.1.5" - "@tursodatabase/database-common": "npm:^0.2.0-pre.8" + "@tursodatabase/database-common": "npm:^0.2.0-pre.9" "@types/node": "npm:^24.3.1" better-sqlite3: "npm:^12.2.0" drizzle-kit: "npm:^0.31.4" @@ -1627,9 +1627,9 @@ __metadata: resolution: "@tursodatabase/sync-browser@workspace:sync/packages/browser" dependencies: "@napi-rs/cli": "npm:^3.1.5" - "@tursodatabase/database-browser-common": "npm:^0.2.0-pre.8" - "@tursodatabase/database-common": "npm:^0.2.0-pre.8" - "@tursodatabase/sync-common": "npm:^0.2.0-pre.8" + "@tursodatabase/database-browser-common": "npm:^0.2.0-pre.9" + "@tursodatabase/database-common": "npm:^0.2.0-pre.9" + "@tursodatabase/sync-common": "npm:^0.2.0-pre.9" "@vitest/browser": "npm:^3.2.4" playwright: "npm:^1.55.0" typescript: "npm:^5.9.2" @@ -1638,11 +1638,11 @@ __metadata: languageName: unknown linkType: soft -"@tursodatabase/sync-common@npm:^0.2.0-pre.8, @tursodatabase/sync-common@workspace:sync/packages/common": +"@tursodatabase/sync-common@npm:^0.2.0-pre.9, @tursodatabase/sync-common@workspace:sync/packages/common": version: 0.0.0-use.local resolution: "@tursodatabase/sync-common@workspace:sync/packages/common" dependencies: - "@tursodatabase/database-common": "npm:^0.2.0-pre.8" + "@tursodatabase/database-common": "npm:^0.2.0-pre.9" typescript: "npm:^5.9.2" languageName: unknown linkType: soft @@ -1652,8 +1652,8 @@ __metadata: resolution: "@tursodatabase/sync@workspace:sync/packages/native" dependencies: "@napi-rs/cli": "npm:^3.1.5" - "@tursodatabase/database-common": "npm:^0.2.0-pre.8" - "@tursodatabase/sync-common": "npm:^0.2.0-pre.8" + "@tursodatabase/database-common": "npm:^0.2.0-pre.9" + "@tursodatabase/sync-common": "npm:^0.2.0-pre.9" "@types/node": "npm:^24.3.1" typescript: "npm:^5.9.2" vitest: "npm:^3.2.4" From 5fcc187434d6fe23f34555dde08527eb0cbb4ba9 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 26 Sep 2025 12:06:44 -0400 Subject: [PATCH 33/65] translate: refactor arguments and centralize parameter context --- core/translate/alter.rs | 43 +++++++------ core/translate/analyze.rs | 14 ++-- core/translate/attach.rs | 11 +--- core/translate/compound_select.rs | 13 ++-- core/translate/delete.rs | 31 ++++----- core/translate/emitter.rs | 38 +++++------ core/translate/expr.rs | 3 +- core/translate/index.rs | 34 +++++----- core/translate/insert.rs | 65 ++++++++++--------- core/translate/mod.rs | 64 ++++++++----------- core/translate/planner.rs | 102 ++++++++++++++---------------- core/translate/pragma.rs | 39 ++++++------ core/translate/rollback.rs | 5 +- core/translate/schema.rs | 76 +++++++++++----------- core/translate/select.rs | 94 ++++++++++++--------------- core/translate/update.rs | 32 ++++------ core/translate/view.rs | 37 ++++++----- core/translate/window.rs | 34 ++++------ core/vdbe/builder.rs | 3 + 19 files changed, 338 insertions(+), 400 deletions(-) diff --git a/core/translate/alter.rs b/core/translate/alter.rs index c07781f35..0e2820ce9 100644 --- a/core/translate/alter.rs +++ b/core/translate/alter.rs @@ -3,21 +3,21 @@ use turso_parser::{ast, parser::Parser}; use crate::{ 
function::{AlterTableFunc, Func}, - schema::{Column, Schema}, + schema::Column, + translate::emitter::Resolver, util::normalize_ident, vdbe::{ builder::{CursorType, ProgramBuilder}, insn::{Cookie, Insn, RegisterOrLiteral}, }, - LimboError, Result, SymbolTable, + LimboError, Result, }; use super::{schema::SQLITE_TABLEID, update::translate_update_for_schema_change}; pub fn translate_alter_table( alter: ast::AlterTable, - syms: &SymbolTable, - schema: &Schema, + resolver: &Resolver, mut program: ProgramBuilder, connection: &Arc, input: &str, @@ -34,7 +34,7 @@ pub fn translate_alter_table( crate::bail_parse_error!("table {} may not be modified", table_name); } - if schema.table_has_indexes(table_name) && !schema.indexes_enabled() { + if resolver.schema.table_has_indexes(table_name) && !resolver.schema.indexes_enabled() { // Let's disable altering a table with indices altogether instead of checking column by // column to be extra safe. crate::bail_parse_error!( @@ -42,14 +42,18 @@ pub fn translate_alter_table( ); } - let Some(original_btree) = schema.get_table(table_name).and_then(|table| table.btree()) else { + let Some(original_btree) = resolver + .schema + .get_table(table_name) + .and_then(|table| table.btree()) + else { return Err(LimboError::ParseError(format!( "no such table: {table_name}" ))); }; // Check if this table has dependent materialized views - let dependent_views = schema.get_dependent_materialized_views(table_name); + let dependent_views = resolver.schema.get_dependent_materialized_views(table_name); if !dependent_views.is_empty() { return Err(LimboError::ParseError(format!( "cannot alter table \"{table_name}\": it has dependent materialized view(s): {}", @@ -113,9 +117,8 @@ pub fn translate_alter_table( }; translate_update_for_schema_change( - schema, &mut update, - syms, + resolver, program, connection, input, @@ -175,7 +178,7 @@ pub fn translate_alter_table( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: schema.schema_version as i32 + 1, + value: resolver.schema.schema_version as i32 + 1, p5: 0, }); @@ -242,9 +245,8 @@ pub fn translate_alter_table( }; translate_update_for_schema_change( - schema, &mut update, - syms, + resolver, program, connection, input, @@ -252,7 +254,7 @@ pub fn translate_alter_table( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: schema.schema_version as i32 + 1, + value: resolver.schema.schema_version as i32 + 1, p5: 0, }); program.emit_insn(Insn::AddColumn { @@ -265,8 +267,9 @@ pub fn translate_alter_table( ast::AlterTableBody::RenameTo(new_name) => { let new_name = new_name.as_str(); - if schema.get_table(new_name).is_some() - || schema + if resolver.schema.get_table(new_name).is_some() + || resolver + .schema .indexes .values() .flatten() @@ -277,7 +280,8 @@ pub fn translate_alter_table( ))); }; - let sqlite_schema = schema + let sqlite_schema = resolver + .schema .get_btree_table(SQLITE_TABLEID) .expect("sqlite_schema should be on schema"); @@ -339,7 +343,7 @@ pub fn translate_alter_table( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: schema.schema_version as i32 + 1, + value: resolver.schema.schema_version as i32 + 1, p5: 0, }); @@ -412,7 +416,8 @@ pub fn translate_alter_table( )); } - let sqlite_schema = schema + let sqlite_schema = resolver + .schema .get_btree_table(SQLITE_TABLEID) .expect("sqlite_schema should be on schema"); @@ -481,7 +486,7 @@ pub fn translate_alter_table( program.emit_insn(Insn::SetCookie { db: 0, cookie: 
Cookie::SchemaVersion, - value: schema.schema_version as i32 + 1, + value: resolver.schema.schema_version as i32 + 1, p5: 0, }); program.emit_insn(Insn::AlterColumn { diff --git a/core/translate/analyze.rs b/core/translate/analyze.rs index d7e68fc04..8848f6c5f 100644 --- a/core/translate/analyze.rs +++ b/core/translate/analyze.rs @@ -4,7 +4,7 @@ use turso_parser::ast; use crate::{ bail_parse_error, - schema::{BTreeTable, Schema}, + schema::BTreeTable, storage::pager::CreateBTreeFlags, translate::{ emitter::Resolver, @@ -15,20 +15,19 @@ use crate::{ builder::{CursorType, ProgramBuilder}, insn::{Insn, RegisterOrLiteral}, }, - Result, SymbolTable, + Result, }; pub fn translate_analyze( target_opt: Option, - schema: &Schema, - syms: &SymbolTable, + resolver: &Resolver, mut program: ProgramBuilder, ) -> Result { let Some(target) = target_opt else { bail_parse_error!("ANALYZE with no target is not supported"); }; let normalized = normalize_ident(target.name.as_str()); - let Some(target_schema) = schema.get_table(&normalized) else { + let Some(target_schema) = resolver.schema.get_table(&normalized) else { bail_parse_error!("ANALYZE is not supported"); }; let Some(target_btree) = target_schema.btree() else { @@ -48,7 +47,7 @@ pub fn translate_analyze( let sqlite_stat1_btreetable: Arc; let sqlite_stat1_source: RegisterOrLiteral<_>; - if let Some(sqlite_stat1) = schema.get_btree_table("sqlite_stat1") { + if let Some(sqlite_stat1) = resolver.schema.get_btree_table("sqlite_stat1") { sqlite_stat1_btreetable = sqlite_stat1.clone(); sqlite_stat1_source = RegisterOrLiteral::Literal(sqlite_stat1.root_page); // sqlite_stat1 already exists, so we need to remove the row @@ -131,7 +130,7 @@ pub fn translate_analyze( sqlite_stat1_btreetable = Arc::new(BTreeTable::from_sql(sql, 0)?); sqlite_stat1_source = RegisterOrLiteral::Register(table_root_reg); - let table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let table = resolver.schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone())); program.emit_insn(Insn::OpenWrite { @@ -140,7 +139,6 @@ pub fn translate_analyze( db: 0, }); - let resolver = Resolver::new(schema, syms); // Add the table entry to sqlite_schema emit_schema_entry( &mut program, diff --git a/core/translate/attach.rs b/core/translate/attach.rs index a42688e62..0ec29e101 100644 --- a/core/translate/attach.rs +++ b/core/translate/attach.rs @@ -1,21 +1,19 @@ use crate::function::{Func, ScalarFunc}; -use crate::schema::Schema; use crate::translate::emitter::Resolver; use crate::translate::expr::{sanitize_string, translate_expr}; use crate::translate::{ProgramBuilder, ProgramBuilderOpts}; use crate::util::normalize_ident; use crate::vdbe::insn::Insn; -use crate::{Result, SymbolTable}; +use crate::Result; use turso_parser::ast::{Expr, Literal}; /// Translate ATTACH statement /// SQLite implements ATTACH as a function call to sqlite_attach() pub fn translate_attach( expr: &Expr, + resolver: &Resolver, db_name: &Expr, key: &Option>, - schema: &Schema, - syms: &SymbolTable, mut program: ProgramBuilder, ) -> Result { // SQLite treats ATTACH as a function call to sqlite_attach(filename, dbname, key) @@ -28,7 +26,6 @@ pub fn translate_attach( }); let arg_reg = program.alloc_registers(4); // 3 for args + 1 for result - let resolver = Resolver::new(schema, syms); // Load filename argument // Handle different expression types as string literals for filenames @@ -120,8 +117,7 @@ pub fn translate_attach( /// SQLite implements 
DETACH as a function call to sqlite_detach() pub fn translate_detach( expr: &Expr, - schema: &Schema, - syms: &SymbolTable, + resolver: &Resolver, mut program: ProgramBuilder, ) -> Result { // SQLite treats DETACH as a function call to sqlite_detach(dbname) @@ -133,7 +129,6 @@ pub fn translate_detach( }); let arg_reg = program.alloc_registers(2); // 1 for arg + 1 for result - let resolver = Resolver::new(schema, syms); // Load database name argument // Handle different expression types as string literals for database names diff --git a/core/translate/compound_select.rs b/core/translate/compound_select.rs index 40eb2ce59..0cac57af9 100644 --- a/core/translate/compound_select.rs +++ b/core/translate/compound_select.rs @@ -1,5 +1,5 @@ use crate::schema::{Index, IndexColumn, Schema}; -use crate::translate::emitter::{emit_query, LimitCtx, TranslateCtx}; +use crate::translate::emitter::{emit_query, LimitCtx, Resolver, TranslateCtx}; use crate::translate::expr::translate_expr; use crate::translate::plan::{Plan, QueryDestination, SelectPlan}; use crate::translate::result_row::try_fold_expr_to_i64; @@ -16,9 +16,8 @@ use tracing::Level; #[instrument(skip_all, level = Level::DEBUG)] pub fn emit_program_for_compound_select( program: &mut ProgramBuilder, + resolver: &Resolver, plan: Plan, - schema: &Schema, - syms: &SymbolTable, ) -> crate::Result<()> { let Plan::CompoundSelect { left: _left, @@ -41,8 +40,8 @@ pub fn emit_program_for_compound_select( let right_most_ctx = TranslateCtx::new( program, - schema, - syms, + resolver.schema, + resolver.symbol_table, right_most.table_references.joined_tables().len(), ); @@ -102,8 +101,8 @@ pub fn emit_program_for_compound_select( emit_compound_select( program, plan, - schema, - syms, + right_most_ctx.resolver.schema, + right_most_ctx.resolver.symbol_table, limit_ctx, offset_reg, yield_reg, diff --git a/core/translate/delete.rs b/core/translate/delete.rs index d343d0ea2..4d2dbdbff 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -1,12 +1,11 @@ -use crate::schema::Table; -use crate::translate::emitter::emit_program; -use crate::translate::expr::ParamState; +use crate::schema::{Schema, Table}; +use crate::translate::emitter::{emit_program, Resolver}; use crate::translate::optimizer::optimize_plan; use crate::translate::plan::{DeletePlan, Operation, Plan}; use crate::translate::planner::{parse_limit, parse_where}; use crate::util::normalize_ident; -use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, TableRefIdCounter}; -use crate::{schema::Schema, Result, SymbolTable}; +use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts}; +use crate::Result; use std::sync::Arc; use turso_parser::ast::{Expr, Limit, QualifiedName, ResultColumn}; @@ -14,12 +13,11 @@ use super::plan::{ColumnUsedMask, JoinedTable, TableReferences}; #[allow(clippy::too_many_arguments)] pub fn translate_delete( - schema: &Schema, tbl_name: &QualifiedName, + resolver: &Resolver, where_clause: Option>, limit: Option, returning: Vec, - syms: &SymbolTable, mut program: ProgramBuilder, connection: &Arc, ) -> Result { @@ -30,7 +28,7 @@ pub fn translate_delete( crate::bail_parse_error!("table {} may not be modified", tbl_name); } - if schema.table_has_indexes(&tbl_name) && !schema.indexes_enabled() { + if resolver.schema.table_has_indexes(&tbl_name) && !resolver.schema.indexes_enabled() { // Let's disable altering a table with indices altogether instead of checking column by // column to be extra safe. 
crate::bail_parse_error!( @@ -48,15 +46,15 @@ pub fn translate_delete( let result_columns = vec![]; let mut delete_plan = prepare_delete_plan( - schema, + &mut program, + resolver.schema, tbl_name, where_clause, limit, result_columns, - &mut program.table_reference_counter, connection, )?; - optimize_plan(&mut delete_plan, schema)?; + optimize_plan(&mut delete_plan, resolver.schema)?; let Plan::Delete(ref delete) = delete_plan else { panic!("delete_plan is not a DeletePlan"); }; @@ -66,17 +64,17 @@ pub fn translate_delete( approx_num_labels: 0, }; program.extend(&opts); - emit_program(connection, &mut program, delete_plan, schema, syms, |_| {})?; + emit_program(connection, resolver, &mut program, delete_plan, |_| {})?; Ok(program) } pub fn prepare_delete_plan( + program: &mut ProgramBuilder, schema: &Schema, tbl_name: String, where_clause: Option>, limit: Option, result_columns: Vec, - table_ref_counter: &mut TableRefIdCounter, connection: &Arc, ) -> Result { let table = match schema.get_table(&tbl_name) { @@ -115,7 +113,7 @@ pub fn prepare_delete_plan( op: Operation::default_scan_for(&table), table, identifier: tbl_name, - internal_id: table_ref_counter.next(), + internal_id: program.table_reference_counter.next(), join_info: None, col_used_mask: ColumnUsedMask::default(), database_id: 0, @@ -123,7 +121,6 @@ pub fn prepare_delete_plan( let mut table_references = TableReferences::new(joined_tables, vec![]); let mut where_predicates = vec![]; - let mut param_ctx = ParamState::default(); // Parse the WHERE clause parse_where( @@ -132,12 +129,12 @@ pub fn prepare_delete_plan( None, &mut where_predicates, connection, - &mut param_ctx, + &mut program.param_ctx, )?; // Parse the LIMIT/OFFSET clause let (resolved_limit, resolved_offset) = limit.map_or(Ok((None, None)), |mut l| { - parse_limit(&mut l, connection, &mut param_ctx) + parse_limit(&mut l, connection, &mut program.param_ctx) })?; let plan = DeletePlan { diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 48cb4a45e..1d986d572 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -208,35 +208,29 @@ pub enum TransactionMode { #[instrument(skip_all, level = Level::DEBUG)] pub fn emit_program( connection: &Arc, + resolver: &Resolver, program: &mut ProgramBuilder, plan: Plan, - schema: &Schema, - syms: &SymbolTable, after: impl FnOnce(&mut ProgramBuilder), ) -> Result<()> { match plan { - Plan::Select(plan) => emit_program_for_select(program, plan, schema, syms), - Plan::Delete(plan) => emit_program_for_delete(connection, program, plan, schema, syms), - Plan::Update(plan) => { - emit_program_for_update(connection, program, plan, schema, syms, after) - } - Plan::CompoundSelect { .. } => { - emit_program_for_compound_select(program, plan, schema, syms) - } + Plan::Select(plan) => emit_program_for_select(program, resolver, plan), + Plan::Delete(plan) => emit_program_for_delete(connection, resolver, program, plan), + Plan::Update(plan) => emit_program_for_update(connection, resolver, program, plan, after), + Plan::CompoundSelect { .. 
} => emit_program_for_compound_select(program, resolver, plan), } } #[instrument(skip_all, level = Level::DEBUG)] fn emit_program_for_select( program: &mut ProgramBuilder, + resolver: &Resolver, mut plan: SelectPlan, - schema: &Schema, - syms: &SymbolTable, ) -> Result<()> { let mut t_ctx = TranslateCtx::new( program, - schema, - syms, + resolver.schema, + resolver.symbol_table, plan.table_references.joined_tables().len(), ); @@ -419,15 +413,14 @@ pub fn emit_query<'a>( #[instrument(skip_all, level = Level::DEBUG)] fn emit_program_for_delete( connection: &Arc, + resolver: &Resolver, program: &mut ProgramBuilder, mut plan: DeletePlan, - schema: &Schema, - syms: &SymbolTable, ) -> Result<()> { let mut t_ctx = TranslateCtx::new( program, - schema, - syms, + resolver.schema, + resolver.symbol_table, plan.table_references.joined_tables().len(), ); @@ -715,16 +708,15 @@ fn emit_delete_insns( #[instrument(skip_all, level = Level::DEBUG)] fn emit_program_for_update( connection: &Arc, + resolver: &Resolver, program: &mut ProgramBuilder, mut plan: UpdatePlan, - schema: &Schema, - syms: &SymbolTable, after: impl FnOnce(&mut ProgramBuilder), ) -> Result<()> { let mut t_ctx = TranslateCtx::new( program, - schema, - syms, + resolver.schema, + resolver.symbol_table, plan.table_references.joined_tables().len(), ); @@ -759,7 +751,7 @@ fn emit_program_for_update( is_table: true, }); program.incr_nesting(); - emit_program_for_select(program, ephemeral_plan, schema, syms)?; + emit_program_for_select(program, resolver, ephemeral_plan)?; program.decr_nesting(); } diff --git a/core/translate/expr.rs b/core/translate/expr.rs index dddfec85e..fd6fbebce 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -4063,7 +4063,6 @@ pub fn process_returning_clause( table_name: &str, program: &mut ProgramBuilder, connection: &std::sync::Arc, - param_ctx: &mut ParamState, ) -> Result<( Vec, super::plan::TableReferences, @@ -4100,7 +4099,7 @@ pub fn process_returning_clause( Some(&mut table_references), None, connection, - param_ctx, + &mut program.param_ctx, BindingBehavior::TryResultColumnsFirst, )?; diff --git a/core/translate/index.rs b/core/translate/index.rs index e78b9dc02..75d657446 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use crate::bail_parse_error; use crate::schema::{Table, RESERVED_TABLE_PREFIXES}; use crate::translate::emitter::{ emit_cdc_full_record, emit_cdc_insns, prepare_cdc_if_necessary, OperationMode, Resolver, @@ -11,9 +12,8 @@ use crate::translate::plan::{ use crate::vdbe::builder::CursorKey; use crate::vdbe::insn::{CmpInsFlags, Cookie}; use crate::vdbe::BranchOffset; -use crate::{bail_parse_error, SymbolTable}; use crate::{ - schema::{BTreeTable, Column, Index, IndexColumn, PseudoCursorType, Schema}, + schema::{BTreeTable, Column, Index, IndexColumn, PseudoCursorType}, storage::pager::CreateBTreeFlags, util::normalize_ident, vdbe::{ @@ -28,11 +28,10 @@ use super::schema::{emit_schema_entry, SchemaEntryType, SQLITE_TABLEID}; #[allow(clippy::too_many_arguments)] pub fn translate_create_index( unique_if_not_exists: (bool, bool), + resolver: &Resolver, idx_name: &str, tbl_name: &str, columns: &[SortedColumn], - schema: &Schema, - syms: &SymbolTable, mut program: ProgramBuilder, connection: &Arc, where_clause: Option>, @@ -40,7 +39,7 @@ pub fn translate_create_index( if tbl_name.eq_ignore_ascii_case("sqlite_sequence") { crate::bail_parse_error!("table sqlite_sequence may not be indexed"); } - if !schema.indexes_enabled() { + if 
!resolver.schema.indexes_enabled() { crate::bail_parse_error!( "CREATE INDEX is disabled by default. Run with `--experimental-indexes` to enable this feature." ); @@ -73,14 +72,14 @@ pub fn translate_create_index( // Check if the index is being created on a valid btree table and // the name is globally unique in the schema. - if !schema.is_unique_idx_name(&idx_name) { + if !resolver.schema.is_unique_idx_name(&idx_name) { // If IF NOT EXISTS is specified, silently return without error if unique_if_not_exists.1 { return Ok(program); } crate::bail_parse_error!("Error: index with name '{idx_name}' already exists."); } - let Some(table) = schema.tables.get(&tbl_name) else { + let Some(table) = resolver.schema.tables.get(&tbl_name) else { crate::bail_parse_error!("Error: table '{tbl_name}' does not exist."); }; let Some(tbl) = table.btree() else { @@ -127,7 +126,7 @@ pub fn translate_create_index( // 3. table_cursor_id - table we are creating the index on // 4. sorter_cursor_id - sorter // 5. pseudo_cursor_id - pseudo table to store the sorted index values - let sqlite_table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let sqlite_table = resolver.schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(sqlite_table.clone())); let table_ref = program.table_reference_counter.next(); @@ -179,8 +178,7 @@ pub fn translate_create_index( original_columns, &idx.where_clause.clone(), ); - let resolver = Resolver::new(schema, syms); - let cdc_table = prepare_cdc_if_necessary(&mut program, schema, SQLITE_TABLEID)?; + let cdc_table = prepare_cdc_if_necessary(&mut program, resolver.schema, SQLITE_TABLEID)?; emit_schema_entry( &mut program, &resolver, @@ -327,7 +325,7 @@ pub fn translate_create_index( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: schema.schema_version as i32 + 1, + value: resolver.schema.schema_version as i32 + 1, p5: 0, }); // Parse the schema table to get the index root page and add new index to Schema @@ -416,12 +414,11 @@ fn create_idx_stmt_to_sql( pub fn translate_drop_index( idx_name: &str, + resolver: &Resolver, if_exists: bool, - schema: &Schema, - syms: &SymbolTable, mut program: ProgramBuilder, ) -> crate::Result { - if !schema.indexes_enabled() { + if !resolver.schema.indexes_enabled() { crate::bail_parse_error!( "DROP INDEX is disabled by default. Run with `--experimental-indexes` to enable this feature." ); @@ -436,7 +433,7 @@ pub fn translate_drop_index( // Find the index in Schema let mut maybe_index = None; - for val in schema.indexes.values() { + for val in resolver.schema.indexes.values() { if maybe_index.is_some() { break; } @@ -470,7 +467,7 @@ pub fn translate_drop_index( } } - let cdc_table = prepare_cdc_if_necessary(&mut program, schema, SQLITE_TABLEID)?; + let cdc_table = prepare_cdc_if_necessary(&mut program, resolver.schema, SQLITE_TABLEID)?; // According to sqlite should emit Null instruction // but why? 
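The change running through all of these hunks is mechanical: every translator that used to take `&Schema` and `&SymbolTable` now takes a single `&Resolver`. From the call sites in this patch (`Resolver::new(schema, syms)`, `resolver.schema`, `resolver.symbol_table`), its shape is roughly the sketch below; the real definition in core/translate/emitter.rs may carry more state than the two fields exercised here.

    // Sketch, inferred from usage in this patch: a borrow-only bundle of the
    // two lookup structures every translator needs. Only the fields and the
    // constructor visible in these hunks are shown; anything else is elided.
    pub struct Resolver<'a> {
        pub schema: &'a Schema,
        pub symbol_table: &'a SymbolTable,
    }

    impl<'a> Resolver<'a> {
        pub fn new(schema: &'a Schema, symbol_table: &'a SymbolTable) -> Self {
            Self {
                schema,
                symbol_table,
            }
        }
    }

Constructing it once in `translate()` and passing `&resolver` down keeps each helper at one context parameter, which is also what lets several `#[allow(clippy::too_many_arguments)]` attributes disappear in these hunks.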
@@ -486,7 +483,7 @@ pub fn translate_drop_index( let row_id_reg = program.alloc_register(); // We're going to use this cursor to search through sqlite_schema - let sqlite_table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let sqlite_table = resolver.schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(sqlite_table.clone())); @@ -554,7 +551,6 @@ pub fn translate_drop_index( } else { None }; - let resolver = Resolver::new(schema, syms); emit_cdc_insns( &mut program, &resolver, @@ -584,7 +580,7 @@ pub fn translate_drop_index( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: schema.schema_version as i32 + 1, + value: resolver.schema.schema_version as i32 + 1, p5: 0, }); diff --git a/core/translate/insert.rs b/core/translate/insert.rs index dae59ac3f..374b8cd87 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -14,7 +14,7 @@ use crate::translate::emitter::{ }; use crate::translate::expr::{ bind_and_rewrite_expr, emit_returning_results, process_returning_clause, walk_expr_mut, - BindingBehavior, ParamState, ReturningValueRegisters, WalkControl, + BindingBehavior, ReturningValueRegisters, WalkControl, }; use crate::translate::plan::TableReferences; use crate::translate::planner::ROWID_STRS; @@ -32,7 +32,7 @@ use crate::{ insn::Insn, }, }; -use crate::{Result, SymbolTable, VirtualTable}; +use crate::{Result, VirtualTable}; use super::emitter::Resolver; use super::expr::{translate_expr, translate_expr_no_constant_opt, NoConstantOptReason}; @@ -47,14 +47,13 @@ struct TempTableCtx { #[allow(clippy::too_many_arguments)] pub fn translate_insert( - schema: &Schema, with: Option, + resolver: &Resolver, on_conflict: Option, tbl_name: QualifiedName, columns: Vec, mut body: InsertBody, mut returning: Vec, - syms: &SymbolTable, mut program: ProgramBuilder, connection: &Arc, ) -> Result { @@ -72,7 +71,11 @@ pub fn translate_insert( crate::bail_parse_error!("ON CONFLICT clause is not supported"); } - if schema.table_has_indexes(&tbl_name.name.to_string()) && !schema.indexes_enabled() { + if resolver + .schema + .table_has_indexes(&tbl_name.name.to_string()) + && !resolver.schema.indexes_enabled() + { // Let's disable altering a table with indices altogether instead of checking column by // column to be extra safe. 
crate::bail_parse_error!( @@ -86,18 +89,20 @@ pub fn translate_insert( crate::bail_parse_error!("table {} may not be modified", table_name); } - let table = match schema.get_table(table_name.as_str()) { + let table = match resolver.schema.get_table(table_name.as_str()) { Some(table) => table, None => crate::bail_parse_error!("no such table: {}", table_name), }; // Check if this is a materialized view - if schema.is_materialized_view(table_name.as_str()) { + if resolver.schema.is_materialized_view(table_name.as_str()) { crate::bail_parse_error!("cannot modify materialized view {}", table_name); } // Check if this table has any incompatible dependent views - let incompatible_views = schema.has_incompatible_dependent_views(table_name.as_str()); + let incompatible_views = resolver + .schema + .has_incompatible_dependent_views(table_name.as_str()); if !incompatible_views.is_empty() { use crate::incremental::compiler::DBSP_CIRCUIT_VERSION; crate::bail_parse_error!( @@ -110,8 +115,6 @@ pub fn translate_insert( ); } - let resolver = Resolver::new(schema, syms); - if let Some(virtual_table) = &table.virtual_table() { program = translate_virtual_table_insert( program, @@ -119,7 +122,7 @@ pub fn translate_insert( columns, body, on_conflict, - &resolver, + resolver, )?; return Ok(program); } @@ -136,7 +139,6 @@ pub fn translate_insert( let mut values: Option>> = None; let mut upsert_opt: Option = None; - let mut param_ctx = ParamState::default(); let mut inserting_multiple_rows = false; if let InsertBody::Select(select, upsert) = &mut body { match &mut select.body.select { @@ -167,7 +169,7 @@ pub fn translate_insert( None, None, connection, - &mut param_ctx, + &mut program.param_ctx, BindingBehavior::ResultColumnsNotAllowed, )?; } @@ -187,7 +189,7 @@ pub fn translate_insert( None, None, connection, - &mut param_ctx, + &mut program.param_ctx, BindingBehavior::ResultColumnsNotAllowed, )?; } @@ -197,7 +199,7 @@ pub fn translate_insert( None, None, connection, - &mut param_ctx, + &mut program.param_ctx, BindingBehavior::ResultColumnsNotAllowed, )?; } @@ -207,20 +209,20 @@ pub fn translate_insert( } // resolve the constrained target for UPSERT if specified let resolved_upsert = if let Some(upsert) = &upsert_opt { - Some(resolve_upsert_target(schema, &table, upsert)?) + Some(resolve_upsert_target(resolver.schema, &table, upsert)?) 
} else { None }; if inserting_multiple_rows && btree_table.has_autoincrement { - ensure_sequence_initialized(&mut program, schema, &btree_table)?; + ensure_sequence_initialized(&mut program, resolver.schema, &btree_table)?; } let halt_label = program.allocate_label(); let loop_start_label = program.allocate_label(); let row_done_label = program.allocate_label(); - let cdc_table = prepare_cdc_if_necessary(&mut program, schema, table.get_name())?; + let cdc_table = prepare_cdc_if_necessary(&mut program, resolver.schema, table.get_name())?; // Process RETURNING clause using shared module let (mut result_columns, _) = process_returning_clause( @@ -229,7 +231,6 @@ pub fn translate_insert( table_name.as_str(), &mut program, connection, - &mut param_ctx, )?; let mut yield_reg_opt = None; @@ -261,7 +262,7 @@ pub fn translate_insert( }; program.incr_nesting(); let result = - translate_select(schema, select, syms, program, query_destination, connection)?; + translate_select(select, resolver, program, query_destination, connection)?; program = result.program; program.decr_nesting(); @@ -415,7 +416,8 @@ pub fn translate_insert( // allocate cursor id's for each btree index cursor we'll need to populate the indexes // (idx name, root_page, idx cursor id) - let idx_cursors = schema + let idx_cursors = resolver + .schema .get_indices(table_name.as_str()) .map(|idx| { ( @@ -466,9 +468,14 @@ pub fn translate_insert( let mut autoincrement_meta = None; if btree_table.has_autoincrement { - let seq_table = schema.get_btree_table("sqlite_sequence").ok_or_else(|| { - crate::error::LimboError::InternalError("sqlite_sequence table not found".to_string()) - })?; + let seq_table = resolver + .schema + .get_btree_table("sqlite_sequence") + .ok_or_else(|| { + crate::error::LimboError::InternalError( + "sqlite_sequence table not found".to_string(), + ) + })?; let seq_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(seq_table.clone())); program.emit_insn(Insn::OpenWrite { cursor_id: seq_cursor_id, @@ -600,7 +607,7 @@ pub fn translate_insert( if let Some((seq_cursor_id, _, r_seq_rowid, table_name_reg)) = autoincrement_meta { emit_update_sqlite_sequence( &mut program, - schema, + resolver.schema, seq_cursor_id, r_seq_rowid, table_name_reg, @@ -686,7 +693,7 @@ pub fn translate_insert( // DO UPDATE (matching target) -> fetch conflicting rowid and jump to `upsert_entry`. // // otherwise, raise SQLITE_CONSTRAINT_UNIQUE - for index in schema.get_indices(table_name.as_str()) { + for index in resolver.schema.get_indices(table_name.as_str()) { let column_mappings = index .columns .iter() @@ -929,7 +936,7 @@ pub fn translate_insert( // We re-check partial-index predicates against the NEW image, produce packed records, // and insert into all applicable indexes, we do not re-probe uniqueness here, as preflight // already guaranteed non-conflict. 
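// Put differently: the preflight loop above has already visited every unique
// index, jumping to `upsert_entry` when the conflict matches a DO UPDATE
// target and raising SQLITE_CONSTRAINT_UNIQUE otherwise, so by the time this
// apply loop runs the row is known to be conflict-free and the loop only has
// to build and write the index records.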
- for index in schema.get_indices(table_name.as_str()) { + for index in resolver.schema.get_indices(table_name.as_str()) { let idx_cursor_id = idx_cursors .iter() .find(|(name, _, _)| *name == &index.name) @@ -1024,7 +1031,7 @@ pub fn translate_insert( emit_update_sqlite_sequence( &mut program, - schema, + resolver.schema, seq_cursor_id, r_seq_rowid, table_name_reg, @@ -1093,7 +1100,7 @@ pub fn translate_insert( emit_upsert( &mut program, - schema, + resolver.schema, &table, &insertion, cursor_id, diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 2263e3a97..fd4df8405 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -43,6 +43,7 @@ mod window; use crate::schema::Schema; use crate::storage::pager::Pager; use crate::translate::delete::translate_delete; +use crate::translate::emitter::Resolver; use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode}; use crate::vdbe::Program; use crate::{bail_parse_error, Connection, Result, SymbolTable}; @@ -91,19 +92,14 @@ pub fn translate( ); program.prologue(); + let resolver = Resolver::new(schema, syms); program = match stmt { // There can be no nesting with pragma, so lift it up here - ast::Stmt::Pragma { name, body } => pragma::translate_pragma( - schema, - syms, - &name, - body, - pager, - connection.clone(), - program, - )?, - stmt => translate_inner(schema, stmt, syms, program, &connection, input)?, + ast::Stmt::Pragma { name, body } => { + pragma::translate_pragma(&resolver, &name, body, pager, connection.clone(), program)? + } + stmt => translate_inner(stmt, &resolver, program, &connection, input)?, }; program.epilogue(schema); @@ -115,9 +111,8 @@ pub fn translate( // statements, we would have to return a program builder instead /// Translate SQL statement into bytecode program. pub fn translate_inner( - schema: &Schema, stmt: ast::Stmt, - syms: &SymbolTable, + resolver: &Resolver, program: ProgramBuilder, connection: &Arc, input: &str, @@ -148,13 +143,13 @@ pub fn translate_inner( let mut program = match stmt { ast::Stmt::AlterTable(alter) => { - translate_alter_table(alter, syms, schema, program, connection, input)? + translate_alter_table(alter, resolver, program, connection, input)? } - ast::Stmt::Analyze { name } => translate_analyze(name, schema, syms, program)?, + ast::Stmt::Analyze { name } => translate_analyze(name, resolver, program)?, ast::Stmt::Attach { expr, db_name, key } => { - attach::translate_attach(&expr, &db_name, &key, schema, syms, program)? + attach::translate_attach(&expr, resolver, &db_name, &key, program)? } - ast::Stmt::Begin { typ, name } => translate_tx_begin(typ, name, schema, program)?, + ast::Stmt::Begin { typ, name } => translate_tx_begin(typ, name, resolver.schema, program)?, ast::Stmt::Commit { name } => translate_tx_commit(name, program)?, ast::Stmt::CreateIndex { unique, @@ -165,11 +160,10 @@ pub fn translate_inner( where_clause, } => translate_create_index( (unique, if_not_exists), + resolver, idx_name.name.as_str(), tbl_name.as_str(), &columns, - schema, - syms, program, connection, where_clause, @@ -181,11 +175,10 @@ pub fn translate_inner( body, } => translate_create_table( tbl_name, + resolver, temporary, if_not_exists, body, - schema, - syms, program, connection, )?, @@ -196,26 +189,24 @@ pub fn translate_inner( columns, .. } => view::translate_create_view( - schema, view_name.name.as_str(), + resolver, &select, &columns, connection.clone(), - syms, program, )?, ast::Stmt::CreateMaterializedView { view_name, select, .. 
} => view::translate_create_materialized_view( - schema, view_name.name.as_str(), + resolver, &select, connection.clone(), - syms, program, )?, ast::Stmt::CreateVirtualTable(vtab) => { - translate_create_virtual_table(vtab, schema, syms, program)? + translate_create_virtual_table(vtab, resolver, program)? } ast::Stmt::Delete { tbl_name, @@ -236,30 +227,31 @@ pub fn translate_inner( bail_parse_error!("ORDER BY clause is not supported in DELETE"); } translate_delete( - schema, &tbl_name, + resolver, where_clause, limit, returning, - syms, program, connection, )? } - ast::Stmt::Detach { name } => attach::translate_detach(&name, schema, syms, program)?, + ast::Stmt::Detach { name } => attach::translate_detach(&name, resolver, program)?, ast::Stmt::DropIndex { if_exists, idx_name, - } => translate_drop_index(idx_name.name.as_str(), if_exists, schema, syms, program)?, + } => translate_drop_index(idx_name.name.as_str(), resolver, if_exists, program)?, ast::Stmt::DropTable { if_exists, tbl_name, - } => translate_drop_table(tbl_name, if_exists, schema, syms, program)?, + } => translate_drop_table(tbl_name, resolver, if_exists, program)?, ast::Stmt::DropTrigger { .. } => bail_parse_error!("DROP TRIGGER not supported yet"), ast::Stmt::DropView { if_exists, view_name, - } => view::translate_drop_view(schema, view_name.name.as_str(), if_exists, program)?, + } => { + view::translate_drop_view(resolver.schema, view_name.name.as_str(), if_exists, program)? + } ast::Stmt::Pragma { .. } => { bail_parse_error!("PRAGMA statement cannot be evaluated in a nested context") } @@ -268,13 +260,12 @@ pub fn translate_inner( ast::Stmt::Rollback { tx_name, savepoint_name, - } => translate_rollback(schema, syms, program, tx_name, savepoint_name)?, + } => translate_rollback(program, tx_name, savepoint_name)?, ast::Stmt::Savepoint { .. } => bail_parse_error!("SAVEPOINT not supported yet"), ast::Stmt::Select(select) => { translate_select( - schema, select, - syms, + resolver, program, plan::QueryDestination::ResultRows, connection, @@ -282,7 +273,7 @@ pub fn translate_inner( .program } ast::Stmt::Update(mut update) => { - translate_update(schema, &mut update, syms, program, connection)? + translate_update(&mut update, resolver, program, connection)? } ast::Stmt::Vacuum { .. 
} => bail_parse_error!("VACUUM not supported yet"), ast::Stmt::Insert { @@ -293,14 +284,13 @@ pub fn translate_inner( body, returning, } => translate_insert( - schema, with, + resolver, or_conflict, tbl_name, columns, body, returning, - syms, program, connection, )?, diff --git a/core/translate/planner.rs b/core/translate/planner.rs index e1919afbc..58669ab9d 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -9,22 +9,26 @@ use super::{ ResultSetColumn, Scan, TableReferences, WhereTerm, }, select::prepare_select_plan, - SymbolTable, }; -use crate::translate::expr::{BindingBehavior, WalkControl}; -use crate::translate::plan::{Window, WindowFunction}; +use crate::translate::{ + emitter::Resolver, + expr::{BindingBehavior, WalkControl}, +}; use crate::{ ast::Limit, function::Func, - schema::{Schema, Table}, + schema::Table, util::{exprs_are_equivalent, normalize_ident}, - vdbe::builder::TableRefIdCounter, Result, }; use crate::{ function::{AggFunc, ExtFunc}, translate::expr::{bind_and_rewrite_expr, ParamState}, }; +use crate::{ + translate::plan::{Window, WindowFunction}, + vdbe::builder::ProgramBuilder, +}; use turso_parser::ast::Literal::Null; use turso_parser::ast::{ self, As, Expr, FromClause, JoinType, Materialized, Over, QualifiedName, TableInternalId, With, @@ -51,9 +55,8 @@ pub const ROWID_STRS: [&str; 3] = ["rowid", "_rowid_", "oid"]; /// - `Err(..)` if an invalid function usage is detected (e.g., window /// function encountered while `windows` is `None`). pub fn resolve_window_and_aggregate_functions( - schema: &Schema, - syms: &SymbolTable, top_level_expr: &Expr, + resolver: &Resolver, aggs: &mut Vec, mut windows: Option<&mut Vec>, ) -> Result { @@ -80,7 +83,7 @@ pub fn resolve_window_and_aggregate_functions( let args_count = args.len(); let distinctness = Distinctness::from_ast(distinctness.as_ref()); - if !schema.indexes_enabled() && distinctness.is_distinct() { + if !resolver.schema.indexes_enabled() && distinctness.is_distinct() { crate::bail_parse_error!( "SELECT with DISTINCT is not allowed without indexes enabled" ); @@ -102,7 +105,10 @@ pub fn resolve_window_and_aggregate_functions( return Ok(WalkControl::SkipChildren); } Err(e) => { - if let Some(f) = syms.resolve_function(name.as_str(), args_count) { + if let Some(f) = resolver + .symbol_table + .resolve_function(name.as_str(), args_count) + { let func = AggFunc::External(f.func.clone().into()); if let ExtFunc::Aggregate { .. } = f.as_ref().func { if let Some(over_clause) = filter_over.over_clause.as_ref() { @@ -263,24 +269,21 @@ fn add_aggregate_if_not_exists( Ok(()) } -#[allow(clippy::too_many_arguments)] fn parse_from_clause_table( - schema: &Schema, table: ast::SelectTable, + resolver: &Resolver, + program: &mut ProgramBuilder, table_references: &mut TableReferences, vtab_predicates: &mut Vec, ctes: &mut Vec, - syms: &SymbolTable, - table_ref_counter: &mut TableRefIdCounter, connection: &Arc, ) -> Result<()> { match table { ast::SelectTable::Table(qualified_name, maybe_alias, _) => parse_table( - schema, - syms, table_references, + resolver, + program, ctes, - table_ref_counter, vtab_predicates, &qualified_name, maybe_alias.as_ref(), @@ -289,11 +292,10 @@ fn parse_from_clause_table( ), ast::SelectTable::Select(subselect, maybe_alias) => { let Plan::Select(subplan) = prepare_select_plan( - schema, subselect, - syms, + resolver, + program, table_references.outer_query_refs(), - table_ref_counter, QueryDestination::placeholder_for_subquery(), connection, )? 
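The `ParamState` threading gets the same treatment as `schema`/`syms`: rather than each entry point building a local `ParamState::default()` and passing `&mut param_ctx` alongside `&mut TableRefIdCounter`, both now live on the builder (see the core/vdbe/builder.rs hunk at the end of this patch). A minimal sketch of the builder-owned state, with everything not shown in these hunks elided:

    // Sketch only: the two pieces of per-statement bookkeeping this patch
    // moves onto the builder. Field names follow the core/vdbe/builder.rs
    // hunk; this is not the full ProgramBuilder definition.
    pub struct ProgramBuilder {
        pub table_reference_counter: TableRefIdCounter,
        pub param_ctx: ParamState,
        // ...the remaining builder fields are elided...
    }

Call sites shrink accordingly: `parse_limit(&mut l, connection, &mut program.param_ctx)` and `program.table_reference_counter.next()` replace the two extra `&mut` parameters.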
@@ -312,16 +314,15 @@ fn parse_from_clause_table( identifier, subplan, None, - table_ref_counter.next(), + program.table_reference_counter.next(), )); Ok(()) } ast::SelectTable::TableCall(qualified_name, args, maybe_alias) => parse_table( - schema, - syms, table_references, + resolver, + program, ctes, - table_ref_counter, vtab_predicates, &qualified_name, maybe_alias.as_ref(), @@ -334,11 +335,10 @@ fn parse_from_clause_table( #[allow(clippy::too_many_arguments)] fn parse_table( - schema: &Schema, - syms: &SymbolTable, table_references: &mut TableReferences, + resolver: &Resolver, + program: &mut ProgramBuilder, ctes: &mut Vec, - table_ref_counter: &mut TableRefIdCounter, vtab_predicates: &mut Vec, qualified_name: &QualifiedName, maybe_alias: Option<&As>, @@ -380,7 +380,7 @@ fn parse_table( ast::As::Elided(id) => id, }) .map(|a| normalize_ident(a.as_str())); - let internal_id = table_ref_counter.next(); + let internal_id = program.table_reference_counter.next(); let tbl_ref = if let Table::Virtual(tbl) = table.as_ref() { transform_args_into_where_terms(args, internal_id, vtab_predicates, table.as_ref())?; Table::Virtual(tbl.clone()) @@ -418,13 +418,12 @@ fn parse_table( // Recursively call parse_from_clause_table with the view as a SELECT return parse_from_clause_table( - schema, ast::SelectTable::Select(*subselect.clone(), view_alias), + resolver, + program, table_references, vtab_predicates, ctes, - syms, - table_ref_counter, connection, ); } @@ -487,7 +486,7 @@ fn parse_table( }), table: Table::BTree(btree_table), identifier: alias.unwrap_or(normalized_qualified_name), - internal_id: table_ref_counter.next(), + internal_id: program.table_reference_counter.next(), join_info: None, col_used_mask: ColumnUsedMask::default(), database_id, @@ -510,7 +509,7 @@ fn parse_table( op: Operation::default_scan_for(&outer_ref.table), table: outer_ref.table.clone(), identifier: outer_ref.identifier.clone(), - internal_id: table_ref_counter.next(), + internal_id: program.table_reference_counter.next(), join_info: None, col_used_mask: ColumnUsedMask::default(), database_id, @@ -587,16 +586,14 @@ fn transform_args_into_where_terms( #[allow(clippy::too_many_arguments)] pub fn parse_from( - schema: &Schema, mut from: Option, - syms: &SymbolTable, + resolver: &Resolver, + program: &mut ProgramBuilder, with: Option, out_where_clause: &mut Vec, vtab_predicates: &mut Vec, table_references: &mut TableReferences, - table_ref_counter: &mut TableRefIdCounter, connection: &Arc, - param_ctx: &mut ParamState, ) -> Result<()> { if from.is_none() { return Ok(()); @@ -620,7 +617,7 @@ pub fn parse_from( // TODO: sqlite actually allows overriding a catalog table with a CTE. // We should carry over the 'Scope' struct to all of our identifier resolution. let cte_name_normalized = normalize_ident(cte.tbl_name.as_str()); - if schema.get_table(&cte_name_normalized).is_some() { + if resolver.schema.get_table(&cte_name_normalized).is_some() { crate::bail_parse_error!( "CTE name {} conflicts with catalog table name", cte.tbl_name.as_str() @@ -650,11 +647,10 @@ pub fn parse_from( // CTE can refer to other CTEs that came before it, plus any schema tables or tables in the outer scope. 
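// For example, in
//   WITH a AS (SELECT 1 AS x),
//        b AS (SELECT x + 1 AS y FROM a)  -- `a` is already in scope here
//   SELECT y FROM b;
// `b` may reference `a`, but `a` could not reference `b`, and either name
// would be rejected above if it collided with a catalog table.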
let cte_plan = prepare_select_plan( - schema, cte.select, - syms, + resolver, + program, &outer_query_refs_for_cte, - table_ref_counter, QueryDestination::placeholder_for_subquery(), connection, )?; @@ -665,7 +661,7 @@ pub fn parse_from( cte_name_normalized, cte_plan, None, - table_ref_counter.next(), + program.table_reference_counter.next(), )); } } @@ -674,28 +670,25 @@ pub fn parse_from( let select_owned = from_owned.select; let joins_owned = from_owned.joins; parse_from_clause_table( - schema, *select_owned, + resolver, + program, table_references, vtab_predicates, &mut ctes_as_subqueries, - syms, - table_ref_counter, connection, )?; for join in joins_owned.into_iter() { parse_join( - schema, join, - syms, + resolver, + program, &mut ctes_as_subqueries, out_where_clause, vtab_predicates, table_references, - table_ref_counter, connection, - param_ctx, )?; } @@ -905,16 +898,14 @@ pub fn determine_where_to_eval_expr( #[allow(clippy::too_many_arguments)] fn parse_join( - schema: &Schema, join: ast::JoinedSelectTable, - syms: &SymbolTable, + resolver: &Resolver, + program: &mut ProgramBuilder, ctes: &mut Vec, out_where_clause: &mut Vec, vtab_predicates: &mut Vec, table_references: &mut TableReferences, - table_ref_counter: &mut TableRefIdCounter, connection: &Arc, - param_ctx: &mut ParamState, ) -> Result<()> { let ast::JoinedSelectTable { operator: join_operator, @@ -923,13 +914,12 @@ fn parse_join( } = join; parse_from_clause_table( - schema, table.as_ref().clone(), + resolver, + program, table_references, vtab_predicates, ctes, - syms, - table_ref_counter, connection, )?; @@ -1007,7 +997,7 @@ fn parse_join( Some(table_references), None, connection, - param_ctx, + &mut program.param_ctx, BindingBehavior::TryResultColumnsFirst, )?; } diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs index 3bc8668e8..0a3b90c96 100644 --- a/core/translate/pragma.rs +++ b/core/translate/pragma.rs @@ -15,12 +15,12 @@ use crate::storage::pager::AutoVacuumMode; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::CacheSize; use crate::storage::wal::CheckpointMode; -use crate::translate::emitter::TransactionMode; +use crate::translate::emitter::{Resolver, TransactionMode}; use crate::translate::schema::translate_create_table; use crate::util::{normalize_ident, parse_signed_number, parse_string, IOExt as _}; use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts}; use crate::vdbe::insn::{Cookie, Insn}; -use crate::{bail_parse_error, CaptureDataChangesMode, LimboError, SymbolTable, Value}; +use crate::{bail_parse_error, CaptureDataChangesMode, LimboError, Value}; use std::str::FromStr; use strum::IntoEnumIterator; @@ -32,10 +32,8 @@ fn list_pragmas(program: &mut ProgramBuilder) { program.add_pragma_result_column("pragma_list".into()); } -#[allow(clippy::too_many_arguments)] pub fn translate_pragma( - schema: &Schema, - syms: &SymbolTable, + resolver: &Resolver, name: &ast::QualifiedName, body: Option, pager: Arc, @@ -60,12 +58,17 @@ pub fn translate_pragma( }; let (mut program, mode) = match body { - None => query_pragma(pragma, schema, None, pager, connection, program)?, + None => query_pragma(pragma, resolver.schema, None, pager, connection, program)?, Some(ast::PragmaBody::Equals(value) | ast::PragmaBody::Call(value)) => match pragma { - PragmaName::TableInfo => { - query_pragma(pragma, schema, Some(*value), pager, connection, program)? 
- } - _ => update_pragma(pragma, schema, syms, *value, pager, connection, program)?, + PragmaName::TableInfo => query_pragma( + pragma, + resolver.schema, + Some(*value), + pager, + connection, + program, + )?, + _ => update_pragma(pragma, resolver, *value, pager, connection, program)?, }, }; match mode { @@ -86,8 +89,7 @@ pub fn translate_pragma( fn update_pragma( pragma: PragmaName, - schema: &Schema, - syms: &SymbolTable, + resolver: &Resolver, value: ast::Expr, pager: Arc, connection: Arc, @@ -154,7 +156,7 @@ fn update_pragma( PragmaName::LegacyFileFormat => Ok((program, TransactionMode::None)), PragmaName::WalCheckpoint => query_pragma( PragmaName::WalCheckpoint, - schema, + resolver.schema, Some(value), pager, connection, @@ -163,7 +165,7 @@ fn update_pragma( PragmaName::ModuleList => Ok((program, TransactionMode::None)), PragmaName::PageCount => query_pragma( PragmaName::PageCount, - schema, + resolver.schema, None, pager, connection, @@ -287,13 +289,14 @@ fn update_pragma( // but for now, let's keep it as is... let opts = CaptureDataChangesMode::parse(&value)?; if let Some(table) = &opts.table() { - if schema.get_table(table).is_none() { + if resolver.schema.get_table(table).is_none() { program = translate_create_table( QualifiedName { db_name: None, name: ast::Name::new(table), alias: None, }, + resolver, false, true, // if_not_exists ast::CreateTableBody::ColumnsAndConstraints { @@ -301,8 +304,6 @@ fn update_pragma( constraints: vec![], options: ast::TableOptions::NONE, }, - schema, - syms, program, &connection, )?; @@ -314,7 +315,7 @@ fn update_pragma( PragmaName::DatabaseList => unreachable!("database_list cannot be set"), PragmaName::QueryOnly => query_pragma( PragmaName::QueryOnly, - schema, + resolver.schema, Some(value), pager, connection, @@ -322,7 +323,7 @@ fn update_pragma( ), PragmaName::FreelistCount => query_pragma( PragmaName::FreelistCount, - schema, + resolver.schema, Some(value), pager, connection, diff --git a/core/translate/rollback.rs b/core/translate/rollback.rs index 67feab969..d2fa22d8b 100644 --- a/core/translate/rollback.rs +++ b/core/translate/rollback.rs @@ -1,14 +1,11 @@ use turso_parser::ast::Name; use crate::{ - schema::Schema, vdbe::{builder::ProgramBuilder, insn::Insn}, - Result, SymbolTable, + Result, }; pub fn translate_rollback( - _schema: &Schema, - _syms: &SymbolTable, mut program: ProgramBuilder, txn_name: Option, savepoint_name: Option, diff --git a/core/translate/schema.rs b/core/translate/schema.rs index c4f705113..ddc7fa73a 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -5,7 +5,6 @@ use crate::ext::VTabImpl; use crate::schema::create_table; use crate::schema::BTreeTable; use crate::schema::Column; -use crate::schema::Schema; use crate::schema::Table; use crate::schema::Type; use crate::schema::RESERVED_TABLE_PREFIXES; @@ -23,19 +22,16 @@ use crate::vdbe::builder::CursorType; use crate::vdbe::insn::Cookie; use crate::vdbe::insn::{CmpInsFlags, InsertFlags, Insn}; use crate::Connection; -use crate::SymbolTable; use crate::{bail_parse_error, Result}; use turso_ext::VTabKind; -#[allow(clippy::too_many_arguments)] pub fn translate_create_table( tbl_name: ast::QualifiedName, + resolver: &Resolver, temporary: bool, if_not_exists: bool, body: ast::CreateTableBody, - schema: &Schema, - syms: &SymbolTable, mut program: ProgramBuilder, connection: &Connection, ) -> Result { @@ -70,7 +66,7 @@ pub fn translate_create_table( ); } - if schema.get_table(&normalized_tbl_name).is_some() { + if 
resolver.schema.get_table(&normalized_tbl_name).is_some() { if if_not_exists { return Ok(program); } @@ -113,7 +109,7 @@ pub fn translate_create_table( } } - let schema_master_table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let schema_master_table = resolver.schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(schema_master_table.clone())); program.emit_insn(Insn::OpenWrite { @@ -121,11 +117,10 @@ pub fn translate_create_table( root_page: 1usize.into(), db: 0, }); - let resolver = Resolver::new(schema, syms); - let cdc_table = prepare_cdc_if_necessary(&mut program, schema, SQLITE_TABLEID)?; + let cdc_table = prepare_cdc_if_necessary(&mut program, resolver.schema, SQLITE_TABLEID)?; let created_sequence_table = - if has_autoincrement && schema.get_table("sqlite_sequence").is_none() { + if has_autoincrement && resolver.schema.get_table("sqlite_sequence").is_none() { let seq_table_root_reg = program.alloc_register(); program.emit_insn(Insn::CreateBtree { db: 0, @@ -136,7 +131,7 @@ pub fn translate_create_table( let seq_sql = "CREATE TABLE sqlite_sequence(name,seq)"; emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, cdc_table.as_ref().map(|x| x.0), SchemaEntryType::Table, @@ -191,7 +186,7 @@ pub fn translate_create_table( let index_regs = collect_autoindexes(&body, &mut program, &normalized_tbl_name)?; if let Some(index_regs) = index_regs.as_ref() { - if !schema.indexes_enabled() { + if !resolver.schema.indexes_enabled() { bail_parse_error!("Constraints UNIQUE and PRIMARY KEY (unless INTEGER PRIMARY KEY) on table are not supported without indexes"); } for index_reg in index_regs.iter() { @@ -203,7 +198,7 @@ pub fn translate_create_table( } } - let table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let table = resolver.schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone())); program.emit_insn(Insn::OpenWrite { cursor_id: sqlite_schema_cursor_id, @@ -211,12 +206,11 @@ pub fn translate_create_table( db: 0, }); - let cdc_table = prepare_cdc_if_necessary(&mut program, schema, SQLITE_TABLEID)?; - let resolver = Resolver::new(schema, syms); + let cdc_table = prepare_cdc_if_necessary(&mut program, resolver.schema, SQLITE_TABLEID)?; emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, cdc_table.as_ref().map(|x| x.0), SchemaEntryType::Table, @@ -235,7 +229,7 @@ pub fn translate_create_table( ); emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, None, SchemaEntryType::Index, @@ -252,7 +246,7 @@ pub fn translate_create_table( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: schema.schema_version as i32 + 1, + value: resolver.schema.schema_version as i32 + 1, p5: 0, }); @@ -478,8 +472,7 @@ fn create_vtable_body_to_str(vtab: &ast::CreateVirtualTable, module: Arc Result { let ast::CreateVirtualTable { @@ -492,13 +485,13 @@ pub fn translate_create_virtual_table( let table_name = tbl_name.name.as_str().to_string(); let module_name_str = module_name.as_str().to_string(); let args_vec = args.clone(); - let Some(vtab_module) = syms.vtab_modules.get(&module_name_str) else { + let Some(vtab_module) = resolver.symbol_table.vtab_modules.get(&module_name_str) else { bail_parse_error!("no such module: {}", module_name_str); }; if !vtab_module.module_kind.eq(&VTabKind::VirtualTable) { bail_parse_error!("module {} is not 
a virtual table", module_name_str); }; - if schema.get_table(&table_name).is_some() { + if resolver.schema.get_table(&table_name).is_some() { if *if_not_exists { return Ok(program); } @@ -540,7 +533,7 @@ pub fn translate_create_virtual_table( table_name: table_name_reg, args_reg, }); - let table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let table = resolver.schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone())); program.emit_insn(Insn::OpenWrite { cursor_id: sqlite_schema_cursor_id, @@ -548,12 +541,11 @@ pub fn translate_create_virtual_table( db: 0, }); - let cdc_table = prepare_cdc_if_necessary(&mut program, schema, SQLITE_TABLEID)?; + let cdc_table = prepare_cdc_if_necessary(&mut program, resolver.schema, SQLITE_TABLEID)?; let sql = create_vtable_body_to_str(&vtab, vtab_module.clone()); - let resolver = Resolver::new(schema, syms); emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, cdc_table.map(|x| x.0), SchemaEntryType::Table, @@ -566,7 +558,7 @@ pub fn translate_create_virtual_table( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: schema.schema_version as i32 + 1, + value: resolver.schema.schema_version as i32 + 1, p5: 0, }); let parse_schema_where_clause = format!("tbl_name = '{table_name}' AND type != 'trigger'"); @@ -580,9 +572,8 @@ pub fn translate_create_virtual_table( pub fn translate_drop_table( tbl_name: ast::QualifiedName, + resolver: &Resolver, if_exists: bool, - schema: &Schema, - syms: &SymbolTable, mut program: ProgramBuilder, ) -> Result { if tbl_name @@ -593,7 +584,11 @@ pub fn translate_drop_table( bail_parse_error!("table sqlite_sequence may not be dropped"); } - if !schema.indexes_enabled() && schema.table_has_indexes(&tbl_name.name.to_string()) { + if !resolver.schema.indexes_enabled() + && resolver + .schema + .table_has_indexes(&tbl_name.name.to_string()) + { bail_parse_error!( "DROP TABLE with indexes on the table is disabled by default. Omit the `--experimental-indexes=false` flag to enable this feature." ); @@ -605,7 +600,7 @@ pub fn translate_drop_table( approx_num_labels: 4, }; program.extend(&opts); - let table = schema.get_table(tbl_name.name.as_str()); + let table = resolver.schema.get_table(tbl_name.name.as_str()); if table.is_none() { if if_exists { return Ok(program); @@ -623,13 +618,13 @@ pub fn translate_drop_table( let table = table.unwrap(); // safe since we just checked for None // Check if this is a materialized view - if so, refuse to drop it with DROP TABLE - if schema.is_materialized_view(tbl_name.name.as_str()) { + if resolver.schema.is_materialized_view(tbl_name.name.as_str()) { bail_parse_error!( "Cannot DROP TABLE on materialized view {}. 
Use DROP VIEW instead.", tbl_name.name.as_str() ); } - let cdc_table = prepare_cdc_if_necessary(&mut program, schema, SQLITE_TABLEID)?; + let cdc_table = prepare_cdc_if_necessary(&mut program, resolver.schema, SQLITE_TABLEID)?; let null_reg = program.alloc_register(); // r1 program.emit_null(null_reg, None); @@ -640,7 +635,7 @@ pub fn translate_drop_table( program.mark_last_insn_constant(); let row_id_reg = program.alloc_register(); // r5 - let schema_table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let schema_table = resolver.schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id_0 = program.alloc_cursor_id( // cursor 0 CursorType::BTreeTable(schema_table.clone()), @@ -716,10 +711,9 @@ pub fn translate_drop_table( } else { None }; - let resolver = Resolver::new(schema, syms); emit_cdc_insns( &mut program, - &resolver, + resolver, OperationMode::DELETE, cdc_cursor_id, row_id_reg, @@ -744,7 +738,7 @@ pub fn translate_drop_table( // end of loop on schema table // 2. Destroy the indices within a loop - let indices = schema.get_indices(tbl_name.name.as_str()); + let indices = resolver.schema.get_indices(tbl_name.name.as_str()); for index in indices { program.emit_insn(Insn::Destroy { root: index.root_page, @@ -954,7 +948,11 @@ pub fn translate_drop_table( } // if drops table, sequence table should reset. - if let Some(seq_table) = schema.get_table("sqlite_sequence").and_then(|t| t.btree()) { + if let Some(seq_table) = resolver + .schema + .get_table("sqlite_sequence") + .and_then(|t| t.btree()) + { let seq_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(seq_table.clone())); let seq_table_name_reg = program.alloc_register(); let dropped_table_name_reg = @@ -1013,7 +1011,7 @@ pub fn translate_drop_table( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: schema.schema_version as i32 + 1, + value: resolver.schema.schema_version as i32 + 1, p5: 0, }); diff --git a/core/translate/select.rs b/core/translate/select.rs index 3b305ba12..fedfeb005 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -15,10 +15,10 @@ use crate::translate::planner::{ }; use crate::translate::window::plan_windows; use crate::util::normalize_ident; -use crate::vdbe::builder::{ProgramBuilderOpts, TableRefIdCounter}; +use crate::vdbe::builder::ProgramBuilderOpts; use crate::vdbe::insn::Insn; -use crate::{schema::Schema, vdbe::builder::ProgramBuilder, Result}; -use crate::{Connection, SymbolTable}; +use crate::Connection; +use crate::{vdbe::builder::ProgramBuilder, Result}; use std::sync::Arc; use turso_parser::ast::ResultColumn; use turso_parser::ast::{self, CompoundSelect, Expr}; @@ -29,23 +29,21 @@ pub struct TranslateSelectResult { } pub fn translate_select( - schema: &Schema, select: ast::Select, - syms: &SymbolTable, + resolver: &Resolver, mut program: ProgramBuilder, query_destination: QueryDestination, connection: &Arc, ) -> Result { let mut select_plan = prepare_select_plan( - schema, select, - syms, + resolver, + &mut program, &[], - &mut program.table_reference_counter, query_destination, connection, )?; - optimize_plan(&mut select_plan, schema)?; + optimize_plan(&mut select_plan, resolver.schema)?; let num_result_cols; let opts = match &select_plan { Plan::Select(select) => { @@ -84,7 +82,7 @@ pub fn translate_select( }; program.extend(&opts); - emit_program(connection, &mut program, select_plan, schema, syms, |_| {})?; + emit_program(connection, resolver, &mut program, select_plan, |_| {})?; Ok(TranslateSelectResult { 
program, num_result_cols, @@ -92,60 +90,52 @@ pub fn translate_select( } pub fn prepare_select_plan( - schema: &Schema, select: ast::Select, - syms: &SymbolTable, + resolver: &Resolver, + program: &mut ProgramBuilder, outer_query_refs: &[OuterQueryReference], - table_ref_counter: &mut TableRefIdCounter, query_destination: QueryDestination, connection: &Arc, ) -> Result { let compounds = select.body.compounds; - let mut param_ctx = ParamState::default(); match compounds.is_empty() { true => Ok(Plan::Select(prepare_one_select_plan( - schema, select.body.select, + resolver, + program, select.limit, select.order_by, select.with, - syms, outer_query_refs, - table_ref_counter, query_destination, connection, - &mut param_ctx, )?)), false => { let mut last = prepare_one_select_plan( - schema, select.body.select, + resolver, + program, None, vec![], None, - syms, outer_query_refs, - table_ref_counter, query_destination.clone(), connection, - &mut param_ctx, )?; let mut left = Vec::with_capacity(compounds.len()); for CompoundSelect { select, operator } in compounds { left.push((last, operator)); last = prepare_one_select_plan( - schema, select, + resolver, + program, None, vec![], None, - syms, outer_query_refs, - table_ref_counter, query_destination.clone(), connection, - &mut param_ctx, )?; } @@ -157,7 +147,7 @@ pub fn prepare_select_plan( } } let (limit, offset) = select.limit.map_or(Ok((None, None)), |mut l| { - parse_limit(&mut l, connection, &mut param_ctx) + parse_limit(&mut l, connection, &mut program.param_ctx) })?; // FIXME: handle ORDER BY for compound selects @@ -181,17 +171,15 @@ pub fn prepare_select_plan( #[allow(clippy::too_many_arguments)] fn prepare_one_select_plan( - schema: &Schema, select: ast::OneSelect, + resolver: &Resolver, + program: &mut ProgramBuilder, limit: Option, order_by: Vec, with: Option, - syms: &SymbolTable, outer_query_refs: &[OuterQueryReference], - table_ref_counter: &mut TableRefIdCounter, query_destination: QueryDestination, connection: &Arc, - param_ctx: &mut ParamState, ) -> Result { match select { ast::OneSelect::Select { @@ -203,7 +191,7 @@ fn prepare_one_select_plan( window_clause, .. } => { - if !schema.indexes_enabled() && distinctness.is_some() { + if !resolver.schema.indexes_enabled() && distinctness.is_some() { crate::bail_parse_error!( "SELECT with DISTINCT is not allowed without indexes enabled" ); @@ -229,16 +217,14 @@ fn prepare_one_select_plan( // Parse the FROM clause into a vec of TableReferences. Fold all the join conditions expressions into the WHERE clause. 
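// For example, `SELECT * FROM a JOIN b ON a.id = b.a_id WHERE b.v > 1`
// ends up with two WHERE terms after the fold: `a.id = b.a_id` (from the
// ON clause) and `b.v > 1`; determine_where_to_eval_expr later decides
// where each term can be evaluated.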
parse_from( - schema, from, - syms, + resolver, + program, with, &mut where_predicates, &mut vtab_predicates, &mut table_references, - table_ref_counter, connection, - param_ctx, )?; // Preallocate space for the result columns @@ -301,7 +287,7 @@ fn prepare_one_select_plan( Some(&mut plan.table_references), None, connection, - param_ctx, + &mut program.param_ctx, BindingBehavior::ResultColumnsNotAllowed, )?; } @@ -311,7 +297,7 @@ fn prepare_one_select_plan( Some(&mut plan.table_references), None, connection, - param_ctx, + &mut program.param_ctx, BindingBehavior::ResultColumnsNotAllowed, )?; } @@ -374,13 +360,12 @@ fn prepare_one_select_plan( Some(&mut plan.table_references), None, connection, - param_ctx, + &mut program.param_ctx, BindingBehavior::ResultColumnsNotAllowed, )?; let contains_aggregates = resolve_window_and_aggregate_functions( - schema, - syms, expr, + resolver, &mut aggregate_expressions, Some(&mut windows), )?; @@ -403,7 +388,7 @@ fn prepare_one_select_plan( &mut vtab_predicates, &mut plan, connection, - param_ctx, + &mut program.param_ctx, )?; // Parse the actual WHERE clause and add its conditions to the plan WHERE clause that already contains the join conditions. @@ -413,7 +398,7 @@ fn prepare_one_select_plan( Some(&plan.result_columns), &mut plan.where_clause, connection, - param_ctx, + &mut program.param_ctx, )?; if let Some(mut group_by) = group_by { @@ -424,7 +409,7 @@ fn prepare_one_select_plan( Some(&mut plan.table_references), Some(&plan.result_columns), connection, - param_ctx, + &mut program.param_ctx, BindingBehavior::TryResultColumnsFirst, )?; } @@ -441,13 +426,12 @@ fn prepare_one_select_plan( Some(&mut plan.table_references), Some(&plan.result_columns), connection, - param_ctx, + &mut program.param_ctx, BindingBehavior::TryResultColumnsFirst, )?; let contains_aggregates = resolve_window_and_aggregate_functions( - schema, - syms, expr, + resolver, &mut aggregate_expressions, None, )?; @@ -481,13 +465,12 @@ fn prepare_one_select_plan( Some(&mut plan.table_references), Some(&plan.result_columns), connection, - param_ctx, + &mut program.param_ctx, BindingBehavior::TryResultColumnsFirst, )?; resolve_window_and_aggregate_functions( - schema, - syms, &o.expr, + resolver, &mut plan.aggregates, Some(&mut windows), )?; @@ -502,17 +485,22 @@ fn prepare_one_select_plan( &group_by.exprs, &plan.order_by, &plan.aggregates, - &Resolver::new(schema, syms), + resolver, )); } // Parse the LIMIT/OFFSET clause (plan.limit, plan.offset) = limit.map_or(Ok((None, None)), |mut l| { - parse_limit(&mut l, connection, param_ctx) + parse_limit(&mut l, connection, &mut program.param_ctx) })?; if !windows.is_empty() { - plan_windows(schema, syms, &mut plan, table_ref_counter, &mut windows)?; + plan_windows( + &mut plan, + resolver, + &mut program.table_reference_counter, + &mut windows, + )?; } // Return the unoptimized query plan diff --git a/core/translate/update.rs b/core/translate/update.rs index d9c435fd4..ab035f377 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use crate::schema::{BTreeTable, Column, Type, ROWID_SENTINEL}; +use crate::translate::emitter::Resolver; use crate::translate::expr::{ bind_and_rewrite_expr, walk_expr, BindingBehavior, ParamState, WalkControl, }; @@ -14,7 +15,6 @@ use crate::{ schema::{Schema, Table}, util::normalize_ident, vdbe::builder::{ProgramBuilder, ProgramBuilderOpts}, - SymbolTable, }; use turso_parser::ast::{self, Expr, Indexed, SortOrder}; @@ -56,34 
+56,32 @@ addr opcode p1 p2 p3 p4 p5 comment 18 Goto 0 1 0 0 */ pub fn translate_update( - schema: &Schema, body: &mut ast::Update, - syms: &SymbolTable, + resolver: &Resolver, mut program: ProgramBuilder, connection: &Arc, ) -> crate::Result { - let mut plan = prepare_update_plan(&mut program, schema, body, connection, false)?; - optimize_plan(&mut plan, schema)?; + let mut plan = prepare_update_plan(&mut program, resolver.schema, body, connection, false)?; + optimize_plan(&mut plan, resolver.schema)?; let opts = ProgramBuilderOpts { num_cursors: 1, approx_num_insns: 20, approx_num_labels: 4, }; program.extend(&opts); - emit_program(connection, &mut program, plan, schema, syms, |_| {})?; + emit_program(connection, resolver, &mut program, plan, |_| {})?; Ok(program) } pub fn translate_update_for_schema_change( - schema: &Schema, body: &mut ast::Update, - syms: &SymbolTable, + resolver: &Resolver, mut program: ProgramBuilder, connection: &Arc, ddl_query: &str, after: impl FnOnce(&mut ProgramBuilder), ) -> crate::Result { - let mut plan = prepare_update_plan(&mut program, schema, body, connection, true)?; + let mut plan = prepare_update_plan(&mut program, resolver.schema, body, connection, true)?; if let Plan::Update(plan) = &mut plan { if program.capture_data_changes_mode().has_updates() { @@ -91,14 +89,14 @@ pub fn translate_update_for_schema_change( } } - optimize_plan(&mut plan, schema)?; + optimize_plan(&mut plan, resolver.schema)?; let opts = ProgramBuilderOpts { num_cursors: 1, approx_num_insns: 20, approx_num_labels: 4, }; program.extend(&opts); - emit_program(connection, &mut program, plan, schema, syms, after)?; + emit_program(connection, resolver, &mut program, plan, after)?; Ok(program) } @@ -201,7 +199,6 @@ pub fn prepare_update_plan( .collect(); let mut set_clauses = Vec::with_capacity(body.sets.len()); - let mut param_idx = ParamState::default(); // Process each SET assignment and map column names to expressions // e.g the statement `SET x = 1, y = 2, z = 3` has 3 set assigments @@ -211,7 +208,7 @@ pub fn prepare_update_plan( Some(&mut table_references), None, connection, - &mut param_idx, + &mut program.param_ctx, BindingBehavior::ResultColumnsNotAllowed, )?; @@ -271,7 +268,6 @@ pub fn prepare_update_plan( body.tbl_name.name.as_str(), program, connection, - &mut param_idx, )?; let order_by = body @@ -283,7 +279,7 @@ pub fn prepare_update_plan( Some(&mut table_references), Some(&result_columns), connection, - &mut param_idx, + &mut program.param_ctx, BindingBehavior::ResultColumnsNotAllowed, ); (o.expr.clone(), o.order.unwrap_or(SortOrder::Asc)) @@ -327,7 +323,7 @@ pub fn prepare_update_plan( Some(&result_columns), &mut where_clause, connection, - &mut param_idx, + &mut program.param_ctx, )?; let table = Arc::new(BTreeTable { @@ -405,13 +401,13 @@ pub fn prepare_update_plan( Some(&result_columns), &mut where_clause, connection, - &mut param_idx, + &mut program.param_ctx, )?; }; // Parse the LIMIT/OFFSET clause let (limit, offset) = body.limit.as_mut().map_or(Ok((None, None)), |l| { - parse_limit(l, connection, &mut param_idx) + parse_limit(l, connection, &mut program.param_ctx) })?; // Check what indexes will need to be updated by checking set_clauses and see diff --git a/core/translate/view.rs b/core/translate/view.rs index 96cce0d22..548ca5bc2 100644 --- a/core/translate/view.rs +++ b/core/translate/view.rs @@ -5,16 +5,15 @@ use crate::translate::schema::{emit_schema_entry, SchemaEntryType, SQLITE_TABLEI use crate::util::{normalize_ident, 
PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX}; use crate::vdbe::builder::{CursorType, ProgramBuilder}; use crate::vdbe::insn::{CmpInsFlags, Cookie, Insn, RegisterOrLiteral}; -use crate::{Connection, Result, SymbolTable}; +use crate::{Connection, Result}; use std::sync::Arc; use turso_parser::ast; pub fn translate_create_materialized_view( - schema: &Schema, view_name: &str, + resolver: &Resolver, select_stmt: &ast::Select, connection: Arc, - syms: &SymbolTable, mut program: ProgramBuilder, ) -> Result { // Check if experimental views are enabled @@ -28,7 +27,8 @@ pub fn translate_create_materialized_view( let normalized_view_name = normalize_ident(view_name); // Check if view already exists - if schema + if resolver + .schema .get_materialized_view(&normalized_view_name) .is_some() { @@ -42,7 +42,8 @@ pub fn translate_create_materialized_view( // storing invalid view definitions use crate::incremental::view::IncrementalView; use crate::schema::BTreeTable; - let view_column_schema = IncrementalView::validate_and_extract_columns(select_stmt, schema)?; + let view_column_schema = + IncrementalView::validate_and_extract_columns(select_stmt, resolver.schema)?; let view_columns = view_column_schema.flat_columns(); // Reconstruct the SQL string for storage @@ -120,7 +121,7 @@ pub fn translate_create_materialized_view( program.preassign_label_to_next_insn(clear_done_label); // Open cursor to sqlite_schema table - let table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let table = resolver.schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone())); program.emit_insn(Insn::OpenWrite { cursor_id: sqlite_schema_cursor_id, @@ -129,10 +130,9 @@ pub fn translate_create_materialized_view( }); // Add the materialized view entry to sqlite_schema - let resolver = Resolver::new(schema, syms); emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, None, // cdc_table_cursor_id, no cdc for views SchemaEntryType::View, @@ -164,7 +164,7 @@ pub fn translate_create_materialized_view( emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, None, // cdc_table_cursor_id SchemaEntryType::Table, @@ -211,7 +211,7 @@ pub fn translate_create_materialized_view( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: (schema.schema_version + 1) as i32, + value: (resolver.schema.schema_version + 1) as i32, p5: 0, }); @@ -221,7 +221,7 @@ pub fn translate_create_materialized_view( cursors: cursor_info, }); - program.epilogue(schema); + program.epilogue(resolver.schema); Ok(program) } @@ -230,19 +230,19 @@ fn create_materialized_view_to_str(view_name: &str, select_stmt: &ast::Select) - } pub fn translate_create_view( - schema: &Schema, view_name: &str, + resolver: &Resolver, select_stmt: &ast::Select, _columns: &[ast::IndexedColumn], _connection: Arc, - syms: &SymbolTable, mut program: ProgramBuilder, ) -> Result { let normalized_view_name = normalize_ident(view_name); // Check if view already exists - if schema.get_view(&normalized_view_name).is_some() - || schema + if resolver.schema.get_view(&normalized_view_name).is_some() + || resolver + .schema .get_materialized_view(&normalized_view_name) .is_some() { @@ -255,7 +255,7 @@ pub fn translate_create_view( let sql = create_view_to_str(view_name, select_stmt); // Open cursor to sqlite_schema table - let table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let table = 
resolver.schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone())); program.emit_insn(Insn::OpenWrite { cursor_id: sqlite_schema_cursor_id, @@ -264,10 +264,9 @@ pub fn translate_create_view( }); // Add the view entry to sqlite_schema - let resolver = Resolver::new(schema, syms); emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, None, // cdc_table_cursor_id, no cdc for views SchemaEntryType::View, @@ -286,7 +285,7 @@ pub fn translate_create_view( program.emit_insn(Insn::SetCookie { db: 0, cookie: Cookie::SchemaVersion, - value: (schema.schema_version + 1) as i32, + value: (resolver.schema.schema_version + 1) as i32, p5: 0, }); diff --git a/core/translate/window.rs b/core/translate/window.rs index 3fb512303..784e64dc9 100644 --- a/core/translate/window.rs +++ b/core/translate/window.rs @@ -1,4 +1,4 @@ -use crate::schema::{BTreeTable, Schema, Table}; +use crate::schema::{BTreeTable, Table}; use crate::translate::aggregation::{translate_aggregation_step, AggArgumentSource}; use crate::translate::emitter::{Resolver, TranslateCtx}; use crate::translate::expr::{walk_expr, walk_expr_mut, WalkControl}; @@ -13,7 +13,6 @@ use crate::util::exprs_are_equivalent; use crate::vdbe::builder::{CursorType, ProgramBuilder, TableRefIdCounter}; use crate::vdbe::insn::{InsertFlags, Insn}; use crate::vdbe::{BranchOffset, CursorID}; -use crate::SymbolTable; use std::mem; use std::sync::Arc; use turso_parser::ast::Name::Ident; @@ -22,8 +21,7 @@ use turso_parser::ast::{Expr, FunctionTail, Literal, Over, SortOrder, TableInter const SUBQUERY_DATABASE_ID: usize = 0; struct WindowSubqueryContext<'a> { - schema: &'a Schema, - syms: &'a SymbolTable, + resolver: &'a Resolver<'a>, subquery_order_by: &'a mut Vec<(Box, SortOrder)>, subquery_result_columns: &'a mut Vec, subquery_id: &'a TableInternalId, @@ -82,9 +80,8 @@ struct WindowSubqueryContext<'a> { /// ); /// ``` pub fn plan_windows( - schema: &Schema, - syms: &SymbolTable, plan: &mut SelectPlan, + resolver: &Resolver, table_ref_counter: &mut TableRefIdCounter, windows: &mut Vec, ) -> crate::Result<()> { @@ -99,13 +96,12 @@ pub fn plan_windows( ); } - prepare_window_subquery(schema, syms, plan, table_ref_counter, windows, 0) + prepare_window_subquery(plan, resolver, table_ref_counter, windows, 0) } fn prepare_window_subquery( - schema: &Schema, - syms: &SymbolTable, outer_plan: &mut SelectPlan, + resolver: &Resolver, table_ref_counter: &mut TableRefIdCounter, windows: &mut Vec, processed_window_count: usize, @@ -139,8 +135,7 @@ fn prepare_window_subquery( } let mut ctx = WindowSubqueryContext { - schema, - syms, + resolver, subquery_order_by: &mut subquery_order_by, subquery_result_columns: &mut subquery_result_columns, subquery_id: &subquery_id, @@ -213,9 +208,8 @@ fn prepare_window_subquery( }; prepare_window_subquery( - schema, - syms, &mut inner_plan, + resolver, table_ref_counter, windows, processed_window_count + 1, @@ -250,13 +244,8 @@ fn append_order_by( ctx.subquery_order_by .push((Box::new(expr.clone()), *sort_order)); - let contains_aggregates = resolve_window_and_aggregate_functions( - ctx.schema, - ctx.syms, - expr, - &mut plan.aggregates, - None, - )?; + let contains_aggregates = + resolve_window_and_aggregate_functions(expr, ctx.resolver, &mut plan.aggregates, None)?; rewrite_expr_as_subquery_column(expr, ctx, contains_aggregates); Ok(()) } @@ -352,9 +341,8 @@ fn rewrite_expr_referencing_current_window( filter_over, } => { for arg in 
args.iter_mut() { - let contains_aggregates = resolve_window_and_aggregate_functions( - ctx.schema, ctx.syms, arg, aggregates, None, - )?; + let contains_aggregates = + resolve_window_and_aggregate_functions(arg, ctx.resolver, aggregates, None)?; rewrite_expr_as_subquery_column(arg, ctx, contains_aggregates); } assert!( diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 4ea0b283e..e7768db06 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -10,6 +10,7 @@ use crate::{ translate::{ collate::CollationSeq, emitter::TransactionMode, + expr::ParamState, plan::{ResultSetColumn, TableReferences}, }, CaptureDataChangesMode, Connection, Value, VirtualTable, @@ -118,6 +119,7 @@ pub struct ProgramBuilder { query_mode: QueryMode, /// Current parent explain address, if any. current_parent_explain_idx: Option, + pub param_ctx: ParamState, } #[derive(Debug, Clone)] @@ -203,6 +205,7 @@ impl ProgramBuilder { rollback: false, query_mode, current_parent_explain_idx: None, + param_ctx: ParamState::default(), } } From d9658070a935788df21f8bd4eb99e8d207abc6e4 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 26 Sep 2025 12:17:34 -0400 Subject: [PATCH 34/65] Fix clippy warnings --- core/translate/analyze.rs | 2 +- core/translate/attach.rs | 8 ++++---- core/translate/index.rs | 6 +++--- core/translate/insert.rs | 13 ++++++------- core/translate/upsert.rs | 4 ++-- core/translate/view.rs | 2 +- 6 files changed, 17 insertions(+), 18 deletions(-) diff --git a/core/translate/analyze.rs b/core/translate/analyze.rs index 8848f6c5f..f91b6b4d8 100644 --- a/core/translate/analyze.rs +++ b/core/translate/analyze.rs @@ -142,7 +142,7 @@ pub fn translate_analyze( // Add the table entry to sqlite_schema emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, None, SchemaEntryType::Table, diff --git a/core/translate/attach.rs b/core/translate/attach.rs index 0ec29e101..5cf7729b2 100644 --- a/core/translate/attach.rs +++ b/core/translate/attach.rs @@ -54,7 +54,7 @@ pub fn translate_attach( }); } _ => { - translate_expr(&mut program, None, expr, arg_reg, &resolver)?; + translate_expr(&mut program, None, expr, arg_reg, resolver)?; } } @@ -85,13 +85,13 @@ pub fn translate_attach( }); } _ => { - translate_expr(&mut program, None, db_name, arg_reg + 1, &resolver)?; + translate_expr(&mut program, None, db_name, arg_reg + 1, resolver)?; } } // Load key argument (NULL if not provided) if let Some(key_expr) = key { - translate_expr(&mut program, None, key_expr, arg_reg + 2, &resolver)?; + translate_expr(&mut program, None, key_expr, arg_reg + 2, resolver)?; } else { program.emit_insn(Insn::Null { dest: arg_reg + 2, @@ -157,7 +157,7 @@ pub fn translate_detach( }); } _ => { - translate_expr(&mut program, None, expr, arg_reg, &resolver)?; + translate_expr(&mut program, None, expr, arg_reg, resolver)?; } } diff --git a/core/translate/index.rs b/core/translate/index.rs index 75d657446..44930fb08 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -181,7 +181,7 @@ pub fn translate_create_index( let cdc_table = prepare_cdc_if_necessary(&mut program, resolver.schema, SQLITE_TABLEID)?; emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, cdc_table.map(|x| x.0), SchemaEntryType::Index, @@ -239,7 +239,7 @@ pub fn translate_create_index( jump_target_when_false: label, jump_target_when_true: BranchOffset::Placeholder, }, - &resolver, + resolver, )?; skip_row_label = Some(label); } @@ -553,7 +553,7 @@ pub fn translate_drop_index( }; emit_cdc_insns( 
&mut program, - &resolver, + resolver, OperationMode::DELETE, cdc_cursor_id, row_id_reg, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 374b8cd87..2567a3215 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -438,7 +438,7 @@ pub fn translate_insert( &mut program, &insertion, yield_reg_opt.unwrap() + 1, - &resolver, + resolver, &temp_table_ctx, )?; } else { @@ -449,7 +449,7 @@ pub fn translate_insert( db: 0, }); - translate_rows_single(&mut program, &values.unwrap(), &insertion, &resolver)?; + translate_rows_single(&mut program, &values.unwrap(), &insertion, resolver)?; } // Open all the index btrees for writing @@ -714,7 +714,7 @@ pub fn translate_insert( Some(&TableReferences::new_empty()), &where_for_eval, reg, - &resolver, + resolver, NoConstantOptReason::RegisterReuse, )?; let lbl = program.allocate_label(); @@ -953,7 +953,7 @@ pub fn translate_insert( Some(&TableReferences::new_empty()), &where_for_eval, reg, - &resolver, + resolver, NoConstantOptReason::RegisterReuse, )?; let lbl = program.allocate_label(); @@ -1060,7 +1060,7 @@ pub fn translate_insert( }; emit_cdc_insns( &mut program, - &resolver, + resolver, OperationMode::INSERT, *cdc_cursor_id, insertion.key_register(), @@ -1100,14 +1100,13 @@ pub fn translate_insert( emit_upsert( &mut program, - resolver.schema, &table, &insertion, cursor_id, conflict_rowid_reg, &mut rewritten_sets, where_clause, - &resolver, + resolver, &idx_cursors, &mut result_columns, cdc_table.as_ref().map(|c| c.0), diff --git a/core/translate/upsert.rs b/core/translate/upsert.rs index 9755802c8..38ef1d5d6 100644 --- a/core/translate/upsert.rs +++ b/core/translate/upsert.rs @@ -341,7 +341,6 @@ pub fn resolve_upsert_target( /// (unchanged) row. To refer to would-be inserted values, use `excluded.x`. 
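/// For illustration only (hypothetical table `t`, not part of this patch), a statement of the
/// shape this function emits bytecode for:
///
///   INSERT INTO t(id, v) VALUES (1, 2)
///   ON CONFLICT(id) DO UPDATE SET v = v + excluded.v;
///
/// In the DO UPDATE expression, a bare `v` reads the current (unchanged) row, while
/// `excluded.v` reads the value that would have been inserted.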
pub fn emit_upsert( program: &mut ProgramBuilder, - schema: &Schema, table: &Table, insertion: &Insertion, tbl_cursor_id: usize, @@ -476,7 +475,8 @@ pub fn emit_upsert( let (changed_cols, rowid_changed) = collect_changed_cols(table, set_pairs); for (idx_name, _root, idx_cid) in idx_cursors { - let idx_meta = schema + let idx_meta = resolver + .schema .get_index(table.get_name(), idx_name) .expect("index exists"); diff --git a/core/translate/view.rs b/core/translate/view.rs index 548ca5bc2..592d7731a 100644 --- a/core/translate/view.rs +++ b/core/translate/view.rs @@ -190,7 +190,7 @@ pub fn translate_create_materialized_view( ); emit_schema_entry( &mut program, - &resolver, + resolver, sqlite_schema_cursor_id, None, // cdc_table_cursor_id SchemaEntryType::Index, From 9bd852297a6cefe398ce081e505faebaef0cf80b Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 26 Sep 2025 17:31:02 -0400 Subject: [PATCH 35/65] Allow in parser using `rowid` explicitly for a col when creating table --- parser/src/parser.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/parser/src/parser.rs b/parser/src/parser.rs index e39e0ca13..aa756b03e 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -3446,10 +3446,6 @@ impl<'a> Parser<'a> { pub fn parse_column_definition(&mut self, in_alter: bool) -> Result { let col_name = self.parse_nm()?; - if !in_alter && col_name.as_str().eq_ignore_ascii_case("rowid") { - return Err(Error::Custom("cannot use reserved word: ROWID".to_owned())); - } - let col_type = self.parse_type()?; let constraints = self.parse_named_column_constraints(in_alter)?; Ok(ColumnDefinition { @@ -4039,7 +4035,6 @@ mod tests { "ALTER TABLE my_table ADD COLUMN my_column PRIMARY KEY", "ALTER TABLE my_table ADD COLUMN my_column UNIQUE", "CREATE TEMP TABLE baz.foo(bar)", - "CREATE TABLE foo(rowid)", "CREATE TABLE foo(d INT AS (a*abs(b)))", "CREATE TABLE foo(d INT AS (a*abs(b)))", "CREATE TABLE foo(bar UNKNOWN_INT) STRICT", From af215c2906bd2fdaae67e1a77c168f725b34e64c Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 26 Sep 2025 17:32:16 -0400 Subject: [PATCH 36/65] Check cols first before falling back to explicit rowid in UPDATE translation --- core/translate/update.rs | 60 +++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/core/translate/update.rs b/core/translate/update.rs index ab035f377..e045fa329 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -228,36 +228,40 @@ pub fn prepare_update_plan( for (col_name, expr) in set.col_names.iter().zip(values.iter()) { let ident = normalize_ident(col_name.as_str()); - // Check if this is the 'rowid' keyword - if ROWID_STRS.iter().any(|s| s.eq_ignore_ascii_case(&ident)) { - // Find the rowid alias column if it exists - if let Some((idx, _col)) = table - .columns() - .iter() - .enumerate() - .find(|(_, c)| c.is_rowid_alias) - { - // Use the rowid alias column index - match set_clauses.iter_mut().find(|(i, _)| i == &idx) { - Some((_, existing_expr)) => *existing_expr = expr.clone(), - None => set_clauses.push((idx, expr.clone())), - } - } else { - // No rowid alias, use sentinel value for actual rowid - match set_clauses.iter_mut().find(|(i, _)| *i == ROWID_SENTINEL) { - Some((_, existing_expr)) => *existing_expr = expr.clone(), - None => set_clauses.push((ROWID_SENTINEL, expr.clone())), + let col_index = match column_lookup.get(&ident) { + Some(idx) => *idx, + None => { + // Check if this is the 'rowid' keyword + if ROWID_STRS.iter().any(|s| s.eq_ignore_ascii_case(&ident)) { 
+ // Find the rowid alias column if it exists + if let Some((idx, _col)) = table + .columns() + .iter() + .enumerate() + .find(|(_i, c)| c.is_rowid_alias) + { + // Use the rowid alias column index + match set_clauses.iter_mut().find(|(i, _)| i == &idx) { + Some((_, existing_expr)) => *existing_expr = expr.clone(), + None => set_clauses.push((idx, expr.clone())), + } + idx + } else { + // No rowid alias, use sentinel value for actual rowid + match set_clauses.iter_mut().find(|(i, _)| *i == ROWID_SENTINEL) { + Some((_, existing_expr)) => *existing_expr = expr.clone(), + None => set_clauses.push((ROWID_SENTINEL, expr.clone())), + } + ROWID_SENTINEL + } + } else { + crate::bail_parse_error!("no such column: {}.{}", table_name, col_name); } } - } else { - let col_index = match column_lookup.get(&ident) { - Some(idx) => idx, - None => bail_parse_error!("no such column: {}", ident), - }; - match set_clauses.iter_mut().find(|(idx, _)| idx == col_index) { - Some((_, existing_expr)) => *existing_expr = expr.clone(), - None => set_clauses.push((*col_index, expr.clone())), - } + }; + match set_clauses.iter_mut().find(|(idx, _)| *idx == col_index) { + Some((_, existing_expr)) => *existing_expr = expr.clone(), + None => set_clauses.push((col_index, expr.clone())), } } } From e52aa1538e10f0d13783881073c2ece64d1b9e29 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 26 Sep 2025 17:32:51 -0400 Subject: [PATCH 37/65] Remove unused BTreeTable method for checking single field on Column in schema --- core/schema.rs | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 13bd620fb..b1aece65a 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -976,17 +976,13 @@ impl BTreeTable { pub fn get_rowid_alias_column(&self) -> Option<(usize, &Column)> { if self.primary_key_columns.len() == 1 { let (idx, col) = self.get_column(&self.primary_key_columns[0].0)?; - if self.column_is_rowid_alias(col) { + if col.is_rowid_alias { return Some((idx, col)); } } None } - pub fn column_is_rowid_alias(&self, col: &Column) -> bool { - col.is_rowid_alias - } - /// Returns the column position and column for a given column name. /// Returns None if the column name is not found. /// E.g. 
if table is CREATE TABLE t (a, b, c) @@ -2027,7 +2023,7 @@ mod tests { let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!( - !table.column_is_rowid_alias(column), + !column.is_rowid_alias, "column 'a´ has type different than INTEGER so can't be a rowid alias" ); Ok(()) @@ -2038,10 +2034,7 @@ mod tests { let sql = r#"CREATE TABLE t1 (a INTEGER PRIMARY KEY, b TEXT);"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; - assert!( - table.column_is_rowid_alias(column), - "column 'a´ should be a rowid alias" - ); + assert!(column.is_rowid_alias, "column 'a´ should be a rowid alias"); Ok(()) } @@ -2051,10 +2044,7 @@ mod tests { let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, PRIMARY KEY(a));"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; - assert!( - table.column_is_rowid_alias(column), - "column 'a´ should be a rowid alias" - ); + assert!(column.is_rowid_alias, "column 'a´ should be a rowid alias"); Ok(()) } @@ -2065,7 +2055,7 @@ mod tests { let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!( - !table.column_is_rowid_alias(column), + !column, "column 'a´ shouldn't be a rowid alias because table has no rowid" ); Ok(()) @@ -2077,7 +2067,7 @@ mod tests { let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!( - !table.column_is_rowid_alias(column), + !column.is_rowid_alias, "column 'a´ shouldn't be a rowid alias because table has no rowid" ); Ok(()) @@ -2100,7 +2090,7 @@ mod tests { let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!( - !table.column_is_rowid_alias(column), + !column.is_rowid_alias, "column 'a´ shouldn't be a rowid alias because table has composite primary key" ); Ok(()) From d4dc45832872fddd1f941fa9205dde4008ae8629 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 26 Sep 2025 17:33:38 -0400 Subject: [PATCH 38/65] Evaluate table column refs before checking `rowid` to allow using it as col name --- core/translate/expr.rs | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index fd6fbebce..c15296418 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -3302,12 +3302,6 @@ pub fn bind_and_rewrite_expr<'a>( top_level_expr, &mut |expr: &mut ast::Expr| -> Result { match expr { - ast::Expr::Id(ast::Name::Ident(n)) if n.eq_ignore_ascii_case("true") => { - *expr = ast::Expr::Literal(ast::Literal::Numeric("1".to_string())); - } - ast::Expr::Id(ast::Name::Ident(n)) if n.eq_ignore_ascii_case("false") => { - *expr = ast::Expr::Literal(ast::Literal::Numeric("0".to_string())); - } // Rewrite anonymous variables in encounter order. ast::Expr::Variable(var) if var.is_empty() => { if !param_state.is_valid() { @@ -3370,17 +3364,6 @@ pub fn bind_and_rewrite_expr<'a>( } } } - if !referenced_tables.joined_tables().is_empty() { - if let Some(row_id_expr) = parse_row_id( - &normalized_id, - referenced_tables.joined_tables()[0].internal_id, - || referenced_tables.joined_tables().len() != 1, - )? 
{ - *expr = row_id_expr; - - return Ok(WalkControl::Continue); - } - } let mut match_result = None; // First check joined tables @@ -3416,6 +3399,15 @@ pub fn bind_and_rewrite_expr<'a>( col_idx.unwrap(), col.is_rowid_alias, )); + // only if we haven't found a match, check for explicit rowid reference + } else if let Some(row_id_expr) = parse_row_id( + &normalized_id, + referenced_tables.joined_tables()[0].internal_id, + || referenced_tables.joined_tables().len() != 1, + )? { + *expr = row_id_expr; + + return Ok(WalkControl::Continue); } } @@ -3496,17 +3488,16 @@ pub fn bind_and_rewrite_expr<'a>( } let (tbl_id, tbl) = matching_tbl.unwrap(); let normalized_id = normalize_ident(id.as_str()); - - if let Some(row_id_expr) = parse_row_id(&normalized_id, tbl_id, || false)? { - *expr = row_id_expr; - - return Ok(WalkControl::Continue); - } let col_idx = tbl.columns().iter().position(|c| { c.name .as_ref() .is_some_and(|name| name.eq_ignore_ascii_case(&normalized_id)) }); + if let Some(row_id_expr) = parse_row_id(&normalized_id, tbl_id, || false)? { + *expr = row_id_expr; + + return Ok(WalkControl::Continue); + } let Some(col_idx) = col_idx else { crate::bail_parse_error!("no such column: {}", normalized_id); }; From abab04dac92917c18ee08d5283f8222869a39b63 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 26 Sep 2025 17:33:53 -0400 Subject: [PATCH 39/65] Add regression test for col named rowid --- testing/create_table.test | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/testing/create_table.test b/testing/create_table.test index f5fceba67..7eb7bea7d 100755 --- a/testing/create_table.test +++ b/testing/create_table.test @@ -45,3 +45,11 @@ do_execsql_test_in_memory_any_error create_table_column_and_table_primary_keys { do_execsql_test_in_memory_any_error create_table_multiple_table_primary_keys { CREATE TABLE t(a,b,c,d,primary key(a,b), primary key(c,d)); } + +# https://github.com/tursodatabase/turso/issues/3282 +do_execsql_test_on_specific_db {:memory:} col-named-rowid { + create table t(rowid, a); + insert into t values (1,2), (2,3), (3,4); + update t set rowid = 1; -- should allow regular update and not throw unique constraint + select count(*) from t where rowid = 1; +} {3} From 5d8a735aafa7f5924e03ab9d3e16ccc8f608b57e Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 26 Sep 2025 18:06:09 -0400 Subject: [PATCH 40/65] fix clippy error --- core/schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/schema.rs b/core/schema.rs index b1aece65a..20ae6380e 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -2055,7 +2055,7 @@ mod tests { let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!( - !column, + !column.is_rowid_alias, "column 'a´ shouldn't be a rowid alias because table has no rowid" ); Ok(()) From d28022b491e98b70ecf4513ad03125343c3d5438 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 26 Sep 2025 10:22:52 -0300 Subject: [PATCH 41/65] support mixed integer and float expressions in the expr_compiler Fixes #3373 --- core/incremental/expr_compiler.rs | 130 ++++++++++++++++++++++++++---- 1 file changed, 116 insertions(+), 14 deletions(-) diff --git a/core/incremental/expr_compiler.rs b/core/incremental/expr_compiler.rs index ae93a4b05..44b2cef49 100644 --- a/core/incremental/expr_compiler.rs +++ b/core/incremental/expr_compiler.rs @@ -92,14 +92,14 @@ pub enum ExpressionExecutor { } /// Trivial expression that can be evaluated inline without VDBE -/// Only supports operations where operands have the same 
type (no coercion) +/// Supports arithmetic operations with automatic type promotion (integer to float) #[derive(Clone, Debug)] pub enum TrivialExpression { /// Direct column reference Column(usize), /// Immediate value Immediate(Value), - /// Binary operation on trivial expressions (same-type operands only) + /// Binary operation on trivial expressions (supports type promotion) Binary { left: Box, op: Operator, @@ -109,7 +109,7 @@ pub enum TrivialExpression { impl TrivialExpression { /// Evaluate the trivial expression with the given input values - /// Panics if type mismatch occurs (this indicates a bug in validation) + /// Automatically promotes integers to floats when mixing types in arithmetic pub fn evaluate(&self, values: &[Value]) -> Value { match self { TrivialExpression::Column(idx) => values.get(*idx).cloned().unwrap_or(Value::Null), @@ -118,23 +118,32 @@ impl TrivialExpression { let left_val = left.evaluate(values); let right_val = right.evaluate(values); - // Only perform operations on same-type operands + // Perform operations with type promotion when needed match op { Operator::Add => match (&left_val, &right_val) { (Value::Integer(a), Value::Integer(b)) => Value::Integer(a + b), (Value::Float(a), Value::Float(b)) => Value::Float(a + b), + // Mixed integer/float - promote integer to float + (Value::Integer(a), Value::Float(b)) => Value::Float(*a as f64 + b), + (Value::Float(a), Value::Integer(b)) => Value::Float(a + *b as f64), (Value::Null, _) | (_, Value::Null) => Value::Null, _ => panic!("Type mismatch in trivial expression: {left_val:?} + {right_val:?}. This is a bug in trivial expression validation."), }, Operator::Subtract => match (&left_val, &right_val) { (Value::Integer(a), Value::Integer(b)) => Value::Integer(a - b), (Value::Float(a), Value::Float(b)) => Value::Float(a - b), + // Mixed integer/float - promote integer to float + (Value::Integer(a), Value::Float(b)) => Value::Float(*a as f64 - b), + (Value::Float(a), Value::Integer(b)) => Value::Float(a - *b as f64), (Value::Null, _) | (_, Value::Null) => Value::Null, _ => panic!("Type mismatch in trivial expression: {left_val:?} - {right_val:?}. This is a bug in trivial expression validation."), }, Operator::Multiply => match (&left_val, &right_val) { (Value::Integer(a), Value::Integer(b)) => Value::Integer(a * b), (Value::Float(a), Value::Float(b)) => Value::Float(a * b), + // Mixed integer/float - promote integer to float + (Value::Integer(a), Value::Float(b)) => Value::Float(*a as f64 * b), + (Value::Float(a), Value::Integer(b)) => Value::Float(a * *b as f64), (Value::Null, _) | (_, Value::Null) => Value::Null, _ => panic!("Type mismatch in trivial expression: {left_val:?} * {right_val:?}. This is a bug in trivial expression validation."), }, @@ -153,6 +162,21 @@ impl TrivialExpression { Value::Null } } + // Mixed integer/float - promote integer to float + (Value::Integer(a), Value::Float(b)) => { + if *b != 0.0 { + Value::Float(*a as f64 / b) + } else { + Value::Null + } + } + (Value::Float(a), Value::Integer(b)) => { + if *b != 0 { + Value::Float(a / *b as f64) + } else { + Value::Null + } + } (Value::Null, _) | (_, Value::Null) => Value::Null, _ => panic!("Type mismatch in trivial expression: {left_val:?} / {right_val:?}. 
This is a bug in trivial expression validation."), }, @@ -266,23 +290,27 @@ impl CompiledExpression { let right_trivial = Self::try_get_trivial_expr(right, input_column_names)?; // Check if we can determine types statically - // If both are immediates, they must have the same type - // If either is a column, we can't validate at compile time, - // but we'll assert at runtime if there's a mismatch + // For arithmetic operations, we allow mixing integers and floats + // since we promote integers to floats as needed if let (Some(left_type), Some(right_type)) = ( Self::get_trivial_type(&left_trivial), Self::get_trivial_type(&right_trivial), ) { - // Both types are known - they must match (or one is null) - if left_type != right_type - && left_type != TrivialType::Null - && right_type != TrivialType::Null - { - return None; // Type mismatch - not trivial + // Both types are known - check if they're numeric or null + let numeric_types = matches!( + left_type, + TrivialType::Integer | TrivialType::Float | TrivialType::Null + ) && matches!( + right_type, + TrivialType::Integer | TrivialType::Float | TrivialType::Null + ); + + if !numeric_types { + return None; // Non-numeric types - not trivial } } // If we can't determine types (columns involved), we optimistically - // assume they'll match at runtime (and assert if they don't) + // assume they'll be compatible at runtime Some(TrivialExpression::Binary { left: Box::new(left_trivial), @@ -450,3 +478,77 @@ impl CompiledExpression { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mixed_type_arithmetic() { + // Test integer - float + let expr = TrivialExpression::Binary { + left: Box::new(TrivialExpression::Immediate(Value::Integer(1))), + op: Operator::Subtract, + right: Box::new(TrivialExpression::Immediate(Value::Float(0.5))), + }; + let result = expr.evaluate(&[]); + assert_eq!(result, Value::Float(0.5)); + + // Test float - integer + let expr = TrivialExpression::Binary { + left: Box::new(TrivialExpression::Immediate(Value::Float(2.5))), + op: Operator::Subtract, + right: Box::new(TrivialExpression::Immediate(Value::Integer(1))), + }; + let result = expr.evaluate(&[]); + assert_eq!(result, Value::Float(1.5)); + + // Test integer * float + let expr = TrivialExpression::Binary { + left: Box::new(TrivialExpression::Immediate(Value::Integer(10))), + op: Operator::Multiply, + right: Box::new(TrivialExpression::Immediate(Value::Float(0.1))), + }; + let result = expr.evaluate(&[]); + assert_eq!(result, Value::Float(1.0)); + + // Test integer / float + let expr = TrivialExpression::Binary { + left: Box::new(TrivialExpression::Immediate(Value::Integer(1))), + op: Operator::Divide, + right: Box::new(TrivialExpression::Immediate(Value::Float(2.0))), + }; + let result = expr.evaluate(&[]); + assert_eq!(result, Value::Float(0.5)); + + // Test integer + float + let expr = TrivialExpression::Binary { + left: Box::new(TrivialExpression::Immediate(Value::Integer(1))), + op: Operator::Add, + right: Box::new(TrivialExpression::Immediate(Value::Float(0.5))), + }; + let result = expr.evaluate(&[]); + assert_eq!(result, Value::Float(1.5)); + } + + #[test] + fn test_nested_mixed_type_expressions() { + // Test nested expressions with mixed types: (1 - 0.04) + let one_minus_float = TrivialExpression::Binary { + left: Box::new(TrivialExpression::Immediate(Value::Integer(1))), + op: Operator::Subtract, + right: Box::new(TrivialExpression::Immediate(Value::Float(0.04))), + }; + let result = one_minus_float.evaluate(&[]); + assert_eq!(result, 
Value::Float(0.96)); + + // Test multiplication with nested mixed-type expression: 100.0 * (1 - 0.04) + let nested_expr = TrivialExpression::Binary { + left: Box::new(TrivialExpression::Immediate(Value::Float(100.0))), + op: Operator::Multiply, + right: Box::new(one_minus_float), + }; + let result = nested_expr.evaluate(&[]); + assert_eq!(result, Value::Float(96.0)); + } +} From a2d833c073bd90315273203c6f9ac0e3e4b284e6 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 27 Sep 2025 09:35:32 +0300 Subject: [PATCH 42/65] ALTER TABLE: add comment about things preventing drop column --- core/translate/alter.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/core/translate/alter.rs b/core/translate/alter.rs index 0e2820ce9..8a92247aa 100644 --- a/core/translate/alter.rs +++ b/core/translate/alter.rs @@ -80,6 +80,16 @@ pub fn translate_alter_table( LimboError::ParseError(format!("no such column: \"{column_name}\"")) })?; + // Column cannot be dropped if: + // The column is a PRIMARY KEY or part of one. + // The column has a UNIQUE constraint. + // The column is indexed. + // The column is named in the WHERE clause of a partial index. + // The column is named in a table or column CHECK constraint not associated with the column being dropped. + // The column is used in a foreign key constraint. + // The column is used in the expression of a generated column. + // The column appears in a trigger or view. + if column.primary_key { return Err(LimboError::ParseError(format!( "cannot drop column \"{column_name}\": PRIMARY KEY" From 085b92dc4ec68d9d29163c06ca26c0c4804c1c28 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 27 Sep 2025 09:36:13 +0300 Subject: [PATCH 43/65] ALTER TABLE: prevent dropping indexed columns in translate layer --- core/translate/alter.rs | 76 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/core/translate/alter.rs b/core/translate/alter.rs index 8a92247aa..5875b201f 100644 --- a/core/translate/alter.rs +++ b/core/translate/alter.rs @@ -1,10 +1,17 @@ use std::sync::Arc; -use turso_parser::{ast, parser::Parser}; +use turso_parser::{ + ast::{self, TableInternalId}, + parser::Parser, +}; use crate::{ function::{AlterTableFunc, Func}, - schema::Column, - translate::emitter::Resolver, + schema::{Column, Table}, + translate::{ + emitter::Resolver, + expr::{walk_expr, WalkControl}, + plan::{ColumnUsedMask, OuterQueryReference, TableReferences}, + }, util::normalize_ident, vdbe::{ builder::{CursorType, ProgramBuilder}, @@ -34,7 +41,9 @@ pub fn translate_alter_table( crate::bail_parse_error!("table {} may not be modified", table_name); } - if resolver.schema.table_has_indexes(table_name) && !resolver.schema.indexes_enabled() { + let table_indexes = resolver.schema.get_indices(table_name).collect::>(); + + if !table_indexes.is_empty() && !resolver.schema.indexes_enabled() { // Let's disable altering a table with indices altogether instead of checking column by // column to be extra safe. 
crate::bail_parse_error!( @@ -108,6 +117,65 @@ pub fn translate_alter_table( ))); } + for index in table_indexes.iter() { + // Referenced in regular index + if index + .columns + .iter() + .any(|col| col.pos_in_table == dropped_index) + { + return Err(LimboError::ParseError(format!( + "cannot drop column \"{column_name}\": indexed" + ))); + } + // Referenced in partial index + if index.where_clause.is_some() { + let mut table_references = TableReferences::new( + vec![], + vec![OuterQueryReference { + identifier: table_name.to_string(), + internal_id: TableInternalId::from(0), + table: Table::BTree(Arc::new(btree.clone())), + col_used_mask: ColumnUsedMask::default(), + }], + ); + let where_copy = index + .bind_where_expr(Some(&mut table_references), connection) + .expect("where clause to exist"); + let mut column_referenced = false; + walk_expr( + &where_copy, + &mut |e: &ast::Expr| -> crate::Result { + if let ast::Expr::Column { + table, + column: column_index, + .. + } = e + { + if *table == TableInternalId::from(0) + && *column_index == dropped_index + { + column_referenced = true; + return Ok(WalkControl::SkipChildren); + } + } + Ok(WalkControl::Continue) + }, + )?; + if column_referenced { + return Err(LimboError::ParseError(format!( + "cannot drop column \"{column_name}\": indexed" + ))); + } + } + } + + // TODO: check usage in CHECK constraint when implemented + // TODO: check usage in foreign key constraint when implemented + // TODO: check usage in generated column when implemented + + // References in VIEWs are checked in the VDBE layer op_drop_column instruction. + btree.columns.remove(dropped_index); let sql = btree.to_sql().replace('\'', "''"); From 31373570922730487af4ca15bbf89958226e13ba Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 27 Sep 2025 09:37:03 +0300 Subject: [PATCH 44/65] ALTER TABLE: prevent dropping indexed column in VDBE layer --- core/vdbe/execute.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index fc55eee0e..4cc61a4f6 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -7883,6 +7883,21 @@ pub fn op_drop_column( let conn = program.connection.clone(); + let column_name = { + let schema = conn.schema.read(); + let table = schema + .tables + .get(table) + .expect("table being ALTERed should be in schema"); + table + .get_column_at(*column_index) + .expect("column being ALTERed should be in schema") + .name + .as_ref() + .expect("column being ALTERed should be named") + .clone() + }; + conn.with_schema_mut(|schema| { let table = schema .tables @@ -7899,6 +7914,21 @@ pub fn op_drop_column( btree.columns.remove(*column_index) }); + let schema = conn.schema.read(); + if let Some(indexes) = schema.indexes.get(table) { + for index in indexes { + if index + .columns + .iter() + .any(|column| column.pos_in_table == *column_index) + { + return Err(LimboError::ParseError(format!( + "cannot drop column \"{column_name}\": indexed" + ))); + } + } + } + state.pc += 1; Ok(InsnFunctionStepResult::Step) } From 67d320960d940b3164d94cfd57717cb5afbdf236 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 27 Sep 2025 09:37:15 +0300 Subject: [PATCH 45/65] ALTER TABLE: prevent dropping/renaming column referenced in VIEW --- core/vdbe/execute.rs | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 4cc61a4f6..de826ebd1 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -7929,6 
+7929,16 @@ pub fn op_drop_column( } } + for (view_name, view) in schema.views.iter() { + let view_select_sql = format!("SELECT * FROM {view_name}"); + conn.prepare(view_select_sql.as_str()).map_err(|e| { + LimboError::ParseError(format!( + "cannot drop column \"{}\": referenced in VIEW {view_name}: {}", + column_name, view.sql, + )) + })?; + } + state.pc += 1; Ok(InsnFunctionStepResult::Step) } @@ -7984,6 +7994,20 @@ pub fn op_alter_column( let conn = program.connection.clone(); let normalized_table_name = normalize_ident(table_name.as_str()); + let old_column_name = { + let schema = conn.schema.read(); + let table = schema + .tables + .get(table_name) + .expect("table being ALTERed should be in schema"); + table + .get_column_at(*column_index) + .expect("column being ALTERed should be in schema") + .name + .as_ref() + .expect("column being ALTERed should be named") + .clone() + }; let new_column = crate::schema::Column::from(definition); conn.with_schema_mut(|schema| { @@ -8025,6 +8049,27 @@ pub fn op_alter_column( } }); + let schema = conn.schema.read(); + if *rename { + let table = schema + .tables + .get(&normalized_table_name) + .expect("table being ALTERed should be in schema"); + let column = table + .get_column_at(*column_index) + .expect("column being ALTERed should be in schema"); + for (view_name, view) in schema.views.iter() { + let view_select_sql = format!("SELECT * FROM {view_name}"); + // FIXME: this should rewrite the view to reference the new column name + conn.prepare(view_select_sql.as_str()).map_err(|e| { + LimboError::ParseError(format!( + "cannot rename column \"{}\": referenced in VIEW {view_name}: {}", + old_column_name, view.sql, + )) + })?; + } + } + state.pc += 1; Ok(InsnFunctionStepResult::Step) } From b43a89e4234fdc8df402b01b5bea38c6ed1b3eea Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 27 Sep 2025 09:38:02 +0300 Subject: [PATCH 46/65] Add regression tests for ALTER TABLE stuff --- tests/integration/query_processing/mod.rs | 1 + .../integration/query_processing/test_ddl.rs | 134 ++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 tests/integration/query_processing/test_ddl.rs diff --git a/tests/integration/query_processing/mod.rs b/tests/integration/query_processing/mod.rs index 742cdf52c..fd30cac1e 100644 --- a/tests/integration/query_processing/mod.rs +++ b/tests/integration/query_processing/mod.rs @@ -1,4 +1,5 @@ mod test_btree; +mod test_ddl; mod test_read_path; mod test_write_path; diff --git a/tests/integration/query_processing/test_ddl.rs b/tests/integration/query_processing/test_ddl.rs new file mode 100644 index 000000000..990101973 --- /dev/null +++ b/tests/integration/query_processing/test_ddl.rs @@ -0,0 +1,134 @@ +use crate::common::TempDatabase; + +#[test] +fn test_fail_drop_indexed_column() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE t (a, b);", true); + let conn = tmp_db.connect_limbo(); + + conn.execute("CREATE INDEX i ON t (a)")?; + let res = conn.execute("ALTER TABLE t DROP COLUMN a"); + assert!(res.is_err(), "Expected error when dropping indexed column"); + Ok(()) +} + +#[test] +fn test_fail_drop_unique_column() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE t (a UNIQUE, b);", true); + let conn = tmp_db.connect_limbo(); + + let res = conn.execute("ALTER TABLE t DROP COLUMN a"); + assert!(res.is_err(), "Expected error when dropping UNIQUE column"); + Ok(()) +} + 
+#[test] +fn test_fail_drop_compound_unique_column() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE t (a, b, UNIQUE(a, b));", true); + let conn = tmp_db.connect_limbo(); + + let res = conn.execute("ALTER TABLE t DROP COLUMN a"); + assert!( + res.is_err(), + "Expected error when dropping column in compound UNIQUE" + ); + Ok(()) +} + +#[test] +fn test_fail_drop_primary_key_column() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE t (a PRIMARY KEY, b);", true); + let conn = tmp_db.connect_limbo(); + + let res = conn.execute("ALTER TABLE t DROP COLUMN a"); + assert!( + res.is_err(), + "Expected error when dropping PRIMARY KEY column" + ); + Ok(()) +} + +#[test] +fn test_fail_drop_compound_primary_key_column() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE t (a, b, PRIMARY KEY(a, b));", true); + let conn = tmp_db.connect_limbo(); + + let res = conn.execute("ALTER TABLE t DROP COLUMN a"); + assert!( + res.is_err(), + "Expected error when dropping column in compound PRIMARY KEY" + ); + Ok(()) +} + +#[test] +fn test_fail_drop_partial_index_column() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE t (a, b);", true); + let conn = tmp_db.connect_limbo(); + + conn.execute("CREATE INDEX i ON t (b) WHERE a > 0")?; + let res = conn.execute("ALTER TABLE t DROP COLUMN a"); + assert!( + res.is_err(), + "Expected error when dropping column referenced by partial index" + ); + Ok(()) +} + +#[test] +fn test_fail_drop_view_column() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE t (a, b);", true); + let conn = tmp_db.connect_limbo(); + + conn.execute("CREATE VIEW v AS SELECT a, b FROM t")?; + let res = conn.execute("ALTER TABLE t DROP COLUMN a"); + assert!( + res.is_err(), + "Expected error when dropping column referenced by view" + ); + Ok(()) +} + +// FIXME: this should rewrite the view to reference the new column name +#[test] +fn test_fail_rename_view_column() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE t (a, b);", true); + let conn = tmp_db.connect_limbo(); + + conn.execute("CREATE VIEW v AS SELECT a, b FROM t")?; + let res = conn.execute("ALTER TABLE t RENAME a TO c"); + assert!( + res.is_err(), + "Expected error when renaming column referenced by view" + ); + Ok(()) +} + +#[test] +fn test_allow_drop_unreferenced_columns() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let tmp_db = TempDatabase::new_with_rusqlite( + "CREATE TABLE t (pk INTEGER PRIMARY KEY, indexed INTEGER, viewed INTEGER, partial INTEGER, compound1 INTEGER, compound2 INTEGER, unused1 INTEGER, unused2 INTEGER, unused3 INTEGER);", + true + ); + let conn = tmp_db.connect_limbo(); + + conn.execute("CREATE INDEX idx ON t(indexed)")?; + conn.execute("CREATE VIEW v AS SELECT viewed FROM t")?; + conn.execute("CREATE INDEX partial_idx ON t(compound1) WHERE partial > 0")?; + conn.execute("CREATE INDEX compound_idx ON t(compound1, compound2)")?; + + // Should be able to drop unused columns + conn.execute("ALTER TABLE t DROP COLUMN unused1")?; + conn.execute("ALTER TABLE t DROP COLUMN unused2")?; + conn.execute("ALTER TABLE t DROP COLUMN unused3")?; + + Ok(()) +} From 283fba2e0d38a1503f7adb8292dcf18be16d19c6 Mon Sep 
17 00:00:00 2001 From: Jussi Saurio Date: Sat, 27 Sep 2025 09:53:11 +0300 Subject: [PATCH 47/65] use normalized table name --- core/vdbe/execute.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index de826ebd1..67435caec 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -7883,11 +7883,13 @@ pub fn op_drop_column( let conn = program.connection.clone(); + let normalized_table_name = normalize_ident(table.as_str()); + let column_name = { let schema = conn.schema.read(); let table = schema .tables - .get(table) + .get(&normalized_table_name) .expect("table being ALTERed should be in schema"); table .get_column_at(*column_index) @@ -7901,7 +7903,7 @@ pub fn op_drop_column( conn.with_schema_mut(|schema| { let table = schema .tables - .get_mut(table) + .get_mut(&normalized_table_name) .expect("table being renamed should be in schema"); let table = Arc::make_mut(table); @@ -7915,7 +7917,7 @@ pub fn op_drop_column( }); let schema = conn.schema.read(); - if let Some(indexes) = schema.indexes.get(table) { + if let Some(indexes) = schema.indexes.get(&normalized_table_name) { for index in indexes { if index .columns @@ -7998,7 +8000,7 @@ pub fn op_alter_column( let schema = conn.schema.read(); let table = schema .tables - .get(table_name) + .get(&normalized_table_name) .expect("table being ALTERed should be in schema"); table .get_column_at(*column_index) From 3ee97ddf36eeea538a5b49b606993c04cf05bc8b Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 26 Sep 2025 21:57:38 -0300 Subject: [PATCH 48/65] Make sure complex expressions in filters go through Project We had code for this, but the code had a fatal flaw: it tried to detect a complex operation (an operation that needs projection), and return false (no need for projection), for the others. This is the exact opposite of what we should do: we should identify the *simple* operations, and then return true (needs projection) for the rest. CAST is a special beast, since it is not a function, but rather, a special opcode. Everything else above is the true just the same. But for CAST, we have to do the extra work to capture it in the logical plan and pass it down. 
Fixes #3372 Fixes #3370 Fixes #3369 --- core/incremental/compiler.rs | 232 +++++++++++++++++++++++++++++--- core/translate/logical.rs | 13 ++ testing/materialized_views.test | 167 +++++++++++++++++++++++ 3 files changed, 390 insertions(+), 22 deletions(-) diff --git a/core/incremental/compiler.rs b/core/incremental/compiler.rs index f3237c4d1..4a1cf7453 100644 --- a/core/incremental/compiler.rs +++ b/core/incremental/compiler.rs @@ -1638,6 +1638,106 @@ impl DbspCompiler { }, }) } + LogicalExpr::Between { + expr, + low, + high, + negated, + } => { + // BETWEEN x AND y is rewritten as (expr >= x AND expr <= y) + // NOT BETWEEN x AND y is rewritten as (expr < x OR expr > y) + let expr_ast = Self::logical_to_ast_expr_with_schema(expr, schema)?; + let low_ast = Self::logical_to_ast_expr_with_schema(low, schema)?; + let high_ast = Self::logical_to_ast_expr_with_schema(high, schema)?; + + if *negated { + // NOT BETWEEN: (expr < low OR expr > high) + Ok(ast::Expr::Binary( + Box::new(ast::Expr::Binary( + Box::new(expr_ast.clone()), + ast::Operator::Less, + Box::new(low_ast), + )), + ast::Operator::Or, + Box::new(ast::Expr::Binary( + Box::new(expr_ast), + ast::Operator::Greater, + Box::new(high_ast), + )), + )) + } else { + // BETWEEN: (expr >= low AND expr <= high) + Ok(ast::Expr::Binary( + Box::new(ast::Expr::Binary( + Box::new(expr_ast.clone()), + ast::Operator::GreaterEquals, + Box::new(low_ast), + )), + ast::Operator::And, + Box::new(ast::Expr::Binary( + Box::new(expr_ast), + ast::Operator::LessEquals, + Box::new(high_ast), + )), + )) + } + } + LogicalExpr::InList { + expr, + list, + negated, + } => { + let lhs = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?); + let values: Result> = list + .iter() + .map(|item| { + let ast_expr = Self::logical_to_ast_expr_with_schema(item, schema)?; + Ok(Box::new(ast_expr)) + }) + .collect(); + Ok(ast::Expr::InList { + lhs, + not: *negated, + rhs: values?, + }) + } + LogicalExpr::Like { + expr, + pattern, + escape, + negated, + } => { + let lhs = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?); + let rhs = Box::new(Self::logical_to_ast_expr_with_schema(pattern, schema)?); + let escape_expr = escape + .map(|c| Box::new(ast::Expr::Literal(ast::Literal::String(c.to_string())))); + Ok(ast::Expr::Like { + lhs, + not: *negated, + op: ast::LikeOperator::Like, + rhs, + escape: escape_expr, + }) + } + LogicalExpr::IsNull { expr, negated } => { + let inner_expr = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?); + if *negated { + // IS NOT NULL needs to be represented differently + Ok(ast::Expr::Unary( + ast::UnaryOperator::Not, + Box::new(ast::Expr::IsNull(inner_expr)), + )) + } else { + Ok(ast::Expr::IsNull(inner_expr)) + } + } + LogicalExpr::Cast { expr, type_name } => { + let inner_expr = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?); + Ok(ast::Expr::Cast { + expr: inner_expr, + type_name: type_name.clone(), + }) + } _ => Err(LimboError::ParseError(format!( "Cannot convert LogicalExpr to AST Expr: {expr:?}" ))), @@ -1648,21 +1748,55 @@ impl DbspCompiler { fn predicate_needs_projection(expr: &LogicalExpr) -> bool { match expr { LogicalExpr::BinaryExpr { left, op, right } => { + // Only these specific simple patterns DON'T need projection match (left.as_ref(), right.as_ref()) { - // Simple column to literal - OK - (LogicalExpr::Column(_), LogicalExpr::Literal(_)) => false, - // Simple column to column - OK - (LogicalExpr::Column(_), LogicalExpr::Column(_)) => false, + // Simple column to literal comparisons 
+ (LogicalExpr::Column(_), LogicalExpr::Literal(_)) + if matches!( + op, + BinaryOperator::Equals + | BinaryOperator::NotEquals + | BinaryOperator::Greater + | BinaryOperator::GreaterEquals + | BinaryOperator::Less + | BinaryOperator::LessEquals + ) => + { + false + } + + // Simple column to column comparisons + (LogicalExpr::Column(_), LogicalExpr::Column(_)) + if matches!( + op, + BinaryOperator::Equals + | BinaryOperator::NotEquals + | BinaryOperator::Greater + | BinaryOperator::GreaterEquals + | BinaryOperator::Less + | BinaryOperator::LessEquals + ) => + { + false + } + // AND/OR of simple expressions - check recursively _ if matches!(op, BinaryOperator::And | BinaryOperator::Or) => { Self::predicate_needs_projection(left) || Self::predicate_needs_projection(right) } - // Any other pattern needs projection + + // Everything else needs projection _ => true, } } - _ => false, + // These simple cases don't need projection + LogicalExpr::Column(_) | LogicalExpr::Literal(_) => false, + + // Default: assume we need projection for safety + // This includes: Between, InList, Like, IsNull, Cast, ScalarFunction, Case, + // InSubquery, Exists, ScalarSubquery, and any future expression types + _ => true, } } @@ -1684,7 +1818,7 @@ impl DbspCompiler { return Ok(expr.clone()); } - // For expressions like (age * 2) > 30, we want to extract (age * 2) + // For comparison expressions, check if we need to extract a subexpression if matches!( op, BinaryOperator::Greater @@ -1694,17 +1828,30 @@ impl DbspCompiler { | BinaryOperator::Equals | BinaryOperator::NotEquals ) { - // Return the left side if it's not a simple column - if !matches!(left.as_ref(), LogicalExpr::Column(_)) { - Ok((**left).clone()) - } else { - // Must be the whole expression then - Ok(expr.clone()) + // If the left side is complex (not a column), extract it + if !matches!( + left.as_ref(), + LogicalExpr::Column(_) | LogicalExpr::Literal(_) + ) { + return Ok((**left).clone()); } + // If the right side is complex (not a literal), extract it + if !matches!( + right.as_ref(), + LogicalExpr::Column(_) | LogicalExpr::Literal(_) + ) { + return Ok((**right).clone()); + } + // Both sides are simple but the expression as a whole might need projection + // (e.g., for arithmetic operations) + Ok(expr.clone()) } else { + // For other binary operators (arithmetic, etc.), return the whole expression Ok(expr.clone()) } } + // For non-binary expressions (BETWEEN, IN, LIKE, functions, etc.), + // we need to compute the whole expression as a boolean _ => Ok(expr.clone()), } } @@ -1729,20 +1876,61 @@ impl DbspCompiler { // Check if this is a complex comparison that needs replacement if Self::predicate_needs_projection(expr) { - // Replace the complex expression (left side) with the temp column - return Ok(LogicalExpr::BinaryExpr { - left: Box::new(LogicalExpr::Column(Column { - name: temp_column_name.to_string(), - table: None, - })), - op: *op, - right: right.clone(), - }); + // Determine which side is complex and needs replacement + let left_is_simple = matches!( + left.as_ref(), + LogicalExpr::Column(_) | LogicalExpr::Literal(_) + ); + let right_is_simple = matches!( + right.as_ref(), + LogicalExpr::Column(_) | LogicalExpr::Literal(_) + ); + + if !left_is_simple { + // Left side is complex - replace it with temp column + return Ok(LogicalExpr::BinaryExpr { + left: Box::new(LogicalExpr::Column(Column { + name: temp_column_name.to_string(), + table: None, + })), + op: *op, + right: right.clone(), + }); + } else if !right_is_simple { + // Right side is 
complex - replace it with temp column + return Ok(LogicalExpr::BinaryExpr { + left: left.clone(), + op: *op, + right: Box::new(LogicalExpr::Column(Column { + name: temp_column_name.to_string(), + table: None, + })), + }); + } else { + // Both sides are simple, but the expression as a whole needs projection + // This shouldn't happen normally, but keep the expression as-is + return Ok(expr.clone()); + } } // Simple comparison - keep as is Ok(expr.clone()) } + // For non-binary expressions that need projection (BETWEEN, IN, etc.), + // replace the whole expression with a column reference to the temp column + // The temp column will hold the boolean result of evaluating the expression + _ if Self::predicate_needs_projection(expr) => { + // The complex expression result is in the temp column + // We need to check if it's true (non-zero) + Ok(LogicalExpr::BinaryExpr { + left: Box::new(LogicalExpr::Column(Column { + name: temp_column_name.to_string(), + table: None, + })), + op: BinaryOperator::Equals, + right: Box::new(LogicalExpr::Literal(Value::Integer(1))), // true = 1 in SQL + }) + } _ => Ok(expr.clone()), } } diff --git a/core/translate/logical.rs b/core/translate/logical.rs index 4c27a506b..cc0c2a8b8 100644 --- a/core/translate/logical.rs +++ b/core/translate/logical.rs @@ -346,6 +346,11 @@ pub enum LogicalExpr { escape: Option, negated: bool, }, + /// CAST expression + Cast { + expr: Box, + type_name: Option, + }, } /// Column reference @@ -1774,6 +1779,14 @@ impl<'a> LogicalPlanBuilder<'a> { self.build_expr(&exprs[0], _schema) } + ast::Expr::Cast { expr, type_name } => { + let inner = self.build_expr(expr, _schema)?; + Ok(LogicalExpr::Cast { + expr: Box::new(inner), + type_name: type_name.clone(), + }) + } + _ => Err(LimboError::ParseError(format!( "Unsupported expression type in logical plan: {expr:?}" ))), diff --git a/testing/materialized_views.test b/testing/materialized_views.test index 354f65d39..1755aede2 100755 --- a/testing/materialized_views.test +++ b/testing/materialized_views.test @@ -1428,3 +1428,170 @@ do_execsql_test_on_specific_db {:memory:} matview-union-all-text-filter { 2 3 4} + +# Test BETWEEN in WHERE clause +do_execsql_test_on_specific_db {:memory:} matview-between-filter { + CREATE TABLE products(id INTEGER PRIMARY KEY, name TEXT, price INTEGER); + INSERT INTO products VALUES + (1, 'Cheap', 10), + (2, 'Mid1', 50), + (3, 'Mid2', 75), + (4, 'Expensive', 150); + + CREATE MATERIALIZED VIEW mid_range AS + SELECT id, name, price FROM products WHERE price BETWEEN 40 AND 100; + + SELECT * FROM mid_range ORDER BY id; +} {2|Mid1|50 +3|Mid2|75} + +# Test IN list in WHERE clause +do_execsql_test_on_specific_db {:memory:} matview-in-filter { + CREATE TABLE orders(id INTEGER PRIMARY KEY, customer TEXT, status TEXT); + INSERT INTO orders VALUES + (1, 'Alice', 'shipped'), + (2, 'Bob', 'pending'), + (3, 'Charlie', 'delivered'), + (4, 'David', 'cancelled'), + (5, 'Eve', 'shipped'); + + CREATE MATERIALIZED VIEW active_orders AS + SELECT id, customer FROM orders WHERE status IN ('pending', 'shipped'); + + SELECT * FROM active_orders ORDER BY id; +} {1|Alice +2|Bob +5|Eve} + +# Test CAST with TEXT in WHERE clause +do_execsql_test_on_specific_db {:memory:} matview-cast-text { + CREATE TABLE records(id INTEGER PRIMARY KEY, code TEXT); + INSERT INTO records VALUES + (1, 'A100'), + (2, 'B200'), + (3, 'A300'); + + CREATE MATERIALIZED VIEW filtered AS + SELECT id FROM records WHERE code < CAST('B' AS TEXT); + + SELECT * FROM filtered ORDER BY id; +} {1 +3} + +# Test BETWEEN and IN together 
+do_execsql_test_on_specific_db {:memory:} matview-between-and-in { + CREATE TABLE inventory(id INTEGER PRIMARY KEY, product TEXT, quantity INTEGER, location TEXT); + INSERT INTO inventory VALUES + (1, 'Widget', 50, 'WH1'), + (2, 'Gadget', 30, 'WH2'), + (3, 'Tool', 80, 'WH1'), + (4, 'Part', 15, 'WH3'), + (5, 'Device', 45, 'WH2'); + + CREATE MATERIALIZED VIEW wh1_wh2_medium_stock AS + SELECT id, product, quantity + FROM inventory + WHERE quantity BETWEEN 25 AND 60 + AND location IN ('WH1', 'WH2'); + + SELECT * FROM wh1_wh2_medium_stock ORDER BY id; +} {1|Widget|50 +2|Gadget|30 +5|Device|45} + +# Test complex OR conditions with IN +do_execsql_test_on_specific_db {:memory:} matview-complex-or-with-in { + CREATE TABLE shipments(id INTEGER PRIMARY KEY, size INTEGER, mode TEXT, priority TEXT); + INSERT INTO shipments VALUES + (1, 5, 'AIR', 'high'), + (2, 15, 'TRUCK', 'normal'), + (3, 8, 'AIR', 'normal'), + (4, 20, 'SHIP', 'low'), + (5, 12, 'AIR_REG', 'high'); + + CREATE MATERIALIZED VIEW express_shipments AS + SELECT id, size, mode + FROM shipments + WHERE (size BETWEEN 5 AND 10 AND mode IN ('AIR', 'AIR_REG')) + OR priority = 'high'; + + SELECT * FROM express_shipments ORDER BY id; +} {1|5|AIR +3|8|AIR +5|12|AIR_REG} + +# Test join with BETWEEN in WHERE +do_execsql_test_on_specific_db {:memory:} matview-join-with-between { + CREATE TABLE parts(id INTEGER PRIMARY KEY, size INTEGER); + CREATE TABLE suppliers(id INTEGER PRIMARY KEY, part_id INTEGER, price INTEGER); + + INSERT INTO parts VALUES (1, 5), (2, 10), (3, 20); + INSERT INTO suppliers VALUES (1, 1, 100), (2, 2, 150), (3, 3, 200); + + CREATE MATERIALIZED VIEW medium_parts AS + SELECT p.id, p.size, s.price + FROM parts p + JOIN suppliers s ON p.id = s.part_id + WHERE p.size BETWEEN 8 AND 15; + + SELECT * FROM medium_parts ORDER BY id; +} {2|10|150} + +# Test join with IN in WHERE +do_execsql_test_on_specific_db {:memory:} matview-join-with-in { + CREATE TABLE customers(id INTEGER PRIMARY KEY, region TEXT); + CREATE TABLE orders(id INTEGER PRIMARY KEY, customer_id INTEGER, amount INTEGER); + + INSERT INTO customers VALUES (1, 'USA'), (2, 'Canada'), (3, 'UK'), (4, 'Mexico'); + INSERT INTO orders VALUES (1, 1, 100), (2, 2, 200), (3, 3, 150), (4, 4, 300); + + CREATE MATERIALIZED VIEW north_america_orders AS + SELECT c.region, o.amount + FROM customers c + JOIN orders o ON c.id = o.customer_id + WHERE c.region IN ('USA', 'Canada', 'Mexico'); + + SELECT * FROM north_america_orders ORDER BY region, amount; +} {Canada|200 +Mexico|300 +USA|100} + +# Test incremental maintenance with BETWEEN +do_execsql_test_on_specific_db {:memory:} matview-between-incremental { + CREATE TABLE items(id INTEGER PRIMARY KEY, value INTEGER); + INSERT INTO items VALUES (1, 5), (2, 15); + + CREATE MATERIALIZED VIEW mid_values AS + SELECT id, value FROM items WHERE value BETWEEN 10 AND 20; + + SELECT COUNT(*) FROM mid_values; + + INSERT INTO items VALUES (3, 12), (4, 25); + SELECT * FROM mid_values ORDER BY id; + + UPDATE items SET value = 30 WHERE id = 2; + SELECT * FROM mid_values ORDER BY id; +} {1 +2|15 +3|12 +3|12} + +# Test incremental maintenance with IN +do_execsql_test_on_specific_db {:memory:} matview-in-incremental { + CREATE TABLE logs(id INTEGER PRIMARY KEY, level TEXT, message TEXT); + INSERT INTO logs VALUES (1, 'INFO', 'start'), (2, 'DEBUG', 'test'); + + CREATE MATERIALIZED VIEW important_logs AS + SELECT id, level, message FROM logs WHERE level IN ('ERROR', 'WARN', 'INFO'); + + SELECT COUNT(*) FROM important_logs; + + INSERT INTO logs VALUES (3, 'ERROR', 
'fail'), (4, 'TRACE', 'detail'); + SELECT * FROM important_logs ORDER BY id; + + DELETE FROM logs WHERE id = 1; + SELECT * FROM important_logs ORDER BY id; +} {1 +1|INFO|start +3|ERROR|fail +3|ERROR|fail} From 8d9d2dad1d830ec52aed8288ef4d9e3aaa48ffbe Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 25 Sep 2025 14:22:20 +0300 Subject: [PATCH 49/65] core/storage: Wrap WalFile::syncing with AtomicBool --- core/storage/wal.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/core/storage/wal.rs b/core/storage/wal.rs index e4c1c721f..6172bb1de 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -9,7 +9,7 @@ use tracing::{instrument, Level}; use parking_lot::RwLock; use std::fmt::{Debug, Formatter}; use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; -use std::{cell::Cell, fmt, rc::Rc, sync::Arc}; +use std::{cell::Cell, fmt, sync::Arc}; use super::buffer_pool::BufferPool; use super::pager::{PageRef, Pager}; @@ -561,7 +561,7 @@ pub struct WalFile { io: Arc, buffer_pool: Arc, - syncing: Rc>, + syncing: Arc, shared: Arc>, ongoing_checkpoint: OngoingCheckpoint, @@ -593,7 +593,7 @@ pub struct WalFile { impl fmt::Debug for WalFile { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("WalFile") - .field("syncing", &self.syncing.get()) + .field("syncing", &self.syncing.load(Ordering::SeqCst)) .field("page_size", &self.page_size()) .field("shared", &self.shared) .field("ongoing_checkpoint", &self.ongoing_checkpoint) @@ -1310,10 +1310,10 @@ impl Wal for WalFile { let syncing = self.syncing.clone(); let completion = Completion::new_sync(move |_| { tracing::debug!("wal_sync finish"); - syncing.set(false); + syncing.store(false, Ordering::SeqCst); }); let shared = self.get_shared(); - self.syncing.set(true); + self.syncing.store(true, Ordering::SeqCst); assert!(shared.enabled.load(Ordering::SeqCst), "WAL must be enabled"); let file = shared.file.as_ref().unwrap(); let c = file.sync(completion)?; @@ -1322,7 +1322,7 @@ impl Wal for WalFile { // Currently used for assertion purposes fn is_syncing(&self) -> bool { - self.syncing.get() + self.syncing.load(Ordering::SeqCst) } fn get_max_frame_in_wal(&self) -> u64 { @@ -1616,7 +1616,7 @@ impl WalFile { checkpoint_threshold: 1000, buffer_pool, checkpoint_seq: AtomicU32::new(0), - syncing: Rc::new(Cell::new(false)), + syncing: Arc::new(AtomicBool::new(false)), min_frame: 0, max_frame_read_lock_index: NO_LOCK_HELD.into(), last_checksum, @@ -1695,7 +1695,7 @@ impl WalFile { fn reset_internal_states(&mut self) { self.max_frame_read_lock_index.set(NO_LOCK_HELD); self.ongoing_checkpoint.reset(); - self.syncing.set(false); + self.syncing.store(false, Ordering::SeqCst); } /// the WAL file has been truncated and we are writing the first From b31818f77ca1cea48d608c8a652fb67d7efa8ba6 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 27 Sep 2025 14:08:25 +0300 Subject: [PATCH 50/65] core/vdbe: Wrap SortedChunk::buffer with RwLock --- core/vdbe/sorter.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index d8469458c..ca81835bf 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -4,7 +4,7 @@ use std::cell::{Cell, RefCell}; use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd, Reverse}; use std::collections::BinaryHeap; use std::rc::Rc; -use std::sync::Arc; +use std::sync::{Arc, RwLock}; use tempfile; use crate::types::IOCompletions; @@ -394,7 +394,7 @@ struct SortedChunk { /// The size of this chunk 
file in bytes. chunk_size: usize, /// The read buffer. - buffer: Rc>>, + buffer: Arc>>, /// The current length of the buffer. buffer_len: Rc>, /// The records decoded from the chunk file. @@ -413,7 +413,7 @@ impl SortedChunk { file, start_offset: start_offset as u64, chunk_size: 0, - buffer: Rc::new(RefCell::new(vec![0; buffer_size])), + buffer: Arc::new(RwLock::new(vec![0; buffer_size])), buffer_len: Rc::new(Cell::new(0)), records: Vec::new(), io_state: Rc::new(Cell::new(SortedChunkIOState::None)), @@ -432,7 +432,7 @@ impl SortedChunk { } if self.records.is_empty() { - let mut buffer_ref = self.buffer.borrow_mut(); + let mut buffer_ref = self.buffer.write().unwrap(); let buffer = buffer_ref.as_mut_slice(); let mut buffer_offset = 0; while buffer_offset < buffer_len { @@ -503,7 +503,7 @@ impl SortedChunk { fn read(&mut self) -> Result { self.io_state.set(SortedChunkIOState::WaitingForRead); - let read_buffer_size = self.buffer.borrow().len() - self.buffer_len.get(); + let read_buffer_size = self.buffer.read().unwrap().len() - self.buffer_len.get(); let read_buffer_size = read_buffer_size.min(self.chunk_size - self.total_bytes_read.get()); let read_buffer = Buffer::new_temporary(read_buffer_size); @@ -527,7 +527,7 @@ impl SortedChunk { } chunk_io_state_copy.set(SortedChunkIOState::ReadComplete); - let mut stored_buf_ref = stored_buffer_copy.borrow_mut(); + let mut stored_buf_ref = stored_buffer_copy.write().unwrap(); let stored_buf = stored_buf_ref.as_mut_slice(); let mut stored_buf_len = stored_buffer_len_copy.get(); From 5f39987ec08a9ea93bc1b4fdc394eeac5036c155 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 27 Sep 2025 14:14:20 +0300 Subject: [PATCH 51/65] core/vdbe: Wrap SortedChunk::buffer_len with AtomicUsize --- core/vdbe/sorter.rs | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index ca81835bf..088fba8f9 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -4,7 +4,7 @@ use std::cell::{Cell, RefCell}; use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd, Reverse}; use std::collections::BinaryHeap; use std::rc::Rc; -use std::sync::{Arc, RwLock}; +use std::sync::{atomic, Arc, RwLock}; use tempfile; use crate::types::IOCompletions; @@ -396,7 +396,7 @@ struct SortedChunk { /// The read buffer. buffer: Arc>>, /// The current length of the buffer. - buffer_len: Rc>, + buffer_len: Arc, /// The records decoded from the chunk file. records: Vec, /// The current IO state of the chunk. 
@@ -414,7 +414,7 @@ impl SortedChunk { start_offset: start_offset as u64, chunk_size: 0, buffer: Arc::new(RwLock::new(vec![0; buffer_size])), - buffer_len: Rc::new(Cell::new(0)), + buffer_len: Arc::new(atomic::AtomicUsize::new(0)), records: Vec::new(), io_state: Rc::new(Cell::new(SortedChunkIOState::None)), total_bytes_read: Rc::new(Cell::new(0)), @@ -422,11 +422,19 @@ impl SortedChunk { } } + fn buffer_len(&self) -> usize { + self.buffer_len.load(atomic::Ordering::SeqCst) + } + + fn set_buffer_len(&self, len: usize) { + self.buffer_len.store(len, atomic::Ordering::SeqCst); + } + fn next(&mut self) -> Result>> { loop { match self.next_state { NextState::Start => { - let mut buffer_len = self.buffer_len.get(); + let mut buffer_len = self.buffer_len(); if self.records.is_empty() && buffer_len == 0 { return Ok(IOResult::Done(None)); } @@ -474,7 +482,7 @@ impl SortedChunk { } else { buffer_len = 0; } - self.buffer_len.set(buffer_len); + self.set_buffer_len(buffer_len); self.records.reverse(); } @@ -503,7 +511,7 @@ impl SortedChunk { fn read(&mut self) -> Result { self.io_state.set(SortedChunkIOState::WaitingForRead); - let read_buffer_size = self.buffer.read().unwrap().len() - self.buffer_len.get(); + let read_buffer_size = self.buffer.read().unwrap().len() - self.buffer_len(); let read_buffer_size = read_buffer_size.min(self.chunk_size - self.total_bytes_read.get()); let read_buffer = Buffer::new_temporary(read_buffer_size); @@ -529,13 +537,13 @@ impl SortedChunk { let mut stored_buf_ref = stored_buffer_copy.write().unwrap(); let stored_buf = stored_buf_ref.as_mut_slice(); - let mut stored_buf_len = stored_buffer_len_copy.get(); + let mut stored_buf_len = stored_buffer_len_copy.load(atomic::Ordering::SeqCst); stored_buf[stored_buf_len..stored_buf_len + bytes_read] .copy_from_slice(&read_buf[..bytes_read]); stored_buf_len += bytes_read; - stored_buffer_len_copy.set(stored_buf_len); + stored_buffer_len_copy.store(stored_buf_len, atomic::Ordering::SeqCst); total_bytes_read_copy.set(total_bytes_read_copy.get() + bytes_read); }); From 61b3f569976da59a2b8c30f9ce46b95db5289d65 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 27 Sep 2025 14:25:35 +0300 Subject: [PATCH 52/65] core/vdbe: Wrap SortedChunk::io_state with RwLock --- core/vdbe/sorter.rs | 46 ++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 088fba8f9..e7ef60503 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -175,7 +175,10 @@ impl Sorter { SortState::InitHeap => { turso_assert!( !self.chunks.iter().any(|chunk| { - matches!(chunk.io_state.get(), SortedChunkIOState::WaitingForWrite) + matches!( + *chunk.io_state.read().unwrap(), + SortedChunkIOState::WaitingForWrite + ) }), "chunks should been written" ); @@ -231,7 +234,10 @@ impl Sorter { InsertState::Insert => { turso_assert!( !self.chunks.iter().any(|chunk| { - matches!(chunk.io_state.get(), SortedChunkIOState::WaitingForWrite) + matches!( + *chunk.io_state.read().unwrap(), + SortedChunkIOState::WaitingForWrite + ) }), "chunks should have written" ); @@ -272,7 +278,7 @@ impl Sorter { // Make sure all chunks read at least one record into their buffer. 
turso_assert!( !self.chunks.iter().any(|chunk| matches!( - chunk.io_state.get(), + *chunk.io_state.read().unwrap(), SortedChunkIOState::WaitingForRead )), "chunks should have been read" @@ -292,10 +298,10 @@ impl Sorter { fn next_from_chunk_heap(&mut self) -> Result>> { // Make sure all chunks read at least one record into their buffer. turso_assert!( - !self - .chunks - .iter() - .any(|chunk| matches!(chunk.io_state.get(), SortedChunkIOState::WaitingForRead)), + !self.chunks.iter().any(|chunk| matches!( + *chunk.io_state.read().unwrap(), + SortedChunkIOState::WaitingForRead + )), "chunks should have been read" ); @@ -400,7 +406,7 @@ struct SortedChunk { /// The records decoded from the chunk file. records: Vec, /// The current IO state of the chunk. - io_state: Rc>, + io_state: Arc>, /// The total number of bytes read from the chunk file. total_bytes_read: Rc>, /// State machine for [SortedChunk::next] @@ -416,7 +422,7 @@ impl SortedChunk { buffer: Arc::new(RwLock::new(vec![0; buffer_size])), buffer_len: Arc::new(atomic::AtomicUsize::new(0)), records: Vec::new(), - io_state: Rc::new(Cell::new(SortedChunkIOState::None)), + io_state: Arc::new(RwLock::new(SortedChunkIOState::None)), total_bytes_read: Rc::new(Cell::new(0)), next_state: NextState::Start, } @@ -451,7 +457,8 @@ impl SortedChunk { (record_size as usize, bytes_read) } Err(LimboError::Corrupt(_)) - if self.io_state.get() != SortedChunkIOState::ReadEOF => + if *self.io_state.read().unwrap() + != SortedChunkIOState::ReadEOF => { // Failed to decode a partial varint. break; @@ -461,7 +468,7 @@ impl SortedChunk { } }; if record_size > buffer_len - (buffer_offset + bytes_read) { - if self.io_state.get() == SortedChunkIOState::ReadEOF { + if *self.io_state.read().unwrap() == SortedChunkIOState::ReadEOF { crate::bail_corrupt_error!("Incomplete record"); } break; @@ -489,11 +496,12 @@ impl SortedChunk { self.next_state = NextState::Finish; // This check is done to see if we need to read more from the chunk before popping the record - if self.records.len() == 1 && self.io_state.get() != SortedChunkIOState::ReadEOF + if self.records.len() == 1 + && *self.io_state.read().unwrap() != SortedChunkIOState::ReadEOF { // We've consumed the last record. Read more payload into the buffer. 
if self.chunk_size - self.total_bytes_read.get() == 0 { - self.io_state.set(SortedChunkIOState::ReadEOF); + *self.io_state.write().unwrap() = SortedChunkIOState::ReadEOF; } else { let c = self.read()?; io_yield_one!(c); @@ -509,7 +517,7 @@ impl SortedChunk { } fn read(&mut self) -> Result { - self.io_state.set(SortedChunkIOState::WaitingForRead); + *self.io_state.write().unwrap() = SortedChunkIOState::WaitingForRead; let read_buffer_size = self.buffer.read().unwrap().len() - self.buffer_len(); let read_buffer_size = read_buffer_size.min(self.chunk_size - self.total_bytes_read.get()); @@ -530,10 +538,10 @@ impl SortedChunk { let bytes_read = bytes_read as usize; if bytes_read == 0 { - chunk_io_state_copy.set(SortedChunkIOState::ReadEOF); + *chunk_io_state_copy.write().unwrap() = SortedChunkIOState::ReadEOF; return; } - chunk_io_state_copy.set(SortedChunkIOState::ReadComplete); + *chunk_io_state_copy.write().unwrap() = SortedChunkIOState::ReadComplete; let mut stored_buf_ref = stored_buffer_copy.write().unwrap(); let stored_buf = stored_buf_ref.as_mut_slice(); @@ -560,8 +568,8 @@ impl SortedChunk { record_size_lengths: Vec, chunk_size: usize, ) -> Result { - assert!(self.io_state.get() == SortedChunkIOState::None); - self.io_state.set(SortedChunkIOState::WaitingForWrite); + assert!(*self.io_state.read().unwrap() == SortedChunkIOState::None); + *self.io_state.write().unwrap() = SortedChunkIOState::WaitingForWrite; self.chunk_size = chunk_size; let buffer = Buffer::new_temporary(self.chunk_size); @@ -586,7 +594,7 @@ impl SortedChunk { let Ok(bytes_written) = res else { return; }; - chunk_io_state_copy.set(SortedChunkIOState::WriteComplete); + *chunk_io_state_copy.write().unwrap() = SortedChunkIOState::WriteComplete; let buf_len = buffer_ref_copy.len(); if bytes_written < buf_len as i32 { tracing::error!("wrote({bytes_written}) less than expected({buf_len})"); From 7b6fc0f3b6725731670e5cca268283ca4c073e09 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 27 Sep 2025 14:29:48 +0300 Subject: [PATCH 53/65] core/vdbe: Wrap SortedChunk::total_bytes_read with AtomicUsize --- core/vdbe/sorter.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index e7ef60503..105d8095d 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -1,6 +1,6 @@ use turso_parser::ast::SortOrder; -use std::cell::{Cell, RefCell}; +use std::cell::RefCell; use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd, Reverse}; use std::collections::BinaryHeap; use std::rc::Rc; @@ -408,7 +408,7 @@ struct SortedChunk { /// The current IO state of the chunk. io_state: Arc>, /// The total number of bytes read from the chunk file. - total_bytes_read: Rc>, + total_bytes_read: Arc, /// State machine for [SortedChunk::next] next_state: NextState, } @@ -423,7 +423,7 @@ impl SortedChunk { buffer_len: Arc::new(atomic::AtomicUsize::new(0)), records: Vec::new(), io_state: Arc::new(RwLock::new(SortedChunkIOState::None)), - total_bytes_read: Rc::new(Cell::new(0)), + total_bytes_read: Arc::new(atomic::AtomicUsize::new(0)), next_state: NextState::Start, } } @@ -500,7 +500,9 @@ impl SortedChunk { && *self.io_state.read().unwrap() != SortedChunkIOState::ReadEOF { // We've consumed the last record. Read more payload into the buffer. 
- if self.chunk_size - self.total_bytes_read.get() == 0 { + if self.chunk_size - self.total_bytes_read.load(atomic::Ordering::SeqCst) + == 0 + { *self.io_state.write().unwrap() = SortedChunkIOState::ReadEOF; } else { let c = self.read()?; @@ -520,7 +522,8 @@ impl SortedChunk { *self.io_state.write().unwrap() = SortedChunkIOState::WaitingForRead; let read_buffer_size = self.buffer.read().unwrap().len() - self.buffer_len(); - let read_buffer_size = read_buffer_size.min(self.chunk_size - self.total_bytes_read.get()); + let read_buffer_size = read_buffer_size + .min(self.chunk_size - self.total_bytes_read.load(atomic::Ordering::SeqCst)); let read_buffer = Buffer::new_temporary(read_buffer_size); let read_buffer_ref = Arc::new(read_buffer); @@ -552,13 +555,14 @@ impl SortedChunk { stored_buf_len += bytes_read; stored_buffer_len_copy.store(stored_buf_len, atomic::Ordering::SeqCst); - total_bytes_read_copy.set(total_bytes_read_copy.get() + bytes_read); + total_bytes_read_copy.fetch_add(bytes_read, atomic::Ordering::SeqCst); }); let c = Completion::new_read(read_buffer_ref, read_complete); - let c = self - .file - .pread(self.start_offset + self.total_bytes_read.get() as u64, c)?; + let c = self.file.pread( + self.start_offset + self.total_bytes_read.load(atomic::Ordering::SeqCst) as u64, + c, + )?; Ok(c) } From ce76aa11b2d755c225d2dfa92ca55525035a00fc Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 27 Sep 2025 15:14:47 +0300 Subject: [PATCH 54/65] core/storage: Mark Page as Send and Sync --- core/storage/pager.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/storage/pager.rs b/core/storage/pager.rs index c4c4df620..0b85be394 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -168,6 +168,11 @@ pub struct Page { pub inner: UnsafeCell, } +// SAFETY: Page is thread-safe because we use atomic page flags to serialize +// concurrent modifications. +unsafe impl Send for Page {} +unsafe impl Sync for Page {} + // Concurrency control of pages will be handled by the pager, we won't wrap Page with RwLock // because that is bad bad. 
pub type PageRef = Arc; From 2f38d2ef04f0091ac44abaa932bfb76469233151 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 27 Sep 2025 16:52:35 +0300 Subject: [PATCH 55/65] Turso 0.2.0-pre.10 --- Cargo.lock | 56 +++++++++---------- Cargo.toml | 36 ++++++------ bindings/javascript/package-lock.json | 36 ++++++------ bindings/javascript/package.json | 2 +- .../packages/browser-common/package.json | 2 +- .../javascript/packages/browser/package.json | 6 +- .../javascript/packages/common/package.json | 2 +- .../javascript/packages/native/package.json | 4 +- .../sync/packages/browser/package.json | 8 +-- .../sync/packages/common/package.json | 4 +- .../sync/packages/native/package.json | 6 +- bindings/javascript/yarn.lock | 24 ++++---- 12 files changed, 93 insertions(+), 93 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 461f8c48f..53f7120dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -684,7 +684,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core_tester" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "anyhow", "assert_cmd", @@ -2264,7 +2264,7 @@ dependencies = [ [[package]] name = "limbo_completion" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "mimalloc", "turso_ext", @@ -2272,7 +2272,7 @@ dependencies = [ [[package]] name = "limbo_crypto" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "blake3", "data-encoding", @@ -2285,7 +2285,7 @@ dependencies = [ [[package]] name = "limbo_csv" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "csv", "mimalloc", @@ -2295,7 +2295,7 @@ dependencies = [ [[package]] name = "limbo_fuzzy" -version = "0.2.0-pre.8" +version = "0.2.0-pre.10" dependencies = [ "mimalloc", "turso_ext", @@ -2303,7 +2303,7 @@ dependencies = [ [[package]] name = "limbo_ipaddr" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "ipnetwork", "mimalloc", @@ -2312,7 +2312,7 @@ dependencies = [ [[package]] name = "limbo_percentile" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "mimalloc", "turso_ext", @@ -2320,7 +2320,7 @@ dependencies = [ [[package]] name = "limbo_regexp" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "mimalloc", "regex", @@ -2329,7 +2329,7 @@ dependencies = [ [[package]] name = "limbo_sim" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "anyhow", "chrono", @@ -2365,7 +2365,7 @@ dependencies = [ [[package]] name = "limbo_sqlite_test_ext" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "cc", ] @@ -3101,7 +3101,7 @@ dependencies = [ [[package]] name = "py-turso" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "anyhow", "pyo3", @@ -3828,7 +3828,7 @@ checksum = "d372029cb5195f9ab4e4b9aef550787dce78b124fcaee8d82519925defcd6f0d" [[package]] name = "sql_generation" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "anarchist-readable-name-generator-lib 0.2.0", "anyhow", @@ -4351,7 +4351,7 @@ dependencies = [ [[package]] name = "turso" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "rand 0.9.2", "rand_chacha 0.9.0", @@ -4363,7 +4363,7 @@ dependencies = [ [[package]] name = "turso-java" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "jni", "thiserror 2.0.16", @@ -4372,7 +4372,7 @@ dependencies = [ [[package]] name = "turso_cli" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "anyhow", "cfg-if", @@ -4408,7 +4408,7 @@ dependencies = [ 
[[package]] name = "turso_core" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "aegis", "aes", @@ -4466,7 +4466,7 @@ dependencies = [ [[package]] name = "turso_dart" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "flutter_rust_bridge", "turso_core", @@ -4474,7 +4474,7 @@ dependencies = [ [[package]] name = "turso_ext" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "chrono", "getrandom 0.3.2", @@ -4483,7 +4483,7 @@ dependencies = [ [[package]] name = "turso_ext_tests" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "env_logger 0.11.7", "lazy_static", @@ -4494,7 +4494,7 @@ dependencies = [ [[package]] name = "turso_macros" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "proc-macro2", "quote", @@ -4503,7 +4503,7 @@ dependencies = [ [[package]] name = "turso_node" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "chrono", "napi", @@ -4516,7 +4516,7 @@ dependencies = [ [[package]] name = "turso_parser" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "bitflags 2.9.4", "criterion", @@ -4532,7 +4532,7 @@ dependencies = [ [[package]] name = "turso_sqlite3" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "env_logger 0.11.7", "libc", @@ -4545,7 +4545,7 @@ dependencies = [ [[package]] name = "turso_sqlite3_parser" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "bitflags 2.9.4", "cc", @@ -4563,7 +4563,7 @@ dependencies = [ [[package]] name = "turso_stress" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "anarchist-readable-name-generator-lib 0.1.2", "antithesis_sdk", @@ -4579,7 +4579,7 @@ dependencies = [ [[package]] name = "turso_sync_engine" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "base64", "bytes", @@ -4605,7 +4605,7 @@ dependencies = [ [[package]] name = "turso_sync_js" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "genawaiter", "napi", @@ -4620,7 +4620,7 @@ dependencies = [ [[package]] name = "turso_whopper" -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index bab73af3a..1e828acfd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,30 +40,30 @@ exclude = [ ] [workspace.package] -version = "0.2.0-pre.9" +version = "0.2.0-pre.10" authors = ["the Limbo authors"] edition = "2021" license = "MIT" repository = "https://github.com/tursodatabase/turso" [workspace.dependencies] -turso = { path = "bindings/rust", version = "0.2.0-pre.9" } -turso_node = { path = "bindings/javascript", version = "0.2.0-pre.9" } -limbo_completion = { path = "extensions/completion", version = "0.2.0-pre.9" } -turso_core = { path = "core", version = "0.2.0-pre.9" } -turso_sync_engine = { path = "sync/engine", version = "0.2.0-pre.9" } -limbo_crypto = { path = "extensions/crypto", version = "0.2.0-pre.9" } -limbo_csv = { path = "extensions/csv", version = "0.2.0-pre.9" } -turso_ext = { path = "extensions/core", version = "0.2.0-pre.9" } -turso_ext_tests = { path = "extensions/tests", version = "0.2.0-pre.9" } -limbo_ipaddr = { path = "extensions/ipaddr", version = "0.2.0-pre.9" } -turso_macros = { path = "macros", version = "0.2.0-pre.9" } -limbo_percentile = { path = "extensions/percentile", version = "0.2.0-pre.9" } -limbo_regexp = { path = "extensions/regexp", version = "0.2.0-pre.9" } -turso_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.2.0-pre.9" } -limbo_uuid = { path = 
"extensions/uuid", version = "0.2.0-pre.9" } -turso_parser = { path = "parser", version = "0.2.0-pre.9" } -limbo_fuzzy = { path = "extensions/fuzzy", version = "0.2.0-pre.9" } +turso = { path = "bindings/rust", version = "0.2.0-pre.10" } +turso_node = { path = "bindings/javascript", version = "0.2.0-pre.10" } +limbo_completion = { path = "extensions/completion", version = "0.2.0-pre.10" } +turso_core = { path = "core", version = "0.2.0-pre.10" } +turso_sync_engine = { path = "sync/engine", version = "0.2.0-pre.10" } +limbo_crypto = { path = "extensions/crypto", version = "0.2.0-pre.10" } +limbo_csv = { path = "extensions/csv", version = "0.2.0-pre.10" } +turso_ext = { path = "extensions/core", version = "0.2.0-pre.10" } +turso_ext_tests = { path = "extensions/tests", version = "0.2.0-pre.10" } +limbo_ipaddr = { path = "extensions/ipaddr", version = "0.2.0-pre.10" } +turso_macros = { path = "macros", version = "0.2.0-pre.10" } +limbo_percentile = { path = "extensions/percentile", version = "0.2.0-pre.10" } +limbo_regexp = { path = "extensions/regexp", version = "0.2.0-pre.10" } +turso_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.2.0-pre.10" } +limbo_uuid = { path = "extensions/uuid", version = "0.2.0-pre.10" } +turso_parser = { path = "parser", version = "0.2.0-pre.10" } +limbo_fuzzy = { path = "extensions/fuzzy", version = "0.2.0-pre.10" } sql_generation = { path = "sql_generation" } strum = { version = "0.26", features = ["derive"] } strum_macros = "0.26" diff --git a/bindings/javascript/package-lock.json b/bindings/javascript/package-lock.json index 59393adbf..aba909bc7 100644 --- a/bindings/javascript/package-lock.json +++ b/bindings/javascript/package-lock.json @@ -1,11 +1,11 @@ { "name": "javascript", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "lockfileVersion": 3, "requires": true, "packages": { "": { - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "workspaces": [ "packages/common", "packages/native", @@ -4592,11 +4592,11 @@ }, "packages/browser": { "name": "@tursodatabase/database-browser", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "license": "MIT", "dependencies": { - "@tursodatabase/database-browser-common": "^0.2.0-pre.9", - "@tursodatabase/database-common": "^0.2.0-pre.9" + "@tursodatabase/database-browser-common": "^0.2.0-pre.10", + "@tursodatabase/database-common": "^0.2.0-pre.10" }, "devDependencies": { "@napi-rs/cli": "^3.1.5", @@ -4609,7 +4609,7 @@ }, "packages/browser-common": { "name": "@tursodatabase/database-browser-common", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "license": "MIT", "dependencies": { "@napi-rs/wasm-runtime": "^1.0.5" @@ -4620,7 +4620,7 @@ }, "packages/common": { "name": "@tursodatabase/database-common", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "license": "MIT", "devDependencies": { "typescript": "^5.9.2" @@ -4628,10 +4628,10 @@ }, "packages/native": { "name": "@tursodatabase/database", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "license": "MIT", "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.9" + "@tursodatabase/database-common": "^0.2.0-pre.10" }, "devDependencies": { "@napi-rs/cli": "^3.1.5", @@ -4645,12 +4645,12 @@ }, "sync/packages/browser": { "name": "@tursodatabase/sync-browser", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "license": "MIT", "dependencies": { - "@tursodatabase/database-browser-common": "^0.2.0-pre.9", - "@tursodatabase/database-common": "^0.2.0-pre.9", - "@tursodatabase/sync-common": 
"^0.2.0-pre.9" + "@tursodatabase/database-browser-common": "^0.2.0-pre.10", + "@tursodatabase/database-common": "^0.2.0-pre.10", + "@tursodatabase/sync-common": "^0.2.0-pre.10" }, "devDependencies": { "@napi-rs/cli": "^3.1.5", @@ -4663,10 +4663,10 @@ }, "sync/packages/common": { "name": "@tursodatabase/sync-common", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "license": "MIT", "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.9" + "@tursodatabase/database-common": "^0.2.0-pre.10" }, "devDependencies": { "typescript": "^5.9.2" @@ -4674,11 +4674,11 @@ }, "sync/packages/native": { "name": "@tursodatabase/sync", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "license": "MIT", "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.9", - "@tursodatabase/sync-common": "^0.2.0-pre.9" + "@tursodatabase/database-common": "^0.2.0-pre.10", + "@tursodatabase/sync-common": "^0.2.0-pre.10" }, "devDependencies": { "@napi-rs/cli": "^3.1.5", diff --git a/bindings/javascript/package.json b/bindings/javascript/package.json index 849f25785..48445d48c 100644 --- a/bindings/javascript/package.json +++ b/bindings/javascript/package.json @@ -14,5 +14,5 @@ "sync/packages/native", "sync/packages/browser" ], - "version": "0.2.0-pre.9" + "version": "0.2.0-pre.10" } diff --git a/bindings/javascript/packages/browser-common/package.json b/bindings/javascript/packages/browser-common/package.json index e2a68d6fe..52fc96198 100644 --- a/bindings/javascript/packages/browser-common/package.json +++ b/bindings/javascript/packages/browser-common/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/database-browser-common", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" diff --git a/bindings/javascript/packages/browser/package.json b/bindings/javascript/packages/browser/package.json index 19f592ff9..bf800bf30 100644 --- a/bindings/javascript/packages/browser/package.json +++ b/bindings/javascript/packages/browser/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/database-browser", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -51,7 +51,7 @@ ] }, "dependencies": { - "@tursodatabase/database-browser-common": "^0.2.0-pre.9", - "@tursodatabase/database-common": "^0.2.0-pre.9" + "@tursodatabase/database-browser-common": "^0.2.0-pre.10", + "@tursodatabase/database-common": "^0.2.0-pre.10" } } diff --git a/bindings/javascript/packages/common/package.json b/bindings/javascript/packages/common/package.json index 4916ece55..5a698aca6 100644 --- a/bindings/javascript/packages/common/package.json +++ b/bindings/javascript/packages/common/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/database-common", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" diff --git a/bindings/javascript/packages/native/package.json b/bindings/javascript/packages/native/package.json index eed6f8886..9e1cc73c7 100644 --- a/bindings/javascript/packages/native/package.json +++ b/bindings/javascript/packages/native/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/database", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -47,7 +47,7 @@ ] }, "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.9" + "@tursodatabase/database-common": 
"^0.2.0-pre.10" }, "imports": { "#index": "./index.js" diff --git a/bindings/javascript/sync/packages/browser/package.json b/bindings/javascript/sync/packages/browser/package.json index 8ef179b53..992bf573f 100644 --- a/bindings/javascript/sync/packages/browser/package.json +++ b/bindings/javascript/sync/packages/browser/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/sync-browser", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -54,8 +54,8 @@ "#index": "./index.js" }, "dependencies": { - "@tursodatabase/database-browser-common": "^0.2.0-pre.9", - "@tursodatabase/database-common": "^0.2.0-pre.9", - "@tursodatabase/sync-common": "^0.2.0-pre.9" + "@tursodatabase/database-browser-common": "^0.2.0-pre.10", + "@tursodatabase/database-common": "^0.2.0-pre.10", + "@tursodatabase/sync-common": "^0.2.0-pre.10" } } diff --git a/bindings/javascript/sync/packages/common/package.json b/bindings/javascript/sync/packages/common/package.json index bdae31766..6bd716e3a 100644 --- a/bindings/javascript/sync/packages/common/package.json +++ b/bindings/javascript/sync/packages/common/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/sync-common", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -23,6 +23,6 @@ "test": "echo 'no tests'" }, "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.9" + "@tursodatabase/database-common": "^0.2.0-pre.10" } } diff --git a/bindings/javascript/sync/packages/native/package.json b/bindings/javascript/sync/packages/native/package.json index a649f068c..4a318e469 100644 --- a/bindings/javascript/sync/packages/native/package.json +++ b/bindings/javascript/sync/packages/native/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/sync", - "version": "0.2.0-pre.9", + "version": "0.2.0-pre.10", "repository": { "type": "git", "url": "https://github.com/tursodatabase/turso" @@ -44,8 +44,8 @@ ] }, "dependencies": { - "@tursodatabase/database-common": "^0.2.0-pre.9", - "@tursodatabase/sync-common": "^0.2.0-pre.9" + "@tursodatabase/database-common": "^0.2.0-pre.10", + "@tursodatabase/sync-common": "^0.2.0-pre.10" }, "imports": { "#index": "./index.js" diff --git a/bindings/javascript/yarn.lock b/bindings/javascript/yarn.lock index 1ea9a8437..9a69e0937 100644 --- a/bindings/javascript/yarn.lock +++ b/bindings/javascript/yarn.lock @@ -1575,7 +1575,7 @@ __metadata: languageName: node linkType: hard -"@tursodatabase/database-browser-common@npm:^0.2.0-pre.9, @tursodatabase/database-browser-common@workspace:packages/browser-common": +"@tursodatabase/database-browser-common@npm:^0.2.0-pre.10, @tursodatabase/database-browser-common@workspace:packages/browser-common": version: 0.0.0-use.local resolution: "@tursodatabase/database-browser-common@workspace:packages/browser-common" dependencies: @@ -1589,8 +1589,8 @@ __metadata: resolution: "@tursodatabase/database-browser@workspace:packages/browser" dependencies: "@napi-rs/cli": "npm:^3.1.5" - "@tursodatabase/database-browser-common": "npm:^0.2.0-pre.9" - "@tursodatabase/database-common": "npm:^0.2.0-pre.9" + "@tursodatabase/database-browser-common": "npm:^0.2.0-pre.10" + "@tursodatabase/database-common": "npm:^0.2.0-pre.10" "@vitest/browser": "npm:^3.2.4" playwright: "npm:^1.55.0" typescript: "npm:^5.9.2" @@ -1599,7 +1599,7 @@ __metadata: languageName: unknown linkType: soft -"@tursodatabase/database-common@npm:^0.2.0-pre.9, 
@tursodatabase/database-common@workspace:packages/common": +"@tursodatabase/database-common@npm:^0.2.0-pre.10, @tursodatabase/database-common@workspace:packages/common": version: 0.0.0-use.local resolution: "@tursodatabase/database-common@workspace:packages/common" dependencies: @@ -1612,7 +1612,7 @@ __metadata: resolution: "@tursodatabase/database@workspace:packages/native" dependencies: "@napi-rs/cli": "npm:^3.1.5" - "@tursodatabase/database-common": "npm:^0.2.0-pre.9" + "@tursodatabase/database-common": "npm:^0.2.0-pre.10" "@types/node": "npm:^24.3.1" better-sqlite3: "npm:^12.2.0" drizzle-kit: "npm:^0.31.4" @@ -1627,9 +1627,9 @@ __metadata: resolution: "@tursodatabase/sync-browser@workspace:sync/packages/browser" dependencies: "@napi-rs/cli": "npm:^3.1.5" - "@tursodatabase/database-browser-common": "npm:^0.2.0-pre.9" - "@tursodatabase/database-common": "npm:^0.2.0-pre.9" - "@tursodatabase/sync-common": "npm:^0.2.0-pre.9" + "@tursodatabase/database-browser-common": "npm:^0.2.0-pre.10" + "@tursodatabase/database-common": "npm:^0.2.0-pre.10" + "@tursodatabase/sync-common": "npm:^0.2.0-pre.10" "@vitest/browser": "npm:^3.2.4" playwright: "npm:^1.55.0" typescript: "npm:^5.9.2" @@ -1638,11 +1638,11 @@ __metadata: languageName: unknown linkType: soft -"@tursodatabase/sync-common@npm:^0.2.0-pre.9, @tursodatabase/sync-common@workspace:sync/packages/common": +"@tursodatabase/sync-common@npm:^0.2.0-pre.10, @tursodatabase/sync-common@workspace:sync/packages/common": version: 0.0.0-use.local resolution: "@tursodatabase/sync-common@workspace:sync/packages/common" dependencies: - "@tursodatabase/database-common": "npm:^0.2.0-pre.9" + "@tursodatabase/database-common": "npm:^0.2.0-pre.10" typescript: "npm:^5.9.2" languageName: unknown linkType: soft @@ -1652,8 +1652,8 @@ __metadata: resolution: "@tursodatabase/sync@workspace:sync/packages/native" dependencies: "@napi-rs/cli": "npm:^3.1.5" - "@tursodatabase/database-common": "npm:^0.2.0-pre.9" - "@tursodatabase/sync-common": "npm:^0.2.0-pre.9" + "@tursodatabase/database-common": "npm:^0.2.0-pre.10" + "@tursodatabase/sync-common": "npm:^0.2.0-pre.10" "@types/node": "npm:^24.3.1" typescript: "npm:^5.9.2" vitest: "npm:^3.2.4" From b7fc9fef40e7eeb83866183edec6f3cd12de765b Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 27 Sep 2025 13:12:19 -0400 Subject: [PATCH 56/65] Persist NOT NULL column constraint to schema table --- core/schema.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/schema.rs b/core/schema.rs index 20ae6380e..2d7c2f8d9 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1033,6 +1033,9 @@ impl BTreeTable { sql.push(' '); sql.push_str(&column.ty_str); } + if column.notnull { + sql.push_str(" NOT NULL"); + } if column.unique { sql.push_str(" UNIQUE"); From bd17c5d5df8edd8d739cead39f9accfcf06e4a0b Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 27 Sep 2025 13:15:48 -0400 Subject: [PATCH 57/65] Add regression test for alter table with notnull constraint --- testing/alter_table.test | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/testing/alter_table.test b/testing/alter_table.test index 172802395..8c5b25bec 100755 --- a/testing/alter_table.test +++ b/testing/alter_table.test @@ -204,3 +204,10 @@ do_execsql_test_on_specific_db {:memory:} alter-table-rename-to-quoted-identifie "CREATE INDEX idx ON \"t t\" (\"b b\")" "2" } + +# https://github.com/tursodatabase/turso/issues/3391 +do_execsql_test_on_specific_db {:memory:} alter-table-add-notnull-col { + CREATE TABLE t (a); + ALTER TABLE t ADD b NOT NULL; + .schema t +} 
{{CREATE TABLE t (a, b NOT NULL);}} From 6e3c30623c352415304b7fbda9b325ffc654e287 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 27 Sep 2025 13:17:50 -0400 Subject: [PATCH 58/65] Fix regression test to not use dot command --- testing/alter_table.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/alter_table.test b/testing/alter_table.test index 8c5b25bec..33adc5b34 100755 --- a/testing/alter_table.test +++ b/testing/alter_table.test @@ -209,5 +209,5 @@ do_execsql_test_on_specific_db {:memory:} alter-table-rename-to-quoted-identifie do_execsql_test_on_specific_db {:memory:} alter-table-add-notnull-col { CREATE TABLE t (a); ALTER TABLE t ADD b NOT NULL; - .schema t + SELECT sql FROM sqlite_schema WHERE type = 'table' AND name = 't'; } {{CREATE TABLE t (a, b NOT NULL);}} From 03046faccb18e0fb7a34171976470e521c6fb0ef Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 27 Sep 2025 13:56:04 -0400 Subject: [PATCH 59/65] Translate default value Expr::Id to Literal to allow for identifier in col def --- core/schema.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 20ae6380e..4c348d1ac 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1233,7 +1233,11 @@ pub fn create_table( ast::ColumnConstraint::NotNull { nullable, .. } => { notnull = !nullable; } - ast::ColumnConstraint::Default(ref expr) => default = Some(expr), + ast::ColumnConstraint::Default(ref expr) => { + default = Some( + translate_ident_to_string_literal(expr).unwrap_or(expr.clone()), + ); + } // TODO: for now we don't check Resolve type of unique ast::ColumnConstraint::Unique(on_conflict) => { if on_conflict.is_some() { @@ -1268,7 +1272,7 @@ pub fn create_table( primary_key, is_rowid_alias: typename_exactly_integer && primary_key, notnull, - default: default.cloned(), + default, unique, collation, hidden: false, @@ -1359,6 +1363,19 @@ pub fn create_table( }) } +pub fn translate_ident_to_string_literal(expr: &Expr) -> Option> { + match expr { + // SQLite treats a bare identifier as a string literal in DEFAULT clause + Expr::Name(Name::Ident(str)) | Expr::Id(Name::Ident(str)) => { + Some(Box::new(Expr::Literal(Literal::String(format!("'{str}'"))))) + } + Expr::Name(Name::Quoted(str)) | Expr::Id(Name::Quoted(str)) => Some(Box::new( + Expr::Literal(Literal::String(format!("'{}'", normalize_ident(str)))), + )), + _ => None, + } +} + pub fn _build_pseudo_table(columns: &[ResultColumn]) -> PseudoCursorType { let table = PseudoCursorType::new(); for column in columns { @@ -1415,7 +1432,8 @@ impl From<&ColumnDefinition> for Column { ast::ColumnConstraint::NotNull { .. } => notnull = true, ast::ColumnConstraint::Unique(..) 
=> unique = true,
                ast::ColumnConstraint::Default(expr) => {
-                    default.replace(expr.clone());
+                    default
+                        .replace(translate_ident_to_string_literal(expr).unwrap_or(expr.clone()));
                }
                ast::ColumnConstraint::Collate { collation_name } => {
                    collation.replace(

From ace2ac632a89adea726f059e2664803bc9b82c6d Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Sat, 27 Sep 2025 13:59:51 -0400
Subject: [PATCH 60/65] Remove semicolon from test

---
 testing/alter_table.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testing/alter_table.test b/testing/alter_table.test
index 33adc5b34..5a856eca9 100755
--- a/testing/alter_table.test
+++ b/testing/alter_table.test
@@ -210,4 +210,4 @@ do_execsql_test_on_specific_db {:memory:} alter-table-add-notnull-col {
   CREATE TABLE t (a);
   ALTER TABLE t ADD b NOT NULL;
   SELECT sql FROM sqlite_schema WHERE type = 'table' AND name = 't';
-} {{CREATE TABLE t (a, b NOT NULL);}}
+} {{CREATE TABLE t (a, b NOT NULL)}}

From 4315a34939b7a85b5573a78a03ae6f208c9ca8db Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Sat, 27 Sep 2025 14:13:45 -0400
Subject: [PATCH 61/65] Move png image to assets directory

---
 README.md                     |   2 +-
 turso.png => assets/turso.png | Bin
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename turso.png => assets/turso.png (100%)

diff --git a/README.md b/README.md
index b0bd7a251..1caaebf60 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 <p align="center">
-  <img alt="Turso Database" src="turso.png">
+  <img alt="Turso Database" src="assets/turso.png">
 </p>
 <h1 align="center">Turso Database</h1>
diff --git a/turso.png b/assets/turso.png similarity index 100% rename from turso.png rename to assets/turso.png From 30f80c2000823b4a8b1bba5bd8d67abb1d32964f Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 27 Sep 2025 17:38:45 -0400 Subject: [PATCH 62/65] Correct spelling issue in ForeignKey ast node --- parser/src/ast.rs | 4 ++-- parser/src/ast/fmt.rs | 12 ++++++------ parser/src/parser.rs | 34 +++++++++++++++++----------------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/parser/src/ast.rs b/parser/src/ast.rs index ed58d0bba..73cbf7ca6 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -1066,7 +1066,7 @@ pub enum ColumnConstraint { /// clause clause: ForeignKeyClause, /// `DEFERRABLE` - deref_clause: Option, + defer_clause: Option, }, /// `GENERATED` Generated { @@ -1118,7 +1118,7 @@ pub enum TableConstraint { /// `REFERENCES` clause: ForeignKeyClause, /// `DEFERRABLE` - deref_clause: Option, + defer_clause: Option, }, } diff --git a/parser/src/ast/fmt.rs b/parser/src/ast/fmt.rs index 80defd6d3..114bb4035 100644 --- a/parser/src/ast/fmt.rs +++ b/parser/src/ast/fmt.rs @@ -1573,12 +1573,12 @@ impl ToTokens for ColumnConstraint { } Self::ForeignKey { clause, - deref_clause, + defer_clause, } => { s.append(TK_REFERENCES, None)?; clause.to_tokens(s, context)?; - if let Some(deref_clause) = deref_clause { - deref_clause.to_tokens(s, context)?; + if let Some(defer_clause) = defer_clause { + defer_clause.to_tokens(s, context)?; } Ok(()) } @@ -1663,7 +1663,7 @@ impl ToTokens for TableConstraint { Self::ForeignKey { columns, clause, - deref_clause, + defer_clause, } => { s.append(TK_FOREIGN, None)?; s.append(TK_KEY, None)?; @@ -1672,8 +1672,8 @@ impl ToTokens for TableConstraint { s.append(TK_RP, None)?; s.append(TK_REFERENCES, None)?; clause.to_tokens(s, context)?; - if let Some(deref_clause) = deref_clause { - deref_clause.to_tokens(s, context)?; + if let Some(defer_clause) = defer_clause { + defer_clause.to_tokens(s, context)?; } Ok(()) } diff --git a/parser/src/parser.rs b/parser/src/parser.rs index aa756b03e..a5a563084 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -2688,11 +2688,11 @@ impl<'a> Parser<'a> { let columns = self.parse_eid_list(false)?; peek_expect!(self, TK_REFERENCES); let clause = self.parse_foreign_key_clause()?; - let deref_clause = self.parse_defer_subclause()?; + let defer_clause = self.parse_defer_subclause()?; Ok(TableConstraint::ForeignKey { columns, clause, - deref_clause, + defer_clause, }) } @@ -3300,10 +3300,10 @@ impl<'a> Parser<'a> { fn parse_reference_column_constraint(&mut self) -> Result { let clause = self.parse_foreign_key_clause()?; - let deref_clause = self.parse_defer_subclause()?; + let defer_clause = self.parse_defer_subclause()?; Ok(ColumnConstraint::ForeignKey { clause, - deref_clause, + defer_clause, }) } @@ -9420,7 +9420,7 @@ mod tests { columns: vec![], args: vec![] }, - deref_clause: None + defer_clause: None }, }, ], @@ -9460,7 +9460,7 @@ mod tests { RefArg::OnInsert(RefAct::SetNull), ] }, - deref_clause: None + defer_clause: None }, }, ], @@ -9500,7 +9500,7 @@ mod tests { RefArg::OnUpdate(RefAct::SetNull), ] }, - deref_clause: None + defer_clause: None }, }, ], @@ -9540,7 +9540,7 @@ mod tests { RefArg::OnDelete(RefAct::SetNull), ] }, - deref_clause: None + defer_clause: None }, }, ], @@ -9580,7 +9580,7 @@ mod tests { RefArg::OnDelete(RefAct::SetDefault), ] }, - deref_clause: None + defer_clause: None }, }, ], @@ -9620,7 +9620,7 @@ mod tests { RefArg::OnDelete(RefAct::Cascade), ] }, - deref_clause: 
None + defer_clause: None }, }, ], @@ -9660,7 +9660,7 @@ mod tests { RefArg::OnDelete(RefAct::Restrict), ] }, - deref_clause: None + defer_clause: None }, }, ], @@ -9700,7 +9700,7 @@ mod tests { RefArg::OnDelete(RefAct::NoAction), ] }, - deref_clause: None + defer_clause: None }, }, ], @@ -9726,7 +9726,7 @@ mod tests { columns: vec![], args: vec![] }, - deref_clause: Some(DeferSubclause { + defer_clause: Some(DeferSubclause { deferrable: true, init_deferred: None, }) @@ -9755,7 +9755,7 @@ mod tests { columns: vec![], args: vec![] }, - deref_clause: Some(DeferSubclause { + defer_clause: Some(DeferSubclause { deferrable: false, init_deferred: Some(InitDeferredPred::InitiallyImmediate), }) @@ -9784,7 +9784,7 @@ mod tests { columns: vec![], args: vec![] }, - deref_clause: Some(DeferSubclause { + defer_clause: Some(DeferSubclause { deferrable: false, init_deferred: Some(InitDeferredPred::InitiallyDeferred), }) @@ -9813,7 +9813,7 @@ mod tests { columns: vec![], args: vec![] }, - deref_clause: Some(DeferSubclause { + defer_clause: Some(DeferSubclause { deferrable: false, init_deferred: Some(InitDeferredPred::InitiallyDeferred), }) @@ -10207,7 +10207,7 @@ mod tests { ], args: vec![], }, - deref_clause: None, + defer_clause: None, }, }, NamedTableConstraint { From aba596441c4c658932c7947a5b5e381a50fdda6b Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sun, 28 Sep 2025 13:38:08 +0300 Subject: [PATCH 63/65] core/storage: Wrap WalFile::max_frame_read_lock_index with AtomicUsize --- core/storage/wal.rs | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/core/storage/wal.rs b/core/storage/wal.rs index 6172bb1de..4307345a2 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -8,7 +8,7 @@ use tracing::{instrument, Level}; use parking_lot::RwLock; use std::fmt::{Debug, Formatter}; -use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering}; use std::{cell::Cell, fmt, sync::Arc}; use super::buffer_pool::BufferPool; @@ -569,7 +569,7 @@ pub struct WalFile { // min and max frames for this connection /// This is the index to the read_lock in WalFileShared that we are holding. This lock contains /// the max frame for this connection. - max_frame_read_lock_index: Cell, + max_frame_read_lock_index: AtomicUsize, /// Max frame allowed to lookup range=(minframe..max_frame) max_frame: u64, /// Start of range to look for frames range=(minframe..max_frame) @@ -812,9 +812,11 @@ impl Wal for WalFile { #[instrument(skip_all, level = Level::DEBUG)] fn begin_read_tx(&mut self) -> Result { turso_assert!( - self.max_frame_read_lock_index.get().eq(&NO_LOCK_HELD), + self.max_frame_read_lock_index + .load(Ordering::Acquire) + .eq(&NO_LOCK_HELD), "cannot start a new read tx without ending an existing one, lock_value={}, expected={}", - self.max_frame_read_lock_index.get(), + self.max_frame_read_lock_index.load(Ordering::Acquire), NO_LOCK_HELD ); let (shared_max, nbackfills, last_checksum, checkpoint_seq) = { @@ -843,7 +845,8 @@ impl Wal for WalFile { // we need to keep self.max_frame set to the appropriate // max frame in the wal at the time this transaction starts. 
self.max_frame = shared_max; - self.max_frame_read_lock_index.set(lock_0_idx); + self.max_frame_read_lock_index + .store(lock_0_idx, Ordering::Release); self.min_frame = nbackfills + 1; self.last_checksum = last_checksum; return Ok(db_changed); @@ -937,7 +940,8 @@ impl Wal for WalFile { } self.min_frame = nb2 + 1; self.max_frame = best_mark as u64; - self.max_frame_read_lock_index.set(best_idx as usize); + self.max_frame_read_lock_index + .store(best_idx as usize, Ordering::Release); tracing::debug!( "begin_read_tx(min={}, max={}, slot={}, max_frame_in_wal={})", self.min_frame, @@ -952,10 +956,11 @@ impl Wal for WalFile { #[inline(always)] #[instrument(skip_all, level = Level::DEBUG)] fn end_read_tx(&self) { - let slot = self.max_frame_read_lock_index.get(); + let slot = self.max_frame_read_lock_index.load(Ordering::Acquire); if slot != NO_LOCK_HELD { self.get_shared_mut().read_locks[slot].unlock(); - self.max_frame_read_lock_index.set(NO_LOCK_HELD); + self.max_frame_read_lock_index + .store(NO_LOCK_HELD, Ordering::Release); tracing::debug!("end_read_tx(slot={slot})"); } else { tracing::debug!("end_read_tx(slot=no_lock)"); @@ -972,7 +977,7 @@ impl Wal for WalFile { // assert(pWal->readLock >= 0); // assert(pWal->writeLock == 0 && pWal->iReCksum == 0); turso_assert!( - self.max_frame_read_lock_index.get() != NO_LOCK_HELD, + self.max_frame_read_lock_index.load(Ordering::Acquire) != NO_LOCK_HELD, "must have a read transaction to begin a write transaction" ); if !shared.write_lock.write() { @@ -1033,7 +1038,9 @@ impl Wal for WalFile { // min_frame is set to nbackfill + 1 and max_frame is set to shared_max_frame // // by default, SQLite tries to restart log file in this case - but for now let's keep it simple in the turso-db - if self.max_frame_read_lock_index.get() == 0 && self.max_frame < self.min_frame { + if self.max_frame_read_lock_index.load(Ordering::Acquire) == 0 + && self.max_frame < self.min_frame + { tracing::debug!( "find_frame(page_id={}, frame_watermark={:?}): max_frame is 0 - read from DB file", page_id, @@ -1618,7 +1625,7 @@ impl WalFile { checkpoint_seq: AtomicU32::new(0), syncing: Arc::new(AtomicBool::new(false)), min_frame: 0, - max_frame_read_lock_index: NO_LOCK_HELD.into(), + max_frame_read_lock_index: AtomicUsize::new(NO_LOCK_HELD), last_checksum, prev_checkpoint: CheckpointResult::default(), checkpoint_guard: None, @@ -1693,7 +1700,8 @@ impl WalFile { } fn reset_internal_states(&mut self) { - self.max_frame_read_lock_index.set(NO_LOCK_HELD); + self.max_frame_read_lock_index + .store(NO_LOCK_HELD, Ordering::Release); self.ongoing_checkpoint.reset(); self.syncing.store(false, Ordering::SeqCst); } @@ -3037,7 +3045,7 @@ pub mod test { { let wal_any = wal.as_any(); if let Some(wal_file) = wal_any.downcast_ref::() { - return wal_file.max_frame_read_lock_index.get() == expected_slot; + return wal_file.max_frame_read_lock_index.load(Ordering::Acquire) == expected_slot; } } From 250ac66c36ec77e5f5eedcce1f820546e54824c0 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sun, 28 Sep 2025 14:16:18 +0300 Subject: [PATCH 64/65] github: Increase tpc-h-criterion timeout to 60 minutes --- .github/workflows/rust_perf.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust_perf.yml b/.github/workflows/rust_perf.yml index f93051b08..f88930005 100644 --- a/.github/workflows/rust_perf.yml +++ b/.github/workflows/rust_perf.yml @@ -104,7 +104,7 @@ jobs: tpc-h-criterion: runs-on: ubuntu-latest - timeout-minutes: 30 + timeout-minutes: 60 env: DB_FILE: 
"perf/tpc-h/TPC-H.db" steps: From d3abeb6281435691eb82f664da56be5a604cbc2a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sun, 28 Sep 2025 14:09:20 +0300 Subject: [PATCH 65/65] core/storage: Wrap WalFile::{max,min}_frame with AtomicU64 --- core/storage/wal.rs | 75 +++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/core/storage/wal.rs b/core/storage/wal.rs index 4307345a2..899da558a 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -571,9 +571,9 @@ pub struct WalFile { /// the max frame for this connection. max_frame_read_lock_index: AtomicUsize, /// Max frame allowed to lookup range=(minframe..max_frame) - max_frame: u64, + max_frame: AtomicU64, /// Start of range to look for frames range=(minframe..max_frame) - min_frame: u64, + min_frame: AtomicU64, /// Check of last frame in WAL, this is a cumulative checksum over all frames in the WAL last_checksum: (u32, u32), checkpoint_seq: AtomicU32, @@ -827,7 +827,7 @@ impl Wal for WalFile { let checkpoint_seq = shared.wal_header.lock().checkpoint_seq; (mx, nb, ck, checkpoint_seq) }; - let db_changed = shared_max != self.max_frame + let db_changed = shared_max != self.max_frame.load(Ordering::Acquire) || last_checksum != self.last_checksum || checkpoint_seq != self.checkpoint_seq.load(Ordering::Acquire); @@ -844,10 +844,10 @@ impl Wal for WalFile { } // we need to keep self.max_frame set to the appropriate // max frame in the wal at the time this transaction starts. - self.max_frame = shared_max; + self.max_frame.store(shared_max, Ordering::Release); self.max_frame_read_lock_index .store(lock_0_idx, Ordering::Release); - self.min_frame = nbackfills + 1; + self.min_frame.store(nbackfills + 1, Ordering::Release); self.last_checksum = last_checksum; return Ok(db_changed); } @@ -938,14 +938,14 @@ impl Wal for WalFile { { return Err(LimboError::Busy); } - self.min_frame = nb2 + 1; - self.max_frame = best_mark as u64; + self.min_frame.store(nb2 + 1, Ordering::Release); + self.max_frame.store(best_mark as u64, Ordering::Release); self.max_frame_read_lock_index .store(best_idx as usize, Ordering::Release); tracing::debug!( "begin_read_tx(min={}, max={}, slot={}, max_frame_in_wal={})", - self.min_frame, - self.max_frame, + self.min_frame.load(Ordering::Acquire), + self.max_frame.load(Ordering::Acquire), best_idx, shared_max ); @@ -988,16 +988,16 @@ impl Wal for WalFile { shared.nbackfills.load(Ordering::Acquire), shared.last_checksum, ); - if self.max_frame == shared_max { + if self.max_frame.load(Ordering::Acquire) == shared_max { // Snapshot still valid; adopt counters drop(shared); self.last_checksum = last_checksum; - self.min_frame = nbackfills + 1; + self.min_frame.store(nbackfills + 1, Ordering::Release); return Ok(()); } // Snapshot is stale, give up and let caller retry from scratch - tracing::debug!("unable to upgrade transaction from read to write: snapshot is stale, give up and let caller retry from scratch, self.max_frame={}, shared_max={}", self.max_frame, shared_max); + tracing::debug!("unable to upgrade transaction from read to write: snapshot is stale, give up and let caller retry from scratch, self.max_frame={}, shared_max={}", self.max_frame.load(Ordering::Acquire), shared_max); shared.write_lock.unlock(); Err(LimboError::Busy) } @@ -1019,7 +1019,7 @@ impl Wal for WalFile { ); turso_assert!( - frame_watermark.unwrap_or(0) <= self.max_frame, + frame_watermark.unwrap_or(0) <= self.max_frame.load(Ordering::Acquire), "frame_watermark must be <= than current WAL 
max_frame value" ); @@ -1039,7 +1039,7 @@ impl Wal for WalFile { // // by default, SQLite tries to restart log file in this case - but for now let's keep it simple in the turso-db if self.max_frame_read_lock_index.load(Ordering::Acquire) == 0 - && self.max_frame < self.min_frame + && self.max_frame.load(Ordering::Acquire) < self.min_frame.load(Ordering::Acquire) { tracing::debug!( "find_frame(page_id={}, frame_watermark={:?}): max_frame is 0 - read from DB file", @@ -1050,15 +1050,15 @@ impl Wal for WalFile { } let shared = self.get_shared(); let frames = shared.frame_cache.lock(); - let range = frame_watermark - .map(|x| 0..=x) - .unwrap_or(self.min_frame..=self.max_frame); + let range = frame_watermark.map(|x| 0..=x).unwrap_or( + self.min_frame.load(Ordering::Acquire)..=self.max_frame.load(Ordering::Acquire), + ); tracing::debug!( "find_frame(page_id={}, frame_watermark={:?}): min_frame={}, max_frame={}", page_id, frame_watermark, - self.min_frame, - self.max_frame + self.min_frame.load(Ordering::Acquire), + self.max_frame.load(Ordering::Acquire) ); if let Some(list) = frames.get(&page_id) { if let Some(f) = list.iter().rfind(|&&f| range.contains(&f)) { @@ -1211,14 +1211,15 @@ impl Wal for WalFile { self.page_size(), ))); } - if frame_id > self.max_frame + 1 { + if frame_id > self.max_frame.load(Ordering::Acquire) + 1 { // attempt to write frame out of sequential order - error out return Err(LimboError::InvalidArgument(format!( "frame_id is beyond next frame in the WAL: frame_id={}, max_frame={}", - frame_id, self.max_frame + frame_id, + self.max_frame.load(Ordering::Acquire) ))); } - if frame_id <= self.max_frame { + if frame_id <= self.max_frame.load(Ordering::Acquire) { // just validate if page content from the frame matches frame in the WAL let offset = self.frame_offset(frame_id); let conflict = Arc::new(Cell::new(false)); @@ -1341,11 +1342,11 @@ impl Wal for WalFile { } fn get_max_frame(&self) -> u64 { - self.max_frame + self.max_frame.load(Ordering::Acquire) } fn get_min_frame(&self) -> u64 { - self.min_frame + self.min_frame.load(Ordering::Acquire) } #[instrument(err, skip_all, level = Level::DEBUG)] @@ -1364,7 +1365,7 @@ impl Wal for WalFile { (max_frame, shared.last_checksum) }; self.last_checksum = last_checksum; - self.max_frame = max_frame; + self.max_frame.store(max_frame, Ordering::Release); self.reset_internal_states(); Ok(()) } @@ -1372,8 +1373,10 @@ impl Wal for WalFile { #[instrument(skip_all, level = Level::DEBUG)] fn finish_append_frames_commit(&mut self) -> Result<()> { let mut shared = self.get_shared_mut(); - shared.max_frame.store(self.max_frame, Ordering::Release); - tracing::trace!(self.max_frame, ?self.last_checksum); + shared + .max_frame + .store(self.max_frame.load(Ordering::Acquire), Ordering::Release); + tracing::trace!(max_frame = self.max_frame.load(Ordering::Acquire), ?self.last_checksum); shared.last_checksum = self.last_checksum; Ok(()) } @@ -1435,7 +1438,7 @@ impl Wal for WalFile { checksum }; - self.max_frame = 0; + self.max_frame.store(0, Ordering::Release); let shared = self.get_shared(); assert!(shared.enabled.load(Ordering::SeqCst), "WAL must be enabled"); let file = shared.file.as_ref().unwrap(); @@ -1492,7 +1495,7 @@ impl Wal for WalFile { // Rolling checksum input to each frame build let mut rolling_checksum: (u32, u32) = self.last_checksum; - let mut next_frame_id = self.max_frame + 1; + let mut next_frame_id = self.max_frame.load(Ordering::Acquire) + 1; // Build every frame in order, updating the rolling checksum for (idx, page) in 
pages.iter().enumerate() { let page_id = page.get().id; @@ -1538,7 +1541,7 @@ impl Wal for WalFile { next_frame_id += 1; } - let first_frame_id = self.max_frame + 1; + let first_frame_id = self.max_frame.load(Ordering::Acquire) + 1; let start_off = self.frame_offset(first_frame_id); // pre-advance in-memory WAL state @@ -1584,7 +1587,7 @@ impl Wal for WalFile { fn update_max_frame(&mut self) { let new_max_frame = self.get_shared().max_frame.load(Ordering::Acquire); - self.max_frame = new_max_frame; + self.max_frame.store(new_max_frame, Ordering::Release); } } @@ -1607,7 +1610,7 @@ impl WalFile { Self { io, // default to max frame in WAL, so that when we read schema we can read from WAL too if it's there. - max_frame, + max_frame: AtomicU64::new(max_frame), shared, ongoing_checkpoint: OngoingCheckpoint { time: now, @@ -1624,7 +1627,7 @@ impl WalFile { buffer_pool, checkpoint_seq: AtomicU32::new(0), syncing: Arc::new(AtomicBool::new(false)), - min_frame: 0, + min_frame: AtomicU64::new(0), max_frame_read_lock_index: AtomicUsize::new(NO_LOCK_HELD), last_checksum, prev_checkpoint: CheckpointResult::default(), @@ -1684,7 +1687,7 @@ impl WalFile { fn complete_append_frame(&mut self, page_id: u64, frame_id: u64, checksums: (u32, u32)) { self.last_checksum = checksums; - self.max_frame = frame_id; + self.max_frame.store(frame_id, Ordering::Release); let shared = self.get_shared(); { let mut frame_cache = shared.frame_cache.lock(); @@ -2112,8 +2115,8 @@ impl WalFile { self.get_shared_mut().restart_wal_header(&self.io, mode); let cksm = self.get_shared().last_checksum; self.last_checksum = cksm; - self.max_frame = 0; - self.min_frame = 0; + self.max_frame.store(0, Ordering::Release); + self.min_frame.store(0, Ordering::Release); self.checkpoint_seq.fetch_add(1, Ordering::Release); Ok(()) }
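
A note on what the materialized-view tests near the top of this series exercise: on every base-table INSERT, UPDATE, or DELETE, the view's WHERE predicate (BETWEEN, IN, or their combination) decides whether the affected row enters, leaves, or stays in the materialized result. The delta step reduces to the toy sketch below; Row, in_view, and apply_update are invented for illustration and are not Turso's implementation.

    #[derive(Clone, Debug, PartialEq)]
    struct Row {
        id: i64,
        value: i64,
    }

    // The view predicate: value BETWEEN 10 AND 20.
    fn in_view(row: &Row) -> bool {
        (10..=20).contains(&row.value)
    }

    // On UPDATE, re-evaluate the predicate against the old and new row versions.
    fn apply_update(view: &mut Vec<Row>, old: &Row, new: &Row) {
        match (in_view(old), in_view(new)) {
            (true, false) => view.retain(|r| r.id != old.id), // row falls out of the view
            (false, true) => view.push(new.clone()),          // row enters the view
            (true, true) => {
                if let Some(r) = view.iter_mut().find(|r| r.id == old.id) {
                    *r = new.clone(); // row stays, with updated payload
                }
            }
            (false, false) => {}
        }
    }

    fn main() {
        // Mirrors the matview-between-incremental test: value 15 is in range,
        // so updating it to 30 must evict the row from the view.
        let mut view = vec![Row { id: 2, value: 15 }];
        let old = Row { id: 2, value: 15 };
        let new = Row { id: 2, value: 30 };
        apply_update(&mut view, &old, &new);
        assert!(view.is_empty());
    }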
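
Patches 49 through 53 repeat one conversion: state shared between a struct and an I/O completion moves from Rc<Cell<T>> to an Arc over an atomic or a lock, so the completion may fire on another thread. The smallest instance is the syncing flag from patch 49. A self-contained sketch of the shape, with SyncState and start_sync as invented names:

    use std::sync::{
        atomic::{AtomicBool, Ordering},
        Arc,
    };

    struct SyncState {
        syncing: Arc<AtomicBool>,
    }

    impl SyncState {
        // Set the flag and hand back a completion that clears it. The closure
        // owns its own Arc clone, so it is Send + 'static and may run on any
        // thread, which Rc<Cell<bool>> could not allow.
        fn start_sync(&self) -> impl FnOnce() + Send + 'static {
            self.syncing.store(true, Ordering::SeqCst);
            let syncing = self.syncing.clone();
            move || syncing.store(false, Ordering::SeqCst)
        }

        fn is_syncing(&self) -> bool {
            self.syncing.load(Ordering::SeqCst)
        }
    }

    fn main() {
        let state = SyncState {
            syncing: Arc::new(AtomicBool::new(false)),
        };
        let complete = state.start_sync();
        assert!(state.is_syncing());
        // The completion may fire from another thread; AtomicBool makes that sound.
        std::thread::spawn(complete).join().unwrap();
        assert!(!state.is_syncing());
    }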
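
The sorter patches 50 and 51 apply the same idea to a buffer: the read completion appends bytes into an Arc<RwLock<Vec<u8>>> and publishes the new length through an Arc<AtomicUsize>. Roughly, with make_read_completion standing in for the closure built inside SortedChunk::read:

    use std::sync::{
        atomic::{AtomicUsize, Ordering},
        Arc, RwLock,
    };

    // Build a completion that appends freshly read bytes to the shared buffer
    // and publishes the new length. make_read_completion is invented for the
    // sketch; the real closure also tracks chunk IO state and EOF.
    fn make_read_completion(
        buffer: Arc<RwLock<Vec<u8>>>,
        buffer_len: Arc<AtomicUsize>,
    ) -> impl Fn(&[u8]) + Send + Sync {
        move |read_buf: &[u8]| {
            let mut buf = buffer.write().unwrap();
            let len = buffer_len.load(Ordering::SeqCst);
            buf[len..len + read_buf.len()].copy_from_slice(read_buf);
            buffer_len.store(len + read_buf.len(), Ordering::SeqCst);
        }
    }

    fn main() {
        let buffer = Arc::new(RwLock::new(vec![0u8; 8]));
        let len = Arc::new(AtomicUsize::new(0));
        let complete = make_read_completion(buffer.clone(), len.clone());
        complete(b"abc");
        complete(b"de");
        let n = len.load(Ordering::SeqCst);
        assert_eq!(&buffer.read().unwrap()[..n], b"abcde");
    }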
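
The unsafe impl Send/Sync for Page in patch 54 is a promise the compiler cannot check: every path that touches the UnsafeCell must first coordinate through the atomic page flags. Below is a stripped-down sketch of that contract, with a single LOCKED bit standing in for the pager's actual flag protocol:

    use std::cell::UnsafeCell;
    use std::sync::atomic::{AtomicUsize, Ordering};

    struct Page {
        flags: AtomicUsize,
        inner: UnsafeCell<u64>, // stands in for the page contents
    }

    // SAFETY (of this sketch): `inner` is only touched while the LOCKED bit in
    // `flags` is held, so no two threads alias it mutably. This is the kind of
    // invariant the real impls assert and the compiler cannot verify.
    unsafe impl Send for Page {}
    unsafe impl Sync for Page {}

    const LOCKED: usize = 1;

    impl Page {
        fn try_with_inner(&self, f: impl FnOnce(&mut u64)) -> bool {
            // Atomically set the bit; back off if another thread already holds it.
            if self.flags.fetch_or(LOCKED, Ordering::Acquire) & LOCKED != 0 {
                return false;
            }
            f(unsafe { &mut *self.inner.get() });
            self.flags.fetch_and(!LOCKED, Ordering::Release);
            true
        }
    }

    fn main() {
        let page = Page {
            flags: AtomicUsize::new(0),
            inner: UnsafeCell::new(0),
        };
        assert!(page.try_with_inner(|v| *v += 1));
        assert!(page.try_with_inner(|v| assert_eq!(*v, 1)));
    }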
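
The bug behind patch 56 lives in SQL regeneration: the text stored in sqlite_schema is rebuilt from the in-memory column definitions, so any constraint the builder forgets to emit is silently lost on the next schema reload. A pared-down sketch of the rebuild with the previously missing NOT NULL branch in place (this Column is a reduced stand-in for the real one):

    struct Column {
        name: String,
        ty_str: String,
        notnull: bool,
        unique: bool,
    }

    // Rebuild one column definition for the schema text. Before patch 56 the
    // `notnull` branch was missing, so "ALTER TABLE t ADD b NOT NULL" came
    // back as plain "b" once the schema was re-read (issue #3391).
    fn column_sql(column: &Column) -> String {
        let mut sql = column.name.clone();
        if !column.ty_str.is_empty() {
            sql.push(' ');
            sql.push_str(&column.ty_str);
        }
        if column.notnull {
            sql.push_str(" NOT NULL");
        }
        if column.unique {
            sql.push_str(" UNIQUE");
        }
        sql
    }

    fn main() {
        let b = Column {
            name: "b".into(),
            ty_str: String::new(),
            notnull: true,
            unique: false,
        };
        assert_eq!(column_sql(&b), "b NOT NULL");
    }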
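
Patch 59 encodes a SQLite quirk: a bare identifier in a DEFAULT clause is read as a string literal, so CREATE TABLE t (a TEXT DEFAULT hello) stores the default 'hello'. The normalization can be shown on a toy expression type; this Expr is a simplification, not the turso_parser AST:

    #[derive(Clone, Debug, PartialEq)]
    enum Expr {
        Ident(String),     // bare identifier: DEFAULT hello
        StringLit(String), // string literal:  DEFAULT 'hello'
        Number(i64),       // numeric literal: DEFAULT 42
    }

    // Fold a bare identifier into the string literal SQLite would store;
    // every other expression kind passes through unchanged.
    fn normalize_default(expr: &Expr) -> Expr {
        match expr {
            Expr::Ident(s) => Expr::StringLit(format!("'{s}'")),
            other => other.clone(),
        }
    }

    fn main() {
        assert_eq!(
            normalize_default(&Expr::Ident("hello".into())),
            Expr::StringLit("'hello'".into())
        );
        assert_eq!(normalize_default(&Expr::Number(42)), Expr::Number(42));
    }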
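
Patches 63 and 65 finish moving WalFile's per-connection snapshot state, the read-lock slot index and the min/max frame bounds, onto atomics, which is what lets assertions and the Debug impl inspect a connection from another thread. The slot discipline reduces to: hold NO_LOCK_HELD between transactions, publish the snapshot and then the slot on begin, clear the slot on end. A sketch with an invented ReadTx type and an assumed value for the NO_LOCK_HELD sentinel:

    use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};

    const NO_LOCK_HELD: usize = usize::MAX; // assumed sentinel for the sketch

    struct ReadTx {
        slot: AtomicUsize,    // which WAL read-lock slot this connection holds
        max_frame: AtomicU64, // the snapshot's upper frame bound
    }

    impl ReadTx {
        fn begin(&self, slot: usize, shared_max: u64) {
            assert_eq!(
                self.slot.load(Ordering::Acquire),
                NO_LOCK_HELD,
                "cannot start a new read tx without ending the previous one"
            );
            // Publish the snapshot before the slot, so anyone who observes the
            // slot also observes a consistent max_frame.
            self.max_frame.store(shared_max, Ordering::Release);
            self.slot.store(slot, Ordering::Release);
        }

        fn end(&self) {
            self.slot.store(NO_LOCK_HELD, Ordering::Release);
        }
    }

    fn main() {
        let tx = ReadTx {
            slot: AtomicUsize::new(NO_LOCK_HELD),
            max_frame: AtomicU64::new(0),
        };
        tx.begin(3, 42);
        assert_eq!(tx.max_frame.load(Ordering::Acquire), 42);
        tx.end();
        assert_eq!(tx.slot.load(Ordering::Acquire), NO_LOCK_HELD);
    }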