From 40f59f124f453d27da30b4f023bf9a46fe58d3be Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 8 Feb 2025 11:12:16 +0200 Subject: [PATCH 01/15] Fix comment: new -> old --- core/mvcc/database/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/mvcc/database/mod.rs b/core/mvcc/database/mod.rs index 4ae85f195..4cdd4f65e 100644 --- a/core/mvcc/database/mod.rs +++ b/core/mvcc/database/mod.rs @@ -534,7 +534,7 @@ impl MvStore Date: Sat, 8 Feb 2025 11:18:05 +0200 Subject: [PATCH 02/15] Comment about tx visibility when deleting a row --- core/mvcc/database/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/mvcc/database/mod.rs b/core/mvcc/database/mod.rs index 4cdd4f65e..b2a683cc9 100644 --- a/core/mvcc/database/mod.rs +++ b/core/mvcc/database/mod.rs @@ -318,8 +318,9 @@ impl MvStore Date: Sat, 8 Feb 2025 14:28:04 +0200 Subject: [PATCH 03/15] refactor: is_version_visible() -> RowVersion::is_visible_to() --- core/mvcc/database/mod.rs | 28 ++++++++++++++++------------ core/mvcc/database/tests.rs | 2 +- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/core/mvcc/database/mod.rs b/core/mvcc/database/mod.rs index b2a683cc9..51b3ee3eb 100644 --- a/core/mvcc/database/mod.rs +++ b/core/mvcc/database/mod.rs @@ -320,7 +320,7 @@ impl MvStore MvStore( } } -pub(crate) fn is_version_visible( - txs: &SkipMap>, - tx: &Transaction, - rv: &RowVersion, -) -> bool { - is_begin_visible(txs, tx, rv) && is_end_visible(txs, tx, rv) +impl RowVersion { + pub fn is_visible_to( + &self, + tx: &Transaction, + txs: &SkipMap>, + ) -> bool { + is_begin_visible(txs, tx, self) && is_end_visible(txs, tx, self) + } } fn is_begin_visible( diff --git a/core/mvcc/database/tests.rs b/core/mvcc/database/tests.rs index b317a15d2..f3dcabca9 100644 --- a/core/mvcc/database/tests.rs +++ b/core/mvcc/database/tests.rs @@ -699,7 +699,7 @@ fn test_snapshot_isolation_tx_visible1() { }, }; tracing::debug!("Testing visibility of {row_version:?}"); - is_version_visible(&txs, ¤t_tx, &row_version) + row_version.is_visible_to(¤t_tx, &txs) }; // begin visible: transaction committed with ts < current_tx.begin_ts From 3826c540ae1b45e25ee43fcb939c33b503b35c97 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 8 Feb 2025 14:47:41 +0200 Subject: [PATCH 04/15] thank you clippy, actually a nice suggestion --- core/mvcc/database/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/mvcc/database/mod.rs b/core/mvcc/database/mod.rs index 51b3ee3eb..ca0425ecb 100644 --- a/core/mvcc/database/mod.rs +++ b/core/mvcc/database/mod.rs @@ -367,10 +367,11 @@ impl MvStore Date: Sat, 15 Feb 2025 13:20:57 +0400 Subject: [PATCH 05/15] add fuzz test for replace and substr --- tests/integration/fuzz/mod.rs | 48 ++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index a9bc88360..b48ddda0e 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -151,7 +151,7 @@ mod tests { } #[test] - pub fn logical_expression_fuzz_ex1() { + pub fn fuzz_ex() { let _ = env_logger::try_init(); let db = TempDatabase::new_empty(); let limbo_conn = db.connect_limbo(); @@ -166,6 +166,7 @@ mod tests { "SELECT CASE ( NULL < NULL ) WHEN ( 0 ) THEN ( NULL ) ELSE ( 2.0 ) END;", "SELECT (COALESCE(0, COALESCE(0, 0)));", "SELECT CAST((1 > 0) AS INTEGER);", + "SELECT substr('ABC', -1)", ] { let limbo = limbo_exec_row(&limbo_conn, query); let sqlite = sqlite_exec_row(&sqlite_conn, query); @@ -185,6 +186,20 @@ mod tests { let (bin_op, bin_op_builder) = g.create_handle(); let (scalar, scalar_builder) = g.create_handle(); let (paren, paren_builder) = g.create_handle(); + let (number, number_builder) = g.create_handle(); + + number_builder + .choice() + .option_symbol(rand_int(-5..10)) + .option( + g.create() + .concat(" ") + .push(number) + .push(g.create().choice().options_str(["+", "-", "*"]).build()) + .push(number) + .build(), + ) + .build(); paren_builder .concat("") @@ -246,6 +261,37 @@ mod tests { .push_str(")") .build(), ) + .option( + g.create() + .concat("") + .push(g.create().choice().options_str(["replace"]).build()) + .push_str("(") + .push(g.create().concat("").push(expr).repeat(3..4, ", ").build()) + .push_str(")") + .build(), + ) + .option( + g.create() + .concat("") + .push( + g.create() + .choice() + .options_str(["substr", "substring"]) + .build(), + ) + .push_str("(") + .push(expr) + .push_str(", ") + .push( + g.create() + .concat("") + .push(number) + .repeat(1..3, ", ") + .build(), + ) + .push_str(")") + .build(), + ) .build(); expr_builder From b35dab5b6d51a946d9864b0b53b797877c1c3110 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sat, 15 Feb 2025 13:21:30 +0400 Subject: [PATCH 06/15] fix substr implementation --- core/vdbe/mod.rs | 56 ++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 5fa53dd07..252d934c0 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2202,7 +2202,11 @@ impl Program { ScalarFunc::Substr | ScalarFunc::Substring => { let str_value = &state.registers[*start_reg]; let start_value = &state.registers[*start_reg + 1]; - let length_value = &state.registers[*start_reg + 2]; + let length_value = if func.arg_count == 3 { + Some(&state.registers[*start_reg + 2]) + } else { + None + }; let result = exec_substring(str_value, start_value, length_value); state.registers[*dest] = result; } @@ -3238,39 +3242,31 @@ fn exec_nullif(first_value: &OwnedValue, second_value: &OwnedValue) -> OwnedValu fn exec_substring( str_value: &OwnedValue, start_value: &OwnedValue, - length_value: &OwnedValue, + length_value: Option<&OwnedValue>, ) -> OwnedValue { - if let (OwnedValue::Text(str), OwnedValue::Integer(start), OwnedValue::Integer(length)) = - (str_value, start_value, length_value) - { - let start = *start as usize; - let str_len = str.as_str().len(); + if let (OwnedValue::Text(str), OwnedValue::Integer(start)) = (str_value, start_value) { + let str_len = str.as_str().len() as i64; - if start > str_len { - return OwnedValue::build_text(""); - } - - let start_idx = start - 1; - let end = if *length != -1 { - start_idx + *length as usize + // The left-most character of X is number 1. + // If Y is negative then the first character of the substring is found by counting from the right rather than the left. + let first_position = if *start < 0 { + str_len.saturating_sub((*start).abs()) } else { - str_len + *start - 1 }; - let substring = &str.as_str()[start_idx..end.min(str_len)]; - - OwnedValue::build_text(substring) - } else if let (OwnedValue::Text(str), OwnedValue::Integer(start)) = (str_value, start_value) { - let start = *start as usize; - let str_len = str.as_str().len(); - - if start > str_len { - return OwnedValue::build_text(""); - } - - let start_idx = start - 1; - let substring = &str.as_str()[start_idx..str_len]; - - OwnedValue::build_text(substring) + // If Z is negative then the abs(Z) characters preceding the Y-th character are returned. + let last_position = match length_value { + Some(OwnedValue::Integer(length)) => first_position + *length, + _ => str_len, + }; + let (start, end) = if first_position <= last_position { + (first_position, last_position) + } else { + (last_position, first_position) + }; + OwnedValue::build_text( + &str.as_str()[start.clamp(-0, str_len) as usize..end.clamp(0, str_len) as usize], + ) } else { OwnedValue::Null } From 2ee53826894bc4d3cf608e8b0f47d4004176ec79 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sat, 15 Feb 2025 13:25:49 +0400 Subject: [PATCH 07/15] add substr cases in TCL tests --- testing/scalar-functions.test | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index 47cca09d4..2ba0d58a4 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -535,6 +535,21 @@ do_execsql_test substr-2-args { SELECT substr('limbo', 3); } {mbo} +do_execsql_test substr-cases { + SELECT substr('limbo', 0); + SELECT substr('limbo', 0, 3); + SELECT substr('limbo', -2); + SELECT substr('limbo', -2, 1); + SELECT substr('limbo', -10, 7); + SELECT substr('limbo', 10, -7); +} {limbo +li +bo +b +li +mbo +} + do_execsql_test substring-3-args { SELECT substring('limbo', 1, 3); } {lim} From 91d723016de066204568547d155997fdc855caa7 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sat, 15 Feb 2025 13:29:14 +0400 Subject: [PATCH 08/15] fix test --- core/vdbe/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 252d934c0..e3ac9c613 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -4341,7 +4341,7 @@ mod tests { let length_value = OwnedValue::Integer(3); let expected_val = OwnedValue::build_text("lim"); assert_eq!( - exec_substring(&str_value, &start_value, &length_value), + exec_substring(&str_value, &start_value, Some(&length_value)), expected_val ); @@ -4350,7 +4350,7 @@ mod tests { let length_value = OwnedValue::Integer(10); let expected_val = OwnedValue::build_text("limbo"); assert_eq!( - exec_substring(&str_value, &start_value, &length_value), + exec_substring(&str_value, &start_value, Some(&length_value)), expected_val ); @@ -4359,7 +4359,7 @@ mod tests { let length_value = OwnedValue::Integer(3); let expected_val = OwnedValue::build_text(""); assert_eq!( - exec_substring(&str_value, &start_value, &length_value), + exec_substring(&str_value, &start_value, Some(&length_value)), expected_val ); @@ -4368,7 +4368,7 @@ mod tests { let length_value = OwnedValue::Null; let expected_val = OwnedValue::build_text("mbo"); assert_eq!( - exec_substring(&str_value, &start_value, &length_value), + exec_substring(&str_value, &start_value, Some(&length_value)), expected_val ); @@ -4377,7 +4377,7 @@ mod tests { let length_value = OwnedValue::Null; let expected_val = OwnedValue::build_text(""); assert_eq!( - exec_substring(&str_value, &start_value, &length_value), + exec_substring(&str_value, &start_value, Some(&length_value)), expected_val ); } From 610d41ae9e9cea0d30e6de1314676cf3011cc94c Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sat, 15 Feb 2025 14:33:37 +0400 Subject: [PATCH 09/15] add fuzz test for math functions --- tests/.cargo/config.toml | 2 + tests/integration/fuzz/mod.rs | 105 ++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 tests/.cargo/config.toml diff --git a/tests/.cargo/config.toml b/tests/.cargo/config.toml new file mode 100644 index 000000000..91dc29b59 --- /dev/null +++ b/tests/.cargo/config.toml @@ -0,0 +1,2 @@ +[env] +LIBSQLITE3_FLAGS = "-DSQLITE_ENABLE_MATH_FUNCTIONS" diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index a9bc88360..f57dc2feb 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -177,6 +177,111 @@ mod tests { } } + #[test] + pub fn math_expression_fuzz_run() { + let _ = env_logger::try_init(); + let g = GrammarGenerator::new(); + let (expr, expr_builder) = g.create_handle(); + let (bin_op, bin_op_builder) = g.create_handle(); + let (scalar, scalar_builder) = g.create_handle(); + let (paren, paren_builder) = g.create_handle(); + + paren_builder + .concat("") + .push_str("(") + .push(expr) + .push_str(")") + .build(); + + bin_op_builder + .concat(" ") + .push(expr) + .push( + g.create() + .choice() + .options_str(["+", "-", "/", "*"]) + .build(), + ) + .push(expr) + .build(); + + scalar_builder + .choice() + .option( + g.create() + .concat("") + .push( + g.create() + .choice() + .options_str([ + "acos", "acosh", "asin", "asinh", "atan", "atanh", "ceil", + "ceiling", "cos", "cosh", "degrees", "exp", "floor", "ln", "log", + "log10", "log2", "radians", "sin", "sinh", "sqrt", "tan", "tanh", + "trunc", + ]) + .build(), + ) + .push_str("(") + .push(expr) + .push_str(")") + .build(), + ) + .option( + g.create() + .concat("") + .push( + g.create() + .choice() + .options_str(["atan2", "log", "mod", "pow", "power"]) + .build(), + ) + .push_str("(") + .push(g.create().concat("").push(expr).repeat(2..3, ", ").build()) + .push_str(")") + .build(), + ) + .build(); + + expr_builder + .choice() + .options_str(["-2.0", "-1.0", "0.0", "0.5", "1.0", "2.0"]) + .option_w(bin_op, 10.0) + .option_w(paren, 10.0) + .option_w(scalar, 10.0) + .build(); + + let sql = g.create().concat(" ").push_str("SELECT").push(expr).build(); + + let db = TempDatabase::new_empty(); + let limbo_conn = db.connect_limbo(); + let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); + + let (mut rng, seed) = rng_from_time(); + log::info!("seed: {}", seed); + for _ in 0..1024 { + let query = g.generate(&mut rng, sql, 50); + log::info!("query: {}", query); + let limbo = limbo_exec_row(&limbo_conn, &query); + let sqlite = sqlite_exec_row(&sqlite_conn, &query); + match (&limbo[0], &sqlite[0]) { + // compare only finite results because some evaluations are not so stable around infinity + (rusqlite::types::Value::Real(limbo), rusqlite::types::Value::Real(sqlite)) + if limbo.is_finite() && sqlite.is_finite() => + { + assert!( + (limbo - sqlite).abs() < 1e-9 + || (limbo - sqlite) / (limbo.abs().max(sqlite.abs())) < 1e-9, + "query: {}, limbo: {:?}, sqlite: {:?}", + query, + limbo, + sqlite + ) + } + _ => {} + } + } + } + #[test] pub fn string_expression_fuzz_run() { let _ = env_logger::try_init(); From e25660833b3b35febd8be272b023505215f0e887 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sat, 15 Feb 2025 14:33:57 +0400 Subject: [PATCH 10/15] fix codegen for binary functions --- core/translate/expr.rs | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 9bfcc4a8f..8fffe19a2 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1848,15 +1848,24 @@ pub fn translate_expr( MathFuncArity::Binary => { let args = expect_arguments_exact!(args, 2, math_func); - let reg1 = program.alloc_register(); - let _ = - translate_expr(program, referenced_tables, &args[0], reg1, resolver)?; - let reg2 = program.alloc_register(); - let _ = - translate_expr(program, referenced_tables, &args[1], reg2, resolver)?; + let start_reg = program.alloc_registers(2); + let _ = translate_expr( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; + let _ = translate_expr( + program, + referenced_tables, + &args[1], + start_reg + 1, + resolver, + )?; program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); From ac263fa5a85501d3e864b9d3c7e9445985ff4379 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sat, 15 Feb 2025 14:42:03 +0400 Subject: [PATCH 11/15] reset rust caches in the CI --- .github/workflows/rust.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 4cb38436a..9cd941802 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -27,6 +27,8 @@ jobs: steps: - uses: actions/checkout@v3 - uses: Swatinem/rust-cache@v2 + with: + prefix-key: "v1-rust" # can be updated if we need to reset caches due to non-trivial change in the dependencies (for example, custom env var were set for single workspace project) - name: Set up Python 3.10 uses: actions/setup-python@v5 with: From e4541edb48f9718059f3f20438e5090b53fac200 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 15 Feb 2025 12:44:56 +0200 Subject: [PATCH 12/15] Fix IdxGt,IdxGe,IdxLt,IdxLe instructions According to SQLite documentation, the way to use these instructions is to compare the seek key to the index key as you would with the Compare opcode. The compare opcode states: "Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this vector "A") and in reg(P2)..reg(P2+P3-1) ("B")." In other words, we should compare the same number of columns from each, not compare the entire keys. This fixes a few Clickbench queries returning incorrect results, and so closes #1009 --- Future work: support index seek keys that use multiple columns. Our index seek is many times slower than SQLite because we're not utilizing all the possible columns -- instead we just use the first index column to seek. --- core/vdbe/mod.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index d86a488bd..dc28939de 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -1399,8 +1399,8 @@ impl Program { let record_from_regs: Record = make_owned_record(&state.registers, start_reg, num_regs); if let Some(ref idx_record) = *cursor.record()? { - // omit the rowid from the idx_record, which is the last value - if idx_record.get_values()[..idx_record.len() - 1] + // Compare against the same number of values + if idx_record.get_values()[..record_from_regs.len()] >= record_from_regs.get_values()[..] { state.pc = target_pc.to_offset_int(); @@ -1423,8 +1423,8 @@ impl Program { let record_from_regs: Record = make_owned_record(&state.registers, start_reg, num_regs); if let Some(ref idx_record) = *cursor.record()? { - // omit the rowid from the idx_record, which is the last value - if idx_record.get_values()[..idx_record.len() - 1] + // Compare against the same number of values + if idx_record.get_values()[..record_from_regs.len()] <= record_from_regs.get_values()[..] { state.pc = target_pc.to_offset_int(); @@ -1447,8 +1447,8 @@ impl Program { let record_from_regs: Record = make_owned_record(&state.registers, start_reg, num_regs); if let Some(ref idx_record) = *cursor.record()? { - // omit the rowid from the idx_record, which is the last value - if idx_record.get_values()[..idx_record.len() - 1] + // Compare against the same number of values + if idx_record.get_values()[..record_from_regs.len()] > record_from_regs.get_values()[..] { state.pc = target_pc.to_offset_int(); @@ -1471,8 +1471,8 @@ impl Program { let record_from_regs: Record = make_owned_record(&state.registers, start_reg, num_regs); if let Some(ref idx_record) = *cursor.record()? { - // omit the rowid from the idx_record, which is the last value - if idx_record.get_values()[..idx_record.len() - 1] + // Compare against the same number of values + if idx_record.get_values()[..record_from_regs.len()] < record_from_regs.get_values()[..] { state.pc = target_pc.to_offset_int(); From 32ec8f64d5194816eeee03640c31377d356715db Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sat, 15 Feb 2025 14:54:19 +0400 Subject: [PATCH 13/15] move .config to the top level in order to have rusqlite dep built identically across multiple projects --- {tests/.cargo => .cargo}/config.toml | 0 .github/workflows/rust.yml | 2 -- 2 files changed, 2 deletions(-) rename {tests/.cargo => .cargo}/config.toml (100%) diff --git a/tests/.cargo/config.toml b/.cargo/config.toml similarity index 100% rename from tests/.cargo/config.toml rename to .cargo/config.toml diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 9cd941802..4cb38436a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -27,8 +27,6 @@ jobs: steps: - uses: actions/checkout@v3 - uses: Swatinem/rust-cache@v2 - with: - prefix-key: "v1-rust" # can be updated if we need to reset caches due to non-trivial change in the dependencies (for example, custom env var were set for single workspace project) - name: Set up Python 3.10 uses: actions/setup-python@v5 with: From 7fa6ff9bb34803e5825df7f4c4110a72e70b5942 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sat, 15 Feb 2025 14:42:03 +0400 Subject: [PATCH 14/15] reset rust caches in the CI --- .github/workflows/rust.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 4cb38436a..9cd941802 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -27,6 +27,8 @@ jobs: steps: - uses: actions/checkout@v3 - uses: Swatinem/rust-cache@v2 + with: + prefix-key: "v1-rust" # can be updated if we need to reset caches due to non-trivial change in the dependencies (for example, custom env var were set for single workspace project) - name: Set up Python 3.10 uses: actions/setup-python@v5 with: From 10d0dc218be38690264992d50f7643bc3cbc7837 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sat, 15 Feb 2025 15:05:36 +0400 Subject: [PATCH 15/15] add comment about config.toml env var --- .cargo/config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 91dc29b59..2586c8988 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,2 +1,2 @@ [env] -LIBSQLITE3_FLAGS = "-DSQLITE_ENABLE_MATH_FUNCTIONS" +LIBSQLITE3_FLAGS = "-DSQLITE_ENABLE_MATH_FUNCTIONS" # necessary for rusqlite dependency in order to bundle SQLite with math functions included