From a744dd8419a401fdc0468a2780e32006505b2bca Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 9 Feb 2025 23:18:46 +0400 Subject: [PATCH 1/8] translate_and_mark helper with its previous signature is error prone - so replace it with more explicit option which accept target register --- core/translate/expr.rs | 149 +++++++++++++++++++++++++++++------------ 1 file changed, 106 insertions(+), 43 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 36acfa935..785eece83 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -237,8 +237,10 @@ pub fn translate_condition_expr( )?; } ast::Expr::Binary(lhs, op, rhs) => { - let lhs_reg = translate_and_mark(program, Some(referenced_tables), lhs, resolver)?; - let rhs_reg = translate_and_mark(program, Some(referenced_tables), rhs, resolver)?; + let lhs_reg = program.alloc_register(); + let rhs_reg = program.alloc_register(); + translate_and_mark(program, Some(referenced_tables), lhs, lhs_reg, resolver)?; + translate_and_mark(program, Some(referenced_tables), rhs, rhs_reg, resolver)?; match op { ast::Operator::Greater => { emit_cmp_insn!(program, condition_metadata, Gt, Le, lhs_reg, rhs_reg) @@ -417,14 +419,14 @@ pub fn translate_condition_expr( let cur_reg = program.alloc_register(); match op { ast::LikeOperator::Like | ast::LikeOperator::Glob => { - let pattern_reg = program.alloc_register(); + let start_reg = program.alloc_registers(2); let mut constant_mask = 0; - let _ = translate_and_mark(program, Some(referenced_tables), lhs, resolver); + translate_and_mark(program, Some(referenced_tables), lhs, start_reg, resolver)?; let _ = translate_expr( program, Some(referenced_tables), rhs, - pattern_reg, + start_reg + 1, resolver, )?; if matches!(rhs.as_ref(), ast::Expr::Literal(_)) { @@ -438,7 +440,7 @@ pub fn translate_condition_expr( }; program.emit_insn(Insn::Function { constant_mask, - start_reg: pattern_reg, + start_reg, dest: cur_reg, func: FuncCtx { func: Func::Scalar(func), @@ -1003,16 +1005,23 @@ pub fn translate_expr( ) } JsonFunc::JsonRemove => { + let start_reg = + program.alloc_registers(args.as_ref().map(|x| x.len()).unwrap_or(1)); if let Some(args) = args { - for arg in args.iter() { + for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - let _ = - translate_and_mark(program, referenced_tables, arg, resolver)?; + translate_and_mark( + program, + referenced_tables, + arg, + start_reg + i, + resolver, + )?; } } program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); @@ -1337,11 +1346,17 @@ pub fn translate_expr( | ScalarFunc::Soundex | ScalarFunc::ZeroBlob => { let args = expect_arguments_exact!(args, 1, srf); - let reg = - translate_and_mark(program, referenced_tables, &args[0], resolver)?; + let start_reg = program.alloc_register(); + translate_and_mark( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: reg, + start_reg, dest: target_register, func: func_ctx, }); @@ -1350,11 +1365,17 @@ pub fn translate_expr( #[cfg(not(target_family = "wasm"))] ScalarFunc::LoadExtension => { let args = expect_arguments_exact!(args, 1, srf); - let reg = - translate_and_mark(program, referenced_tables, &args[0], resolver)?; + let start_reg = program.alloc_register(); + translate_and_mark( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: reg, + start_reg, dest: target_register, func: func_ctx, }); @@ -1377,20 +1398,23 @@ pub fn translate_expr( Ok(target_register) } ScalarFunc::Date | ScalarFunc::DateTime => { + let start_reg = program + .alloc_registers(args.as_ref().map(|x| x.len()).unwrap_or(1)); if let Some(args) = args { - for arg in args.iter() { + for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - let _ = translate_and_mark( + translate_and_mark( program, referenced_tables, arg, + start_reg + i, resolver, )?; } } program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); @@ -1457,11 +1481,17 @@ pub fn translate_expr( } else { crate::bail_parse_error!("hex function with no arguments",); }; - let regs = - translate_and_mark(program, referenced_tables, &args[0], resolver)?; + let start_reg = program.alloc_register(); + translate_and_mark( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: regs, + start_reg, dest: target_register, func: func_ctx, }); @@ -1495,20 +1525,23 @@ pub fn translate_expr( Ok(target_register) } ScalarFunc::Time => { + let start_reg = program + .alloc_registers(args.as_ref().map(|x| x.len()).unwrap_or(1)); if let Some(args) = args { - for arg in args.iter() { + for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - let _ = translate_and_mark( + translate_and_mark( program, referenced_tables, arg, + start_reg + i, resolver, )?; } } program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); @@ -1537,12 +1570,19 @@ pub fn translate_expr( | ScalarFunc::Unhex => { let args = expect_arguments_max!(args, 2, srf); - for arg in args.iter() { - translate_and_mark(program, referenced_tables, arg, resolver)?; + let start_reg = program.alloc_registers(args.len()); + for (i, arg) in args.iter().enumerate() { + translate_and_mark( + program, + referenced_tables, + arg, + start_reg + i, + resolver, + )?; } program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); @@ -1559,13 +1599,20 @@ pub fn translate_expr( } else { crate::bail_parse_error!("min function with no arguments"); }; - for arg in args { - translate_and_mark(program, referenced_tables, arg, resolver)?; + let start_reg = program.alloc_registers(args.len()); + for (i, arg) in args.iter().enumerate() { + translate_and_mark( + program, + referenced_tables, + arg, + start_reg + i, + resolver, + )?; } program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); @@ -1582,13 +1629,20 @@ pub fn translate_expr( } else { crate::bail_parse_error!("max function with no arguments"); }; - for arg in args { - translate_and_mark(program, referenced_tables, arg, resolver)?; + let start_reg = program.alloc_registers(args.len()); + for (i, arg) in args.iter().enumerate() { + translate_and_mark( + program, + referenced_tables, + arg, + start_reg + i, + resolver, + )?; } program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); @@ -1725,20 +1779,23 @@ pub fn translate_expr( Ok(target_register) } ScalarFunc::StrfTime => { + let start_reg = program + .alloc_registers(args.as_ref().map(|x| x.len()).unwrap_or(1)); if let Some(args) = args { - for arg in args.iter() { + for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - let _ = translate_and_mark( + translate_and_mark( program, referenced_tables, arg, + start_reg + i, resolver, )?; } } program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); @@ -1771,11 +1828,17 @@ pub fn translate_expr( MathFuncArity::Unary => { let args = expect_arguments_exact!(args, 1, math_func); - let reg = - translate_and_mark(program, referenced_tables, &args[0], resolver)?; + let start_reg = program.alloc_register(); + translate_and_mark( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: reg, + start_reg, dest: target_register, func: func_ctx, }); @@ -2165,14 +2228,14 @@ pub fn translate_and_mark( program: &mut ProgramBuilder, referenced_tables: Option<&[TableReference]>, expr: &ast::Expr, + target_register: usize, resolver: &Resolver, -) -> Result { - let target_register = program.alloc_register(); +) -> Result<()> { translate_expr(program, referenced_tables, expr, target_register, resolver)?; if matches!(expr, ast::Expr::Literal(_)) { program.mark_last_insn_constant(); } - Ok(target_register) + Ok(()) } /// Sanitaizes a string literal by removing single quote at front and back From 5773f767af2fcfa0cc810450fc31384cb3ef2b4d Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 9 Feb 2025 23:42:24 +0400 Subject: [PATCH 2/8] fix bug after refactoring in LIKE and GLOB --- core/translate/expr.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 785eece83..2040387ab 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -421,14 +421,15 @@ pub fn translate_condition_expr( ast::LikeOperator::Like | ast::LikeOperator::Glob => { let start_reg = program.alloc_registers(2); let mut constant_mask = 0; - translate_and_mark(program, Some(referenced_tables), lhs, start_reg, resolver)?; - let _ = translate_expr( + translate_and_mark( program, Some(referenced_tables), - rhs, + lhs, start_reg + 1, resolver, )?; + let _ = + translate_expr(program, Some(referenced_tables), rhs, start_reg, resolver)?; if matches!(rhs.as_ref(), ast::Expr::Literal(_)) { program.mark_last_insn_constant(); constant_mask = 1; From 459a78bc6471f62cb19a8c1f4444bffa9790e4a8 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 9 Feb 2025 23:30:25 +0400 Subject: [PATCH 3/8] add fuzz test for few string functions --- tests/integration/fuzz/mod.rs | 107 ++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 8179dfde4..4cdae5c54 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -174,6 +174,113 @@ mod tests { } } + #[test] + pub fn string_expression_fuzz_run() { + let _ = env_logger::try_init(); + let g = GrammarGenerator::new(); + let (expr, expr_builder) = g.create_handle(); + let (bin_op, bin_op_builder) = g.create_handle(); + let (scalar, scalar_builder) = g.create_handle(); + let (paren, paren_builder) = g.create_handle(); + + paren_builder + .concat("") + .push_str("(") + .push(expr) + .push_str(")") + .build(); + + bin_op_builder + .concat(" ") + .push(expr) + .push(g.create().choice().options_str(["||"]).build()) + .push(expr) + .build(); + + scalar_builder + .choice() + .option( + g.create() + .concat("") + .push_str("char(") + .push( + g.create() + .concat("") + .push_symbol(rand_int(65..91)) + .repeat(1..8, ", ") + .build(), + ) + .push_str(")") + .build(), + ) + .option( + g.create() + .concat("") + .push( + g.create() + .choice() + .options_str(["ltrim", "rtrim", "trim"]) + .build(), + ) + .push_str("(") + .push(g.create().concat("").push(expr).repeat(2..3, ", ").build()) + .push_str(")") + .build(), + ) + .option( + g.create() + .concat("") + .push( + g.create() + .choice() + .options_str([ + "ltrim", "rtrim", "lower", "upper", "quote", "hex", "trim", + ]) + .build(), + ) + .push_str("(") + .push(expr) + .push_str(")") + .build(), + ) + .build(); + + expr_builder + .choice() + .option_w(bin_op, 1.0) + .option_w(paren, 1.0) + .option_w(scalar, 1.0) + .option( + g.create() + .concat("") + .push_str("'") + .push_symbol(rand_str("", 2)) + .push_str("'") + .build(), + ) + .build(); + + let sql = g.create().concat(" ").push_str("SELECT").push(expr).build(); + + let db = TempDatabase::new_empty(); + let limbo_conn = db.connect_limbo(); + let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); + + let (mut rng, seed) = rng_from_time(); + log::info!("seed: {}", seed); + for _ in 0..10240 { + let query = g.generate(&mut rng, sql, 50); + log::info!("query: {}", query); + let limbo = limbo_exec_row(&limbo_conn, &query); + let sqlite = sqlite_exec_row(&sqlite_conn, &query); + assert_eq!( + limbo, sqlite, + "query: {}, limbo: {:?}, sqlite: {:?}", + query, limbo, sqlite + ); + } + } + #[test] pub fn logical_expression_fuzz_run() { let _ = env_logger::try_init(); From cc7267b0bd4b5aa50958e0408805bedd59b34c41 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 9 Feb 2025 23:30:59 +0400 Subject: [PATCH 4/8] fix trim function pattern handling --- core/vdbe/mod.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 3ccfdc1e3..57a2726e9 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2146,19 +2146,31 @@ impl Program { } ScalarFunc::Trim => { let reg_value = state.registers[*start_reg].clone(); - let pattern_value = state.registers.get(*start_reg + 1).cloned(); + let pattern_value = if func.arg_count == 2 { + state.registers.get(*start_reg + 1).cloned() + } else { + None + }; let result = exec_trim(®_value, pattern_value); state.registers[*dest] = result; } ScalarFunc::LTrim => { let reg_value = state.registers[*start_reg].clone(); - let pattern_value = state.registers.get(*start_reg + 1).cloned(); + let pattern_value = if func.arg_count == 2 { + state.registers.get(*start_reg + 1).cloned() + } else { + None + }; let result = exec_ltrim(®_value, pattern_value); state.registers[*dest] = result; } ScalarFunc::RTrim => { let reg_value = state.registers[*start_reg].clone(); - let pattern_value = state.registers.get(*start_reg + 1).cloned(); + let pattern_value = if func.arg_count == 2 { + state.registers.get(*start_reg + 1).cloned() + } else { + None + }; let result = exec_rtrim(®_value, pattern_value); state.registers[*dest] = result; } From cd4bfac0595a5902cdacf2141f8ad0be0686b2ed Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 9 Feb 2025 23:31:18 +0400 Subject: [PATCH 5/8] fix quote functions --- core/vdbe/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 57a2726e9..37dc141ce 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -3103,6 +3103,9 @@ fn exec_quote(value: &OwnedValue) -> OwnedValue { for c in s.as_str().chars() { if c == '\0' { break; + } else if c == '\'' { + quoted.push('\''); + quoted.push(c); } else { quoted.push(c); } From 5fa6a452c199866686902aaddabbbf944320c4bb Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 9 Feb 2025 23:32:39 +0400 Subject: [PATCH 6/8] add TCL test for quoting of quotes --- testing/scalar-functions.test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index f04fa1765..924bafa6d 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -631,6 +631,10 @@ do_execsql_test quote-string { SELECT quote('limbo') } {'limbo'} +do_execsql_test quote-escape { + SELECT quote('''quote''') +} {'''quote'''} + do_execsql_test quote-null { SELECT quote(null) } {NULL} From f22bfad6958675c8fb0ced9d7d7df1ef1eee64c6 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 9 Feb 2025 23:44:38 +0400 Subject: [PATCH 7/8] reduce amount of iterations in fuzz test --- tests/integration/fuzz/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 4cdae5c54..167bad035 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -268,7 +268,7 @@ mod tests { let (mut rng, seed) = rng_from_time(); log::info!("seed: {}", seed); - for _ in 0..10240 { + for _ in 0..1024 { let query = g.generate(&mut rng, sql, 50); log::info!("query: {}", query); let limbo = limbo_exec_row(&limbo_conn, &query); From 7cdad64a5fbd55f0fe07374206ace642dd74ba2a Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 9 Feb 2025 23:50:11 +0400 Subject: [PATCH 8/8] fix assertion in test_quote --- core/vdbe/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 37dc141ce..a9a66c904 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -3838,7 +3838,7 @@ mod tests { assert_eq!(exec_quote(&input), expected); let input = OwnedValue::build_text(Rc::new(String::from("hello''world"))); - let expected = OwnedValue::build_text(Rc::new(String::from("'hello''world'"))); + let expected = OwnedValue::build_text(Rc::new(String::from("'hello''''world'"))); assert_eq!(exec_quote(&input), expected); }