From 9f18fdbfd2d6b78183ca7fd4ea5e8f1a43e4e2ee Mon Sep 17 00:00:00 2001 From: rjhallsted Date: Tue, 10 Sep 2024 11:25:52 -0700 Subject: [PATCH 1/7] Remove unecessary clone when executing LIKE function --- core/vdbe/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index f74ccd368..a5910400e 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -1208,11 +1208,11 @@ impl Program { start_reg, state.registers.len() ); - let pattern = state.registers[start_reg].clone(); - let text = state.registers[start_reg + 1].clone(); + let pattern = &state.registers[start_reg]; + let text = &state.registers[start_reg + 1]; let result = match (pattern, text) { (OwnedValue::Text(pattern), OwnedValue::Text(text)) => { - OwnedValue::Integer(exec_like(&pattern, &text) as i64) + OwnedValue::Integer(exec_like(pattern, text) as i64) } _ => { unreachable!("Like on non-text registers"); From 6ac78dfb0366b2242ae7fde7157cb3aaaf46de00 Mon Sep 17 00:00:00 2001 From: rjhallsted Date: Tue, 10 Sep 2024 13:54:52 -0700 Subject: [PATCH 2/7] Cache constructed LIKE regexes if FUNCTION P1 is set --- core/translate/expr.rs | 16 ++++++++++ core/vdbe/explain.rs | 3 +- core/vdbe/mod.rs | 72 ++++++++++++++++++++++++++++++++---------- 3 files changed, 74 insertions(+), 17 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index c467d4d2e..a44f74d46 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -454,6 +454,7 @@ pub fn translate_condition_expr( cursor_hint, )?; program.emit_insn(Insn::Function { + constant_mask: 0, func: crate::vdbe::Func::Scalar(ScalarFunc::Like), start_reg: pattern_reg, dest: cur_reg, @@ -655,6 +656,7 @@ pub fn translate_expr( let regs = program.alloc_register(); translate_expr(program, referenced_tables, &args[0], regs, cursor_hint)?; program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: regs, dest: target_register, func: crate::vdbe::Func::Json(j), @@ -673,6 +675,7 @@ pub fn translate_expr( } program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: target_register + 1, dest: target_register, func: crate::vdbe::Func::Scalar(srf), @@ -734,6 +737,7 @@ pub fn translate_expr( translate_expr(program, referenced_tables, arg, reg, cursor_hint)?; } program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: target_register + 1, dest: target_register, func: crate::vdbe::Func::Scalar(srf), @@ -811,6 +815,8 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { + // Currently only constant values for the first arg are supported + constant_mask: 1, start_reg: target_register + 1, dest: target_register, func: crate::vdbe::Func::Scalar(srf), @@ -847,6 +853,7 @@ pub fn translate_expr( cursor_hint, )?; program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: regs, dest: target_register, func: crate::vdbe::Func::Scalar(srf), @@ -862,6 +869,7 @@ pub fn translate_expr( } let regs = program.alloc_register(); program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: regs, dest: target_register, func: crate::vdbe::Func::Scalar(srf), @@ -883,6 +891,7 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: target_register + 1, dest: target_register, func: crate::vdbe::Func::Scalar(ScalarFunc::Date), @@ -934,6 +943,7 @@ pub fn translate_expr( } program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: str_reg, dest: target_register, func: crate::vdbe::Func::Scalar(ScalarFunc::Substring), @@ -958,6 +968,7 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { + constant_mask: 0, start_reg, dest: target_register, func: crate::vdbe::Func::Scalar(srf), @@ -979,6 +990,7 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: target_register + 1, dest: target_register, func: crate::vdbe::Func::Scalar(ScalarFunc::Time), @@ -1012,6 +1024,7 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: target_register + 1, dest: target_register, func: crate::vdbe::Func::Scalar(srf), @@ -1045,6 +1058,7 @@ pub fn translate_expr( } program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: target_register + 1, dest: target_register, func: crate::vdbe::Func::Scalar(ScalarFunc::Min), @@ -1078,6 +1092,7 @@ pub fn translate_expr( } program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: target_register + 1, dest: target_register, func: crate::vdbe::Func::Scalar(ScalarFunc::Max), @@ -1114,6 +1129,7 @@ pub fn translate_expr( cursor_hint, )?; program.emit_insn(Insn::Function { + constant_mask: 0, start_reg: func_reg, dest: target_register, func: crate::vdbe::Func::Scalar(srf), diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index f120b0ac6..38f2d6bcc 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -533,12 +533,13 @@ pub fn insn_to_str( "".to_string(), ), Insn::Function { + constant_mask, start_reg, dest, func, } => ( "Function", - 1, + *constant_mask, *start_reg as i32, *dest as i32, OwnedValue::Text(Rc::new(func.to_string())), diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index a5910400e..aa21ddaf9 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -302,10 +302,10 @@ pub enum Insn { // Function Function { - // constant_mask: i32, // P1, not used for now - start_reg: usize, // P2, start of argument registers - dest: usize, // P3 - func: Func, // P4 + constant_mask: i32, // P1 + start_reg: usize, // P2, start of argument registers + dest: usize, // P3 + func: Func, // P4 }, InitCoroutine { @@ -383,6 +383,7 @@ pub struct ProgramState { cursors: RefCell>>, registers: Vec, ended_coroutine: bool, // flag to notify yield coroutine finished + regex_cache: HashMap, } impl ProgramState { @@ -395,6 +396,7 @@ impl ProgramState { cursors, registers, ended_coroutine: false, + regex_cache: HashMap::new(), } } @@ -1173,6 +1175,7 @@ impl Program { } } Insn::Function { + constant_mask, func, start_reg, dest, @@ -1212,7 +1215,12 @@ impl Program { let text = &state.registers[start_reg + 1]; let result = match (pattern, text) { (OwnedValue::Text(pattern), OwnedValue::Text(text)) => { - OwnedValue::Integer(exec_like(pattern, text) as i64) + let cache = if *constant_mask > 0 { + Some(&mut state.regex_cache) + } else { + None + }; + OwnedValue::Integer(exec_like(cache, pattern, text) as i64) } _ => { unreachable!("Like on non-text registers"); @@ -1699,10 +1707,25 @@ fn exec_char(values: Vec) -> OwnedValue { OwnedValue::Text(Rc::new(result)) } -// Implements LIKE pattern matching. -fn exec_like(pattern: &str, text: &str) -> bool { - let re = Regex::new(&pattern.replace('%', ".*").replace('_', ".").to_string()).unwrap(); - re.is_match(text) +fn construct_like_regex(pattern: &str) -> Regex { + Regex::new(&pattern.replace('%', ".*").replace('_', ".").to_string()).unwrap() +} + +// Implements LIKE pattern matching. Caches the constructed regex if a cache is provided +fn exec_like(regex_cache: Option<&mut HashMap>, pattern: &str, text: &str) -> bool { + if let Some(cache) = regex_cache { + match cache.get(pattern) { + Some(re) => re.is_match(text), + None => { + let re = construct_like_regex(pattern); + let res = re.is_match(text); + cache.insert(pattern.to_string(), re); + res + } + } + } else { + construct_like_regex(pattern).is_match(text) + } } fn exec_minmax<'a>( @@ -1876,7 +1899,7 @@ mod tests { }; use mockall::{mock, predicate}; use rand::{rngs::mock::StepRng, thread_rng}; - use std::{cell::Ref, rc::Rc}; + use std::{cell::Ref, collections::HashMap, rc::Rc}; mock! { Cursor { @@ -2224,12 +2247,29 @@ mod tests { } #[test] - fn test_like() { - assert!(exec_like("a%", "aaaa")); - assert!(exec_like("%a%a", "aaaa")); - assert!(exec_like("%a.a", "aaaa")); - assert!(exec_like("a.a%", "aaaa")); - assert!(!exec_like("%a.ab", "aaaa")); + fn test_like_no_cache() { + assert!(exec_like(None, "a%", "aaaa")); + assert!(exec_like(None, "%a%a", "aaaa")); + assert!(exec_like(None, "%a.a", "aaaa")); + assert!(exec_like(None, "a.a%", "aaaa")); + assert!(!exec_like(None, "%a.ab", "aaaa")); + } + + #[test] + fn test_like_with_cache() { + let mut cache = HashMap::new(); + assert!(exec_like(Some(&mut cache), "a%", "aaaa")); + assert!(exec_like(Some(&mut cache), "%a%a", "aaaa")); + assert!(exec_like(Some(&mut cache), "%a.a", "aaaa")); + assert!(exec_like(Some(&mut cache), "a.a%", "aaaa")); + assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa")); + + // again after values have been cached + assert!(exec_like(Some(&mut cache), "a%", "aaaa")); + assert!(exec_like(Some(&mut cache), "%a%a", "aaaa")); + assert!(exec_like(Some(&mut cache), "%a.a", "aaaa")); + assert!(exec_like(Some(&mut cache), "a.a%", "aaaa")); + assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa")); } #[test] From e1b134dd887a9c8e87f783da3465b8041c6ef916 Mon Sep 17 00:00:00 2001 From: rjhallsted Date: Tue, 10 Sep 2024 14:08:15 -0700 Subject: [PATCH 3/7] Fix the spot that constant_mask was being set to 1 --- core/translate/expr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index a44f74d46..7211e70ce 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -454,7 +454,8 @@ pub fn translate_condition_expr( cursor_hint, )?; program.emit_insn(Insn::Function { - constant_mask: 0, + // Currently only constant values for the first arg are supported + constant_mask: 1, func: crate::vdbe::Func::Scalar(ScalarFunc::Like), start_reg: pattern_reg, dest: cur_reg, @@ -815,7 +816,6 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { - // Currently only constant values for the first arg are supported constant_mask: 1, start_reg: target_register + 1, dest: target_register, From a038d1f70001732fd7dfa8858241e154674e7624 Mon Sep 17 00:00:00 2001 From: rjhallsted Date: Tue, 10 Sep 2024 14:24:32 -0700 Subject: [PATCH 4/7] Add comment on why constant_mask is 1 in ScalarFunc::Like case --- core/translate/expr.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 7211e70ce..3fac06c6b 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -816,6 +816,8 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { + // Only constant patterns for LIKE are supported currently, so this + // is always 1 constant_mask: 1, start_reg: target_register + 1, dest: target_register, From 960560dffccb9c1d881efffebe9d4f7857f2662b Mon Sep 17 00:00:00 2001 From: rjhallsted Date: Tue, 10 Sep 2024 14:26:21 -0700 Subject: [PATCH 5/7] Use the same comment for Expr::Like case --- core/translate/expr.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 3fac06c6b..19e4dc160 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -454,7 +454,8 @@ pub fn translate_condition_expr( cursor_hint, )?; program.emit_insn(Insn::Function { - // Currently only constant values for the first arg are supported + // Only constant patterns for LIKE are supported currently, so this + // is always 1 constant_mask: 1, func: crate::vdbe::Func::Scalar(ScalarFunc::Like), start_reg: pattern_reg, From 9791e2074fcf701e33463305b718fb6c618e59c9 Mon Sep 17 00:00:00 2001 From: rjhallsted Date: Tue, 10 Sep 2024 14:32:25 -0700 Subject: [PATCH 6/7] Fix cargo fmt complaint --- sqlite3/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index 69f93cf30..c0495e2c1 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -883,7 +883,7 @@ fn sqlite3_errstr_impl(rc: i32) -> *const std::ffi::c_char { "datatype mismatch", // SQLITE_MISMATCH "bad parameter or other API misuse", // SQLITE_MISUSE #[cfg(feature = "lfs")] - "", // SQLITE_NOLFS + "", // SQLITE_NOLFS #[cfg(not(feature = "lfs"))] "large file support is disabled", // SQLITE_NOLFS "authorization denied", // SQLITE_AUTH From e67f1e910ea7c2019afde82a6994c8f3806ce280 Mon Sep 17 00:00:00 2001 From: rjhallsted Date: Tue, 10 Sep 2024 14:40:57 -0700 Subject: [PATCH 7/7] Functionally meaningless change to get cargo fmt to play nice in ci --- core/vdbe/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index aa21ddaf9..6f0eb889a 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -1724,7 +1724,8 @@ fn exec_like(regex_cache: Option<&mut HashMap>, pattern: &str, te } } } else { - construct_like_regex(pattern).is_match(text) + let re = construct_like_regex(pattern); + re.is_match(text) } }