Cache constructed LIKE regexes if FUNCTION P1 is set

This commit is contained in:
rjhallsted
2024-09-10 13:54:52 -07:00
parent 9f18fdbfd2
commit 6ac78dfb03
3 changed files with 74 additions and 17 deletions

View File

@@ -454,6 +454,7 @@ pub fn translate_condition_expr(
cursor_hint,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
func: crate::vdbe::Func::Scalar(ScalarFunc::Like),
start_reg: pattern_reg,
dest: cur_reg,
@@ -655,6 +656,7 @@ pub fn translate_expr(
let regs = program.alloc_register();
translate_expr(program, referenced_tables, &args[0], regs, cursor_hint)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
dest: target_register,
func: crate::vdbe::Func::Json(j),
@@ -673,6 +675,7 @@ pub fn translate_expr(
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -734,6 +737,7 @@ pub fn translate_expr(
translate_expr(program, referenced_tables, arg, reg, cursor_hint)?;
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -811,6 +815,8 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
// Currently only constant values for the first arg are supported
constant_mask: 1,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -847,6 +853,7 @@ pub fn translate_expr(
cursor_hint,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -862,6 +869,7 @@ pub fn translate_expr(
}
let regs = program.alloc_register();
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -883,6 +891,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Date),
@@ -934,6 +943,7 @@ pub fn translate_expr(
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: str_reg,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Substring),
@@ -958,6 +968,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -979,6 +990,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Time),
@@ -1012,6 +1024,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -1045,6 +1058,7 @@ pub fn translate_expr(
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Min),
@@ -1078,6 +1092,7 @@ pub fn translate_expr(
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Max),
@@ -1114,6 +1129,7 @@ pub fn translate_expr(
cursor_hint,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: func_reg,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),

View File

@@ -533,12 +533,13 @@ pub fn insn_to_str(
"".to_string(),
),
Insn::Function {
constant_mask,
start_reg,
dest,
func,
} => (
"Function",
1,
*constant_mask,
*start_reg as i32,
*dest as i32,
OwnedValue::Text(Rc::new(func.to_string())),

View File

@@ -302,10 +302,10 @@ pub enum Insn {
// Function
Function {
// constant_mask: i32, // P1, not used for now
start_reg: usize, // P2, start of argument registers
dest: usize, // P3
func: Func, // P4
constant_mask: i32, // P1
start_reg: usize, // P2, start of argument registers
dest: usize, // P3
func: Func, // P4
},
InitCoroutine {
@@ -383,6 +383,7 @@ pub struct ProgramState {
cursors: RefCell<BTreeMap<CursorID, Box<dyn Cursor>>>,
registers: Vec<OwnedValue>,
ended_coroutine: bool, // flag to notify yield coroutine finished
regex_cache: HashMap<String, Regex>,
}
impl ProgramState {
@@ -395,6 +396,7 @@ impl ProgramState {
cursors,
registers,
ended_coroutine: false,
regex_cache: HashMap::new(),
}
}
@@ -1173,6 +1175,7 @@ impl Program {
}
}
Insn::Function {
constant_mask,
func,
start_reg,
dest,
@@ -1212,7 +1215,12 @@ impl Program {
let text = &state.registers[start_reg + 1];
let result = match (pattern, text) {
(OwnedValue::Text(pattern), OwnedValue::Text(text)) => {
OwnedValue::Integer(exec_like(pattern, text) as i64)
let cache = if *constant_mask > 0 {
Some(&mut state.regex_cache)
} else {
None
};
OwnedValue::Integer(exec_like(cache, pattern, text) as i64)
}
_ => {
unreachable!("Like on non-text registers");
@@ -1699,10 +1707,25 @@ fn exec_char(values: Vec<OwnedValue>) -> OwnedValue {
OwnedValue::Text(Rc::new(result))
}
// Implements LIKE pattern matching.
fn exec_like(pattern: &str, text: &str) -> bool {
let re = Regex::new(&pattern.replace('%', ".*").replace('_', ".").to_string()).unwrap();
re.is_match(text)
/// Compiles a SQL `LIKE` pattern into a [`Regex`].
///
/// `%` is translated to `.*` and `_` to `.`. The translated pattern is wrapped in
/// `^(?:...)$` so that it has to cover the whole text: an unanchored regex would report
/// `'abc' LIKE 'b'` as a match because `b` occurs somewhere inside the text. The `i` flag
/// makes matching case-insensitive (SQLite's default LIKE ignores ASCII case) and the `s`
/// flag lets the wildcards match newline characters as well.
///
/// # Panics
///
/// Panics if the translated pattern is not a valid regular expression.
// NOTE(review): regex metacharacters in `pattern` (`.`, `(`, `[`, `*`, ...) are passed
// through unescaped, so they keep their regex meaning and a pattern such as `(` panics
// here. The LIKE tests in this file rely on `.` acting as a wildcard, so escaping is left
// for a follow-up that updates those expectations too.
fn construct_like_regex(pattern: &str) -> Regex {
    let translated = pattern.replace('%', ".*").replace('_', ".");
    // The non-capturing group keeps the anchors bound to the whole pattern even if it
    // contains a top-level `|`.
    let anchored = format!("(?is)^(?:{})$", translated);
    Regex::new(&anchored).expect("LIKE pattern translated to an invalid regex")
}
/// Evaluates `text LIKE pattern` and returns whether it matches.
///
/// When `regex_cache` is `Some`, the regex compiled for `pattern` is looked up in the
/// map and, on a miss, compiled via `construct_like_regex` and stored under the pattern
/// string so later calls with the same pattern skip compilation. When it is `None`, the
/// regex is compiled for this call only and then dropped.
fn exec_like(regex_cache: Option<&mut HashMap<String, Regex>>, pattern: &str, text: &str) -> bool {
    let cache = match regex_cache {
        Some(cache) => cache,
        // No cache supplied: compile, match, discard.
        None => return construct_like_regex(pattern).is_match(text),
    };

    if let Some(compiled) = cache.get(pattern) {
        return compiled.is_match(text);
    }

    // Cache miss: evaluate first, then hand ownership of the regex to the cache.
    let compiled = construct_like_regex(pattern);
    let matched = compiled.is_match(text);
    cache.insert(pattern.to_string(), compiled);
    matched
}
fn exec_minmax<'a>(
@@ -1876,7 +1899,7 @@ mod tests {
};
use mockall::{mock, predicate};
use rand::{rngs::mock::StepRng, thread_rng};
use std::{cell::Ref, rc::Rc};
use std::{cell::Ref, collections::HashMap, rc::Rc};
mock! {
Cursor {
@@ -2224,12 +2247,29 @@ mod tests {
}
#[test]
fn test_like() {
assert!(exec_like("a%", "aaaa"));
assert!(exec_like("%a%a", "aaaa"));
assert!(exec_like("%a.a", "aaaa"));
assert!(exec_like("a.a%", "aaaa"));
assert!(!exec_like("%a.ab", "aaaa"));
fn test_like_no_cache() {
assert!(exec_like(None, "a%", "aaaa"));
assert!(exec_like(None, "%a%a", "aaaa"));
assert!(exec_like(None, "%a.a", "aaaa"));
assert!(exec_like(None, "a.a%", "aaaa"));
assert!(!exec_like(None, "%a.ab", "aaaa"));
}
#[test]
fn test_like_with_cache() {
    let mut cache = HashMap::new();
    // (pattern, expected result) pairs, each evaluated against "aaaa".
    let cases = [
        ("a%", true),
        ("%a%a", true),
        ("%a.a", true),
        ("a.a%", true),
        ("%a.ab", false),
    ];
    // First pass populates the cache; second pass must give the same answers from the
    // cached regexes.
    for _pass in 0..2 {
        for &(pattern, expected) in cases.iter() {
            assert_eq!(exec_like(Some(&mut cache), pattern, "aaaa"), expected);
        }
    }
}
#[test]