Merge 'Cache LIKE regexes' from RJ Barman

This PR adds a regex cache to `ProgramState` so that we can re-use
already constructed regexes while processing LIKE expressions. I didn't
find anywhere else that seemed like a good fit to put an execution-time
only cache like this, so let me know if there's a better spot.
To best match sqlite, I added the constant mask into the `Function`
instruction (this indicates whether the first argument to the function
was determined to be constant at compile time), and decide whether to
use the cache based on its value. I've left the value for
`constant_mask` as 0 on every other kind of `Function` instruction. That
seemed to be the safest choice, as that appears to be what has been
implicitly done up to this point. Happy to change that if you'd advise
otherwise.

Fixes #168
Closes #320
This commit is contained in:
Pekka Enberg
2024-09-12 16:57:53 +03:00
4 changed files with 81 additions and 20 deletions

View File

@@ -454,6 +454,9 @@ pub fn translate_condition_expr(
cursor_hint,
)?;
program.emit_insn(Insn::Function {
// Only constant patterns for LIKE are supported currently, so this
// is always 1
constant_mask: 1,
func: crate::vdbe::Func::Scalar(ScalarFunc::Like),
start_reg: pattern_reg,
dest: cur_reg,
@@ -655,6 +658,7 @@ pub fn translate_expr(
let regs = program.alloc_register();
translate_expr(program, referenced_tables, &args[0], regs, cursor_hint)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
dest: target_register,
func: crate::vdbe::Func::Json(j),
@@ -673,6 +677,7 @@ pub fn translate_expr(
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -734,6 +739,7 @@ pub fn translate_expr(
translate_expr(program, referenced_tables, arg, reg, cursor_hint)?;
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -811,6 +817,9 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
// Only constant patterns for LIKE are supported currently, so this
// is always 1
constant_mask: 1,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -847,6 +856,7 @@ pub fn translate_expr(
cursor_hint,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -862,6 +872,7 @@ pub fn translate_expr(
}
let regs = program.alloc_register();
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -883,6 +894,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Date),
@@ -934,6 +946,7 @@ pub fn translate_expr(
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: str_reg,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Substring),
@@ -958,6 +971,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -979,6 +993,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Time),
@@ -1012,6 +1027,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
@@ -1045,6 +1061,7 @@ pub fn translate_expr(
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Min),
@@ -1078,6 +1095,7 @@ pub fn translate_expr(
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Max),
@@ -1114,6 +1132,7 @@ pub fn translate_expr(
cursor_hint,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: func_reg,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),

View File

@@ -533,12 +533,13 @@ pub fn insn_to_str(
"".to_string(),
),
Insn::Function {
constant_mask,
start_reg,
dest,
func,
} => (
"Function",
1,
*constant_mask,
*start_reg as i32,
*dest as i32,
OwnedValue::Text(Rc::new(func.to_string())),

View File

@@ -302,10 +302,10 @@ pub enum Insn {
// Function
Function {
// constant_mask: i32, // P1, not used for now
start_reg: usize, // P2, start of argument registers
dest: usize, // P3
func: Func, // P4
constant_mask: i32, // P1
start_reg: usize, // P2, start of argument registers
dest: usize, // P3
func: Func, // P4
},
InitCoroutine {
@@ -383,6 +383,7 @@ pub struct ProgramState {
cursors: RefCell<BTreeMap<CursorID, Box<dyn Cursor>>>,
registers: Vec<OwnedValue>,
ended_coroutine: bool, // flag to notify yield coroutine finished
regex_cache: HashMap<String, Regex>,
}
impl ProgramState {
@@ -395,6 +396,7 @@ impl ProgramState {
cursors,
registers,
ended_coroutine: false,
regex_cache: HashMap::new(),
}
}
@@ -1173,6 +1175,7 @@ impl Program {
}
}
Insn::Function {
constant_mask,
func,
start_reg,
dest,
@@ -1208,11 +1211,16 @@ impl Program {
start_reg,
state.registers.len()
);
let pattern = state.registers[start_reg].clone();
let text = state.registers[start_reg + 1].clone();
let pattern = &state.registers[start_reg];
let text = &state.registers[start_reg + 1];
let result = match (pattern, text) {
(OwnedValue::Text(pattern), OwnedValue::Text(text)) => {
OwnedValue::Integer(exec_like(&pattern, &text) as i64)
let cache = if *constant_mask > 0 {
Some(&mut state.regex_cache)
} else {
None
};
OwnedValue::Integer(exec_like(cache, pattern, text) as i64)
}
_ => {
unreachable!("Like on non-text registers");
@@ -1699,10 +1707,26 @@ fn exec_char(values: Vec<OwnedValue>) -> OwnedValue {
OwnedValue::Text(Rc::new(result))
}
// Implements LIKE pattern matching.
fn exec_like(pattern: &str, text: &str) -> bool {
let re = Regex::new(&pattern.replace('%', ".*").replace('_', ".").to_string()).unwrap();
re.is_match(text)
/// Builds the `Regex` used to evaluate a LIKE `pattern`.
///
/// Every `%` in `pattern` is replaced with `.*` and every `_` with `.`; the
/// resulting string is then compiled as a regular expression.
///
/// # Panics
///
/// Panics if the translated string is not accepted by `Regex::new`.
///
/// NOTE(review): all other characters are passed through unchanged, so regex
/// metacharacters in `pattern` are presumably interpreted as regex syntax
/// rather than literals — confirm this is intended.
fn construct_like_regex(pattern: &str) -> Regex {
    let translated = pattern.replace('%', ".*").replace('_', ".");
    Regex::new(&translated).unwrap()
}
// Implements LIKE pattern matching.
//
// When `regex_cache` is `Some`, the compiled regex is looked up by the raw
// `pattern` string. On a miss the regex is built, used for this match, and
// then stored in the cache. When `regex_cache` is `None`, the regex is built
// for this call only and discarded afterwards.
fn exec_like(regex_cache: Option<&mut HashMap<String, Regex>>, pattern: &str, text: &str) -> bool {
    let cache = match regex_cache {
        Some(cache) => cache,
        // No cache supplied: compile, match, and throw the regex away.
        None => return construct_like_regex(pattern).is_match(text),
    };

    // Cache hit: reuse the stored regex without allocating a key.
    if let Some(cached) = cache.get(pattern) {
        return cached.is_match(text);
    }

    // Cache miss: compile once, evaluate, then remember it for later calls.
    let compiled = construct_like_regex(pattern);
    let matched = compiled.is_match(text);
    cache.insert(pattern.to_string(), compiled);
    matched
}
fn exec_minmax<'a>(
@@ -1876,7 +1900,7 @@ mod tests {
};
use mockall::{mock, predicate};
use rand::{rngs::mock::StepRng, thread_rng};
use std::{cell::Ref, rc::Rc};
use std::{cell::Ref, collections::HashMap, rc::Rc};
mock! {
Cursor {
@@ -2224,12 +2248,29 @@ mod tests {
}
#[test]
fn test_like() {
assert!(exec_like("a%", "aaaa"));
assert!(exec_like("%a%a", "aaaa"));
assert!(exec_like("%a.a", "aaaa"));
assert!(exec_like("a.a%", "aaaa"));
assert!(!exec_like("%a.ab", "aaaa"));
fn test_like_no_cache() {
assert!(exec_like(None, "a%", "aaaa"));
assert!(exec_like(None, "%a%a", "aaaa"));
assert!(exec_like(None, "%a.a", "aaaa"));
assert!(exec_like(None, "a.a%", "aaaa"));
assert!(!exec_like(None, "%a.ab", "aaaa"));
}
#[test]
// Exercises `exec_like` with a shared regex cache against the text "aaaa".
fn test_like_with_cache() {
    let mut cache = HashMap::new();
    // (pattern, expected result of matching against "aaaa")
    let cases = [
        ("a%", true),
        ("%a%a", true),
        ("%a.a", true),
        ("a.a%", true),
        ("%a.ab", false),
    ];
    // The first pass runs against an initially empty cache; the second pass
    // repeats the same patterns after values have been cached.
    for _pass in 0..2 {
        for &(pattern, expected) in cases.iter() {
            assert!(exec_like(Some(&mut cache), pattern, "aaaa") == expected);
        }
    }
}
#[test]

View File

@@ -883,7 +883,7 @@ fn sqlite3_errstr_impl(rc: i32) -> *const std::ffi::c_char {
"datatype mismatch", // SQLITE_MISMATCH
"bad parameter or other API misuse", // SQLITE_MISUSE
#[cfg(feature = "lfs")]
"", // SQLITE_NOLFS
"", // SQLITE_NOLFS
#[cfg(not(feature = "lfs"))]
"large file support is disabled", // SQLITE_NOLFS
"authorization denied", // SQLITE_AUTH