mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-09 02:04:22 +01:00
Merge 'Cache LIKE regexes' from RJ Barman
This PR adds a regex cache to `ProgramState` so that we can re-use already constructed regexes while processing LIKE expressions. I didn't find anywhere else that seemed like a good fit to put an execution-time only cache like this, so let me know if there's a better spot. To best match sqlite, I added the constant mask into the `Function` instruction (this indicates whether the first argument to the function was determined to be constant at compile time), and decide whether to use the cache based on its value. I've left the value for `constant_mask` as 0 on every other kind of `Function` instruction. That seemed to be the safest choice, as that appears to be what has been implicitly done up to this point. Happy to change that if you'd advise otherwise. Fixes #168 Closes #320
This commit is contained in:
@@ -454,6 +454,9 @@ pub fn translate_condition_expr(
|
||||
cursor_hint,
|
||||
)?;
|
||||
program.emit_insn(Insn::Function {
|
||||
// Only constant patterns for LIKE are supported currently, so this
|
||||
// is always 1
|
||||
constant_mask: 1,
|
||||
func: crate::vdbe::Func::Scalar(ScalarFunc::Like),
|
||||
start_reg: pattern_reg,
|
||||
dest: cur_reg,
|
||||
@@ -655,6 +658,7 @@ pub fn translate_expr(
|
||||
let regs = program.alloc_register();
|
||||
translate_expr(program, referenced_tables, &args[0], regs, cursor_hint)?;
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: regs,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Json(j),
|
||||
@@ -673,6 +677,7 @@ pub fn translate_expr(
|
||||
}
|
||||
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: target_register + 1,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(srf),
|
||||
@@ -734,6 +739,7 @@ pub fn translate_expr(
|
||||
translate_expr(program, referenced_tables, arg, reg, cursor_hint)?;
|
||||
}
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: target_register + 1,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(srf),
|
||||
@@ -811,6 +817,9 @@ pub fn translate_expr(
|
||||
}
|
||||
}
|
||||
program.emit_insn(Insn::Function {
|
||||
// Only constant patterns for LIKE are supported currently, so this
|
||||
// is always 1
|
||||
constant_mask: 1,
|
||||
start_reg: target_register + 1,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(srf),
|
||||
@@ -847,6 +856,7 @@ pub fn translate_expr(
|
||||
cursor_hint,
|
||||
)?;
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: regs,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(srf),
|
||||
@@ -862,6 +872,7 @@ pub fn translate_expr(
|
||||
}
|
||||
let regs = program.alloc_register();
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: regs,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(srf),
|
||||
@@ -883,6 +894,7 @@ pub fn translate_expr(
|
||||
}
|
||||
}
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: target_register + 1,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(ScalarFunc::Date),
|
||||
@@ -934,6 +946,7 @@ pub fn translate_expr(
|
||||
}
|
||||
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: str_reg,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(ScalarFunc::Substring),
|
||||
@@ -958,6 +971,7 @@ pub fn translate_expr(
|
||||
}
|
||||
}
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(srf),
|
||||
@@ -979,6 +993,7 @@ pub fn translate_expr(
|
||||
}
|
||||
}
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: target_register + 1,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(ScalarFunc::Time),
|
||||
@@ -1012,6 +1027,7 @@ pub fn translate_expr(
|
||||
}
|
||||
}
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: target_register + 1,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(srf),
|
||||
@@ -1045,6 +1061,7 @@ pub fn translate_expr(
|
||||
}
|
||||
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: target_register + 1,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(ScalarFunc::Min),
|
||||
@@ -1078,6 +1095,7 @@ pub fn translate_expr(
|
||||
}
|
||||
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: target_register + 1,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(ScalarFunc::Max),
|
||||
@@ -1114,6 +1132,7 @@ pub fn translate_expr(
|
||||
cursor_hint,
|
||||
)?;
|
||||
program.emit_insn(Insn::Function {
|
||||
constant_mask: 0,
|
||||
start_reg: func_reg,
|
||||
dest: target_register,
|
||||
func: crate::vdbe::Func::Scalar(srf),
|
||||
|
||||
@@ -533,12 +533,13 @@ pub fn insn_to_str(
|
||||
"".to_string(),
|
||||
),
|
||||
Insn::Function {
|
||||
constant_mask,
|
||||
start_reg,
|
||||
dest,
|
||||
func,
|
||||
} => (
|
||||
"Function",
|
||||
1,
|
||||
*constant_mask,
|
||||
*start_reg as i32,
|
||||
*dest as i32,
|
||||
OwnedValue::Text(Rc::new(func.to_string())),
|
||||
|
||||
@@ -302,10 +302,10 @@ pub enum Insn {
|
||||
|
||||
// Function
|
||||
Function {
|
||||
// constant_mask: i32, // P1, not used for now
|
||||
start_reg: usize, // P2, start of argument registers
|
||||
dest: usize, // P3
|
||||
func: Func, // P4
|
||||
constant_mask: i32, // P1
|
||||
start_reg: usize, // P2, start of argument registers
|
||||
dest: usize, // P3
|
||||
func: Func, // P4
|
||||
},
|
||||
|
||||
InitCoroutine {
|
||||
@@ -383,6 +383,7 @@ pub struct ProgramState {
|
||||
cursors: RefCell<BTreeMap<CursorID, Box<dyn Cursor>>>,
|
||||
registers: Vec<OwnedValue>,
|
||||
ended_coroutine: bool, // flag to notify yield coroutine finished
|
||||
regex_cache: HashMap<String, Regex>,
|
||||
}
|
||||
|
||||
impl ProgramState {
|
||||
@@ -395,6 +396,7 @@ impl ProgramState {
|
||||
cursors,
|
||||
registers,
|
||||
ended_coroutine: false,
|
||||
regex_cache: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1173,6 +1175,7 @@ impl Program {
|
||||
}
|
||||
}
|
||||
Insn::Function {
|
||||
constant_mask,
|
||||
func,
|
||||
start_reg,
|
||||
dest,
|
||||
@@ -1208,11 +1211,16 @@ impl Program {
|
||||
start_reg,
|
||||
state.registers.len()
|
||||
);
|
||||
let pattern = state.registers[start_reg].clone();
|
||||
let text = state.registers[start_reg + 1].clone();
|
||||
let pattern = &state.registers[start_reg];
|
||||
let text = &state.registers[start_reg + 1];
|
||||
let result = match (pattern, text) {
|
||||
(OwnedValue::Text(pattern), OwnedValue::Text(text)) => {
|
||||
OwnedValue::Integer(exec_like(&pattern, &text) as i64)
|
||||
let cache = if *constant_mask > 0 {
|
||||
Some(&mut state.regex_cache)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
OwnedValue::Integer(exec_like(cache, pattern, text) as i64)
|
||||
}
|
||||
_ => {
|
||||
unreachable!("Like on non-text registers");
|
||||
@@ -1699,10 +1707,26 @@ fn exec_char(values: Vec<OwnedValue>) -> OwnedValue {
|
||||
OwnedValue::Text(Rc::new(result))
|
||||
}
|
||||
|
||||
// Implements LIKE pattern matching.
|
||||
fn exec_like(pattern: &str, text: &str) -> bool {
|
||||
let re = Regex::new(&pattern.replace('%', ".*").replace('_', ".").to_string()).unwrap();
|
||||
re.is_match(text)
|
||||
fn construct_like_regex(pattern: &str) -> Regex {
|
||||
Regex::new(&pattern.replace('%', ".*").replace('_', ".").to_string()).unwrap()
|
||||
}
|
||||
|
||||
// Implements LIKE pattern matching. Caches the constructed regex if a cache is provided
|
||||
fn exec_like(regex_cache: Option<&mut HashMap<String, Regex>>, pattern: &str, text: &str) -> bool {
|
||||
if let Some(cache) = regex_cache {
|
||||
match cache.get(pattern) {
|
||||
Some(re) => re.is_match(text),
|
||||
None => {
|
||||
let re = construct_like_regex(pattern);
|
||||
let res = re.is_match(text);
|
||||
cache.insert(pattern.to_string(), re);
|
||||
res
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let re = construct_like_regex(pattern);
|
||||
re.is_match(text)
|
||||
}
|
||||
}
|
||||
|
||||
fn exec_minmax<'a>(
|
||||
@@ -1876,7 +1900,7 @@ mod tests {
|
||||
};
|
||||
use mockall::{mock, predicate};
|
||||
use rand::{rngs::mock::StepRng, thread_rng};
|
||||
use std::{cell::Ref, rc::Rc};
|
||||
use std::{cell::Ref, collections::HashMap, rc::Rc};
|
||||
|
||||
mock! {
|
||||
Cursor {
|
||||
@@ -2224,12 +2248,29 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_like() {
|
||||
assert!(exec_like("a%", "aaaa"));
|
||||
assert!(exec_like("%a%a", "aaaa"));
|
||||
assert!(exec_like("%a.a", "aaaa"));
|
||||
assert!(exec_like("a.a%", "aaaa"));
|
||||
assert!(!exec_like("%a.ab", "aaaa"));
|
||||
fn test_like_no_cache() {
|
||||
assert!(exec_like(None, "a%", "aaaa"));
|
||||
assert!(exec_like(None, "%a%a", "aaaa"));
|
||||
assert!(exec_like(None, "%a.a", "aaaa"));
|
||||
assert!(exec_like(None, "a.a%", "aaaa"));
|
||||
assert!(!exec_like(None, "%a.ab", "aaaa"));
|
||||
}
|
||||
|
||||
#[test]
// Exercises `exec_like` with a shared regex cache.
fn test_like_with_cache() {
    let mut cache = HashMap::new();
    let matching = ["a%", "%a%a", "%a.a", "a.a%"];

    // The first pass compiles and caches each regex; the second pass repeats
    // the same checks after the values have been cached.
    for _ in 0..2 {
        for &pattern in &matching {
            assert!(exec_like(Some(&mut cache), pattern, "aaaa"));
        }
        assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa"));
    }
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -883,7 +883,7 @@ fn sqlite3_errstr_impl(rc: i32) -> *const std::ffi::c_char {
|
||||
"datatype mismatch", // SQLITE_MISMATCH
|
||||
"bad parameter or other API misuse", // SQLITE_MISUSE
|
||||
#[cfg(feature = "lfs")]
|
||||
"", // SQLITE_NOLFS
|
||||
"", // SQLITE_NOLFS
|
||||
#[cfg(not(feature = "lfs"))]
|
||||
"large file support is disabled", // SQLITE_NOLFS
|
||||
"authorization denied", // SQLITE_AUTH
|
||||
|
||||
Reference in New Issue
Block a user