Merge 'Fixes like function when pattern has regex meta chars' from Vrishabh

Fixes #552
In our `construct_like_regex` function, we were not escaping regex
metacharacters in the LIKE pattern, which was causing the failure.
Limbo output with this branch:
```
limbo> select like('\%A', '\A');
1
limbo> select like('A$%', 'A$');
1
limbo> select like('%a.a', 'aaaa');
0
```

Closes #553
This commit is contained in:
Pekka Enberg
2024-12-27 18:35:47 +02:00
3 changed files with 51 additions and 11 deletions

View File

@@ -44,7 +44,8 @@ sieve-cache = "0.1.4"
sqlite3-parser = { path = "../vendored/sqlite3-parser" }
thiserror = "1.0.61"
getrandom = { version = "0.2.15", features = ["js"] }
regex = "1.10.5"
regex = "1.11.1"
regex-syntax = { version = "0.8.5", default-features = false, features = ["unicode"] }
chrono = "0.4.38"
julian_day_converter = "0.3.2"
jsonb = { version = "0.4.4", optional = true }

View File

@@ -46,7 +46,7 @@ use datetime::{exec_date, exec_time, exec_unixepoch};
use rand::distributions::{Distribution, Uniform};
use rand::{thread_rng, Rng};
use regex::Regex;
use regex::{Regex, RegexBuilder};
use std::borrow::{Borrow, BorrowMut};
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap};
@@ -3199,10 +3199,31 @@ fn exec_char(values: Vec<OwnedValue>) -> OwnedValue {
}
fn construct_like_regex(pattern: &str) -> Regex {
let mut regex_pattern = String::from("(?i)^");
regex_pattern.push_str(&pattern.replace('%', ".*").replace('_', "."));
let mut regex_pattern = String::with_capacity(pattern.len() * 2);
regex_pattern.push('^');
for c in pattern.chars() {
match c {
'\\' => regex_pattern.push_str("\\\\"),
'%' => regex_pattern.push_str(".*"),
'_' => regex_pattern.push('.'),
ch => {
if regex_syntax::is_meta_character(c) {
regex_pattern.push('\\');
}
regex_pattern.push(ch);
}
}
}
regex_pattern.push('$');
Regex::new(&regex_pattern).unwrap()
RegexBuilder::new(&regex_pattern)
.case_insensitive(true)
.dot_matches_new_line(true)
.build()
.unwrap()
}
// Implements LIKE pattern matching. Caches the constructed regex if a cache is provided
@@ -4353,12 +4374,18 @@ mod tests {
);
}
#[test]
fn test_like_with_escape_or_regexmeta_chars() {
assert!(exec_like(None, r#"\%A"#, r#"\A"#));
assert!(exec_like(None, "%a%a", "aaaa"));
}
#[test]
fn test_like_no_cache() {
assert!(exec_like(None, "a%", "aaaa"));
assert!(exec_like(None, "%a%a", "aaaa"));
assert!(exec_like(None, "%a.a", "aaaa"));
assert!(exec_like(None, "a.a%", "aaaa"));
assert!(!exec_like(None, "%a.a", "aaaa"));
assert!(!exec_like(None, "a.a%", "aaaa"));
assert!(!exec_like(None, "%a.ab", "aaaa"));
}
@@ -4367,15 +4394,15 @@ mod tests {
let mut cache = HashMap::new();
assert!(exec_like(Some(&mut cache), "a%", "aaaa"));
assert!(exec_like(Some(&mut cache), "%a%a", "aaaa"));
assert!(exec_like(Some(&mut cache), "%a.a", "aaaa"));
assert!(exec_like(Some(&mut cache), "a.a%", "aaaa"));
assert!(!exec_like(Some(&mut cache), "%a.a", "aaaa"));
assert!(!exec_like(Some(&mut cache), "a.a%", "aaaa"));
assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa"));
// again after values have been cached
assert!(exec_like(Some(&mut cache), "a%", "aaaa"));
assert!(exec_like(Some(&mut cache), "%a%a", "aaaa"));
assert!(exec_like(Some(&mut cache), "%a.a", "aaaa"));
assert!(exec_like(Some(&mut cache), "a.a%", "aaaa"));
assert!(!exec_like(Some(&mut cache), "%a.a", "aaaa"));
assert!(!exec_like(Some(&mut cache), "a.a%", "aaaa"));
assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa"));
}

View File

@@ -77,3 +77,15 @@ Robert|Roberts}
do_execsql_test where-like-impossible {
select * from products where 'foobar' like 'fooba';
} {}
do_execsql_test like-with-backslash {
select like('\%A', '\A')
} {1}
do_execsql_test like-with-dollar {
select like('A$%', 'A$')
} {1}
do_execsql_test like-with-dot {
select like('%a.a', 'aaaa')
} {0}