mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-05 16:24:23 +01:00
Merge 'Fixes glob giving wrong results in some cases ' from Vrishabh
Fixes #577. The previous implementation did not escape regex meta characters, and in certain cases a glob pattern has a different meaning than the same text read as a regex. For example, the following glob patterns are shown with their regex equivalents: - `[][]` translates to `[\]\[]` - `[^][]` translates to `[^\]\[]` Closes #578
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use regex::{Regex, RegexBuilder};
|
||||
|
||||
use crate::{types::OwnedValue, LimboError};
|
||||
@@ -61,6 +63,124 @@ fn construct_like_regex_with_escape(pattern: &str, escape: char) -> Regex {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
// Implements GLOB pattern matching. Caches the constructed regex if a cache is provided
|
||||
pub fn exec_glob(
|
||||
regex_cache: Option<&mut HashMap<String, Regex>>,
|
||||
pattern: &str,
|
||||
text: &str,
|
||||
) -> bool {
|
||||
if let Some(cache) = regex_cache {
|
||||
match cache.get(pattern) {
|
||||
Some(re) => re.is_match(text),
|
||||
None => match construct_glob_regex(pattern) {
|
||||
Ok(re) => {
|
||||
let res = re.is_match(text);
|
||||
cache.insert(pattern.to_string(), re);
|
||||
res
|
||||
}
|
||||
Err(_) => false,
|
||||
},
|
||||
}
|
||||
} else {
|
||||
construct_glob_regex(pattern)
|
||||
.map(|re| re.is_match(text))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
fn push_char_to_regex_pattern(c: char, regex_pattern: &mut String) {
|
||||
if regex_syntax::is_meta_character(c) {
|
||||
regex_pattern.push('\\');
|
||||
}
|
||||
regex_pattern.push(c);
|
||||
}
|
||||
|
||||
fn construct_glob_regex(pattern: &str) -> Result<Regex, LimboError> {
|
||||
let mut regex_pattern = String::with_capacity(pattern.len() * 2);
|
||||
|
||||
regex_pattern.push('^');
|
||||
|
||||
let mut chars = pattern.chars();
|
||||
let mut bracket_closed = true;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
match ch {
|
||||
'[' => {
|
||||
bracket_closed = false;
|
||||
regex_pattern.push('[');
|
||||
if let Some(next_ch) = chars.next() {
|
||||
match next_ch {
|
||||
']' => {
|
||||
// The string enclosed by the brackets cannot be empty;
|
||||
// therefore ']' can be allowed between the brackets,
|
||||
// provided that it is the first character.
|
||||
// so this means
|
||||
// - `[]]` will be translated to `[\]]`
|
||||
// - `[[]` will be translated to `[\[]`
|
||||
regex_pattern.push_str("\\]");
|
||||
}
|
||||
'^' => {
|
||||
// For the most cases we can pass `^` directly to regex
|
||||
// but in certain cases like [^][a] , `[^]` will make regex crate
|
||||
// throw unenclosed character class. So this means
|
||||
// - `[^][a]` will be translated to `[^\]a]`
|
||||
regex_pattern.push('^');
|
||||
if let Some(next_ch_2) = chars.next() {
|
||||
match next_ch_2 {
|
||||
']' => {
|
||||
regex_pattern.push('\\');
|
||||
regex_pattern.push(']');
|
||||
}
|
||||
c => {
|
||||
push_char_to_regex_pattern(c, &mut regex_pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
c => {
|
||||
push_char_to_regex_pattern(c, &mut regex_pattern);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
while let Some(next_ch) = chars.next() {
|
||||
match next_ch {
|
||||
']' => {
|
||||
bracket_closed = true;
|
||||
regex_pattern.push(']');
|
||||
break;
|
||||
}
|
||||
'-' => {
|
||||
regex_pattern.push('-');
|
||||
}
|
||||
c => {
|
||||
push_char_to_regex_pattern(c, &mut regex_pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'?' => {
|
||||
regex_pattern.push('.');
|
||||
}
|
||||
'*' => {
|
||||
regex_pattern.push_str(".*");
|
||||
}
|
||||
c => {
|
||||
push_char_to_regex_pattern(c, &mut regex_pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
regex_pattern.push('$');
|
||||
|
||||
if bracket_closed {
|
||||
Ok(Regex::new(®ex_pattern).unwrap())
|
||||
} else {
|
||||
Result::Err(LimboError::Constraint(
|
||||
"blob pattern is not closed".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
@@ -82,4 +202,16 @@ mod test {
|
||||
assert!(!exec_like_with_escape("abcXX", "abc", 'X'));
|
||||
assert!(!exec_like_with_escape("abcXX", "abcXX", 'X'));
|
||||
}
|
||||
|
||||
#[test]
fn test_glob_no_cache() {
    // (pattern, text, expected result of `exec_glob` without a regex cache)
    let cases: [(&str, &str, bool); 8] = [
        (r#"?*/abc/?*"#, r#"x//a/ab/abc/y"#, true),
        (r#"a[1^]"#, r#"a1"#, true),
        (r#"a[1^]*"#, r#"a^"#, true),
        // Unclosed brackets never match.
        (r#"a[a*"#, r#"a["#, false),
        (r#"a[a"#, r#"a[a"#, false),
        (r#"a[[]"#, r#"a["#, true),
        // `]` right after `[^` is a literal member of the negated class.
        (r#"abc[^][*?]efg"#, r#"abcdefg"#, true),
        (r#"abc[^][*?]efg"#, r#"abc]efg"#, false),
    ];

    for &(pattern, text, expected) in cases.iter() {
        assert_eq!(
            exec_glob(None, pattern, text),
            expected,
            "pattern {:?} against text {:?}",
            pattern,
            text
        );
    }
}
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ use crate::vdbe::insn::Insn;
|
||||
use crate::{function::JsonFunc, json::get_json, json::json_array, json::json_array_length};
|
||||
use crate::{Connection, Result, Rows, TransactionState, DATABASE_VERSION};
|
||||
use datetime::{exec_date, exec_time, exec_unixepoch};
|
||||
use likeop::{construct_like_escape_arg, exec_like_with_escape};
|
||||
use likeop::{construct_like_escape_arg, exec_glob, exec_like_with_escape};
|
||||
use rand::distributions::{Distribution, Uniform};
|
||||
use rand::{thread_rng, Rng};
|
||||
use regex::{Regex, RegexBuilder};
|
||||
@@ -2880,31 +2880,6 @@ fn exec_like(regex_cache: Option<&mut HashMap<String, Regex>>, pattern: &str, te
|
||||
}
|
||||
}
|
||||
|
||||
fn construct_glob_regex(pattern: &str) -> Regex {
|
||||
let mut regex_pattern = String::from("^");
|
||||
regex_pattern.push_str(&pattern.replace('*', ".*").replace("?", "."));
|
||||
regex_pattern.push('$');
|
||||
Regex::new(®ex_pattern).unwrap()
|
||||
}
|
||||
|
||||
// Implements GLOB pattern matching. Caches the constructed regex if a cache is provided
|
||||
fn exec_glob(regex_cache: Option<&mut HashMap<String, Regex>>, pattern: &str, text: &str) -> bool {
|
||||
if let Some(cache) = regex_cache {
|
||||
match cache.get(pattern) {
|
||||
Some(re) => re.is_match(text),
|
||||
None => {
|
||||
let re = construct_glob_regex(pattern);
|
||||
let res = re.is_match(text);
|
||||
cache.insert(pattern.to_string(), re);
|
||||
res
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let re = construct_glob_regex(pattern);
|
||||
re.is_match(text)
|
||||
}
|
||||
}
|
||||
|
||||
fn exec_min(regs: Vec<&OwnedValue>) -> OwnedValue {
|
||||
regs.iter()
|
||||
.min()
|
||||
|
||||
@@ -68,3 +68,75 @@ Robert|Roberts}
|
||||
do_execsql_test where-glob-impossible {
  select * from products where 'foobar' glob 'fooba';
} {}

# GLOB cases: each row is {test number, pattern, text, expected result}.
# Every row becomes one test named glob-<testnum>.1, so test numbers must be unique.
foreach {testnum pattern text ans} {
   1 abcdefg abcdefg 1
   2 abcdefG abcdefg 0
   3 abcdef abcdefg 0
   4 abcdefgh abcdefg 0
   5 abcdef? abcdefg 1
   6 abcdef? abcdef 0
   7 abcdef? abcdefgh 0
   8 abcdefg abcdef? 0
   9 abcdef? abcdef? 1
  10 abc/def abc/def 1
  11 abc//def abc/def 0
  12 */abc/* x/abc/y 1
  13 */abc/* /abc/ 1
  14 */abc/* x///a/ab/abc 0
  15 */abc/* x//a/ab/abc/ 1
  16 */abc/* x///a/ab/abc 0
  17 */abc/* x//a/ab/abc/ 1
  18 **/abc/** x//a/ab/abc/ 1
  19 *?/abc/*? x//a/ab/abc/y 1
  20 ?*/abc/?* x//a/ab/abc/y 1
  21 {abc[cde]efg} abcbefg 0
  22 {abc[cde]efg} abccefg 1
  23 {abc[cde]efg} abcdefg 1
  24 {abc[cde]efg} abceefg 1
  25 {abc[cde]efg} abcfefg 0
  26 {abc[^cde]efg} abcbefg 1
  27 {abc[^cde]efg} abccefg 0
  28 {abc[^cde]efg} abcdefg 0
  29 {abc[^cde]efg} abceefg 0
  30 {abc[^cde]efg} abcfefg 1
  31 {abc[c-e]efg} abcbefg 0
  32 {abc[c-e]efg} abccefg 1
  33 {abc[c-e]efg} abcdefg 1
  34 {abc[c-e]efg} abceefg 1
  35 {abc[c-e]efg} abcfefg 0
  36 {abc[^c-e]efg} abcbefg 1
  37 {abc[^c-e]efg} abccefg 0
  38 {abc[^c-e]efg} abcdefg 0
  39 {abc[^c-e]efg} abceefg 0
  40 {abc[^c-e]efg} abcfefg 1
  41 {abc[c-e]efg} abc-efg 0
  42 {abc[-ce]efg} abc-efg 1
  43 {abc[ce-]efg} abc-efg 1
  44 {abc[][*?]efg} {abc]efg} 1
  45 {abc[][*?]efg} {abc*efg} 1
  46 {abc[][*?]efg} {abc?efg} 1
  47 {abc[][*?]efg} {abc[efg} 1
  48 {abc[^][*?]efg} {abc]efg} 0
  49 {abc[^][*?]efg} {abc*efg} 0
  50 {abc[^][*?]efg} {abc?efg} 0
  51 {abc[^][*?]efg} {abc[efg} 0
  52 {abc[^][*?]efg} {abcdefg} 1
  53 {*[xyz]efg} {abcxefg} 1
  54 {*[xyz]efg} {abcwefg} 0
  55 {[-c]} {c} 1
  56 {[-c]} {-} 1
  57 {[-c]} {x} 0
} {
  do_execsql_test glob-$testnum.1 "SELECT glob ( '$pattern' , '$text' )" $::ans
}

# A '[' that is never closed matches nothing; a stray ']' is an ordinary character.
foreach {testnum pattern text ans} {
  1 {abc[} {abc[} 0
  2 {abc[} {abc} 0
  3 {a]b} {a]b} 1
  4 {a]b} {a[b} 0
} {
  do_execsql_test glob-unenclosed-$testnum.1 "SELECT glob ( '$pattern' , '$text' )" $::ans
}
|
||||
|
||||
Reference in New Issue
Block a user