diff --git a/COMPAT.md b/COMPAT.md
index e72304a30..33e166c11 100644
--- a/COMPAT.md
+++ b/COMPAT.md
@@ -610,8 +610,8 @@ The `regexp` extension is compatible with [sqlean-regexp](https://github.com/nal
 | regexp(pattern, source) | Yes | |
 | regexp_like(source, pattern) | Yes | |
 | regexp_substr(source, pattern) | Yes | |
-| regexp_capture(source, pattern[, n]) | No | |
-| regexp_replace(source, pattern, replacement) | No | |
+| regexp_capture(source, pattern[, n]) | Yes | |
+| regexp_replace(source, pattern, replacement) | Yes | |
 
 ### Vector
 
diff --git a/extensions/regexp/src/lib.rs b/extensions/regexp/src/lib.rs
index f388f5698..e7b448eae 100644
--- a/extensions/regexp/src/lib.rs
+++ b/extensions/regexp/src/lib.rs
@@ -2,7 +2,7 @@ use regex::Regex;
 use turso_ext::{register_extension, scalar, Value, ValueType};
 
 register_extension! {
-    scalars: { regexp, regexp_like, regexp_substr, regexp_replace }
+    scalars: { regexp, regexp_like, regexp_substr, regexp_replace, regexp_capture }
 }
 
 #[scalar(name = "regexp")]
@@ -59,26 +59,59 @@ fn regexp_substr(&self, args: &[Value]) -> Value {
 
 #[scalar(name = "regexp_replace")]
 fn regexp_replace(&self, args: &[Value]) -> Value {
-    let replacement = match args.get(2) {
-        Some(repl) => repl.to_text().unwrap_or_default(),
-        None => "", // If args[2] does not exist, use an empty string
-    };
-
-    match (args.first(), args.get(1)) {
-        (Some(haystack), Some(pattern)) => {
-            let Some(haystack_text) = haystack.to_text() else {
-                return Value::from_text("".to_string()); // Return an empty string if haystack is not valid
-            };
-            let Some(pattern_text) = pattern.to_text() else {
-                return Value::from_text("".to_string()); // Return an empty string if pattern is not valid
-            };
-
-            let re = match Regex::new(pattern_text) {
-                Ok(re) => re,
-                Err(_) => return Value::from_text("".to_string()), // Return an empty string if regex compilation fails
-            };
-            Value::from_text(re.replace(haystack_text, replacement).to_string())
-        }
-        _ => Value::from_text("".to_string()), // Return an empty string for invalid value types
-    }
+    // Replaces every match of `pattern` (args[1]) in `source` (args[0]) with
+    // `replacement` (args[2], empty when missing or not text), following sqlean.
+    // Any invalid input yields an empty string.
+    if args.len() < 2 {
+        return Value::from_text("".to_string());
+    }
+
+    let Some(source_text) = args[0].to_text() else {
+        return Value::from_text("".to_string());
+    };
+    let Some(pattern_text) = args[1].to_text() else {
+        return Value::from_text("".to_string());
+    };
+    let replacement = args.get(2).and_then(|v| v.to_text()).unwrap_or("");
+
+    let Ok(re) = Regex::new(pattern_text) else {
+        return Value::from_text("".to_string());
+    };
+
+    // `replace` would stop after the first match; sqlean replaces all of them.
+    Value::from_text(re.replace_all(source_text, replacement).into_owned())
+}
+
+// regexp_capture(source, pattern[, n]): text of capture group `n` from the first
+// match of `pattern` in `source`. As in sqlean, `n` defaults to 0 (the whole match)
+// when it is missing or not an integer.
+// Returns NULL for missing or non-text arguments, an invalid pattern, a negative `n`,
+// no match, or a group that does not exist or did not participate in the match.
+#[scalar(name = "regexp_capture")]
+fn regexp_capture(args: &[Value]) -> Value {
+    if args.len() < 2 {
+        return Value::null();
+    }
+    let Some(source_text) = args[0].to_text() else {
+        return Value::null();
+    };
+    let Some(pattern_text) = args[1].to_text() else {
+        return Value::null();
+    };
+
+    let group_index = match args.get(2).and_then(|v| v.to_integer()) {
+        // A plain `as usize` cast would wrap a negative index around.
+        Some(n) => match usize::try_from(n) {
+            Ok(index) => index,
+            Err(_) => return Value::null(),
+        },
+        None => 0,
+    };
+
+    let Ok(re) = Regex::new(pattern_text) else {
+        return Value::null();
+    };
+
+    re.captures(source_text)
+        .and_then(|caps| caps.get(group_index))
+        .map_or_else(Value::null, |m| Value::from_text(m.as_str().to_string()))
 }
diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py
index f76896ae7..361ddbf31 100755
--- a/testing/cli_tests/extensions.py
+++ b/testing/cli_tests/extensions.py
@@ -103,6 +103,30 @@ def test_regexp():
         "select regexp_replace('the year is 2021', '([0-9]+)', '$1 or 2050') = 'the year is 2021 or 2050';",
         true,
     )
+    limbo.run_test_fn(
+        "select regexp_replace('a1b22c333', '[0-9]+', '#') = 'a#b#c#';",
+        true,
+    )
+    limbo.run_test_fn(
+        "select regexp_capture('the year is 2021', '([0-9]+)') = '2021';",
+        true,
+    )
+    limbo.run_test_fn(
+        "select regexp_capture('the year is 2021', '[0-9]{2}([0-9]{2})') = '2021';",
+        true,
+    )
+    limbo.run_test_fn(
+        "select regexp_capture('the year is 2021', '[0-9]{2}([0-9]{2})', 1) = '21';",
+        true,
+    )
+    limbo.run_test_fn(
+        "select regexp_capture('abc 123 def', '([a-z]+) ([0-9]+) ([a-z]+)', 2) = '123';",
+        true,
+    )
+    limbo.run_test_fn(
+        "select regexp_capture('no digits here', '([0-9]+)');",
+        null,
+    )
 
     limbo.quit()
 