Merge 'Add regexp capture' from bit-aloo

This PR adds RegExp Capture to the regexp module

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #2465
This commit is contained in:
Jussi Saurio
2025-08-06 12:13:19 +03:00
committed by GitHub
3 changed files with 64 additions and 22 deletions

View File

@@ -610,8 +610,8 @@ The `regexp` extension is compatible with [sqlean-regexp](https://github.com/nal
| regexp(pattern, source) | Yes | |
| regexp_like(source, pattern) | Yes | |
| regexp_substr(source, pattern) | Yes | |
| regexp_capture(source, pattern[, n]) | No | |
| regexp_replace(source, pattern, replacement) | No | |
| regexp_capture(source, pattern[, n]) | Yes | |
| regexp_replace(source, pattern, replacement) | Yes | |
### Vector

View File

@@ -2,7 +2,7 @@ use regex::Regex;
use turso_ext::{register_extension, scalar, Value, ValueType};
register_extension! {
scalars: { regexp, regexp_like, regexp_substr, regexp_replace }
scalars: { regexp, regexp_like, regexp_substr, regexp_replace, regexp_capture }
}
#[scalar(name = "regexp")]
@@ -59,26 +59,56 @@ fn regexp_substr(&self, args: &[Value]) -> Value {
#[scalar(name = "regexp_replace")]
fn regexp_replace(&self, args: &[Value]) -> Value {
let replacement = match args.get(2) {
Some(repl) => repl.to_text().unwrap_or_default(),
None => "", // If args[2] does not exist, use an empty string
if args.len() < 2 {
return Value::from_text("".to_string());
}
let Some(source_text) = args[0].to_text() else {
return Value::from_text("".to_string());
};
match (args.first(), args.get(1)) {
(Some(haystack), Some(pattern)) => {
let Some(haystack_text) = haystack.to_text() else {
return Value::from_text("".to_string()); // Return an empty string if haystack is not valid
};
let Some(pattern_text) = pattern.to_text() else {
return Value::from_text("".to_string()); // Return an empty string if pattern is not valid
};
let Some(pattern_text) = args[1].to_text() else {
return Value::from_text("".to_string());
};
let re = match Regex::new(pattern_text) {
Ok(re) => re,
Err(_) => return Value::from_text("".to_string()), // Return an empty string if regex compilation fails
};
Value::from_text(re.replace(haystack_text, replacement).to_string())
}
_ => Value::from_text("".to_string()), // Return an empty string for invalid value types
}
let replacement = args.get(2).and_then(|v| v.to_text()).unwrap_or("");
let re = match Regex::new(pattern_text) {
Ok(re) => re,
Err(_) => return Value::from_text("".to_string()),
};
Value::from_text(re.replace(source_text, replacement).to_string())
}
/// Returns the text of one capture group from the first match of a pattern.
///
/// Arguments:
/// * `args[0]` - source text to search.
/// * `args[1]` - regular expression pattern.
/// * `args[2]` - optional capture group index; falls back to 1 when the argument is
///   absent or is not an integer. Index 0 selects the whole match.
///
/// Returns an empty text value when fewer than two arguments are supplied. Returns
/// NULL when the source or pattern is not text, the pattern does not compile, the
/// group index is negative or does not fit in `usize`, nothing matches, or the
/// requested group does not exist or did not participate in the match.
///
/// NOTE(review): sqlean's documentation appears to default `n` to 0 (the whole
/// match) rather than 1 - confirm which default is intended here.
#[scalar(name = "regexp_capture")]
fn regexp_capture(args: &[Value]) -> Value {
    if args.len() < 2 {
        return Value::from_text("".to_string());
    }
    let Some(source_text) = args[0].to_text() else {
        return Value::null();
    };
    let Some(pattern_text) = args[1].to_text() else {
        return Value::null();
    };
    let group_index: usize = match args.get(2).and_then(|v| v.to_integer()) {
        Some(n) => {
            // Checked conversion: a plain `as usize` cast would wrap negative values
            // and truncate on 32-bit targets, possibly aliasing a real group index.
            let converted: Result<usize, _> = std::convert::TryFrom::try_from(n);
            match converted {
                Ok(index) => index,
                Err(_) => return Value::null(),
            }
        }
        None => 1,
    };
    let Ok(re) = Regex::new(pattern_text) else {
        return Value::null();
    };
    match re
        .captures(source_text)
        .and_then(|caps| caps.get(group_index))
    {
        Some(m) => Value::from_text(m.as_str().to_string()),
        None => Value::null(),
    }
}

View File

@@ -103,6 +103,18 @@ def test_regexp():
"select regexp_replace('the year is 2021', '([0-9]+)', '$1 or 2050') = 'the year is 2021 or 2050';",
true,
)
limbo.run_test_fn(
"select regexp_capture('the year is 2021', '([0-9]+)') = '2021';",
true,
)
limbo.run_test_fn(
"select regexp_capture('abc 123 def', '([a-z]+) ([0-9]+) ([a-z]+)', 2) = '123';",
true,
)
limbo.run_test_fn(
"select regexp_capture('no digits here', '([0-9]+)');",
null,
)
limbo.quit()