Merge 'extensions/regexp: Add support for regexp_replace()' from lgualtieri75

Reviewed-by: Preston Thorpe (@PThorpe92)

Closes #986
This commit is contained in:
Pekka Enberg
2025-02-12 09:21:52 +02:00
2 changed files with 42 additions and 1 deletions

View File

@@ -2,7 +2,7 @@ use limbo_ext::{register_extension, scalar, Value, ValueType};
use regex::Regex;
register_extension! {
scalars: { regexp, regexp_like, regexp_substr }
scalars: { regexp, regexp_like, regexp_substr, regexp_replace }
}
#[scalar(name = "regexp")]
@@ -56,3 +56,29 @@ fn regexp_substr(&self, args: &[Value]) -> Value {
_ => Value::null(),
}
}
/// SQL scalar `regexp_replace(source, pattern[, replacement])`.
///
/// Compiles `pattern` (`args[1]`) as a regular expression and substitutes
/// `replacement` (`args[2]`) for the first match found in `source`
/// (`args[0]`). `Regex::replace` only rewrites the leftmost match and expands
/// capture-group references such as `$1` inside the replacement.
///
/// A missing third argument, or one for which `to_text()` yields nothing, is
/// treated as an empty replacement string.
///
/// Returns an empty text value (never NULL) when the source or the pattern is
/// missing, when `to_text()` yields nothing for either of them, or when the
/// pattern fails to compile.
#[scalar(name = "regexp_replace")]
fn regexp_replace(&self, args: &[Value]) -> Value {
    // Every failure path of this function produces the same empty text value.
    let empty = || Value::from_text(String::new());

    // Optional third argument: absent or non-text collapses to "".
    let substitute = args
        .get(2)
        .and_then(|value| value.to_text())
        .unwrap_or_default();

    // Both the source and the pattern arguments must be present.
    let (Some(subject), Some(pattern)) = (args.first(), args.get(1)) else {
        return empty();
    };

    let Some(subject_text) = subject.to_text() else {
        return empty();
    };
    let Some(pattern_text) = pattern.to_text() else {
        return empty();
    };

    // An invalid pattern is not reported as an error; it yields the empty string.
    Regex::new(pattern_text)
        .map(|re| Value::from_text(re.replace(subject_text, substitute).into_owned()))
        .unwrap_or_else(|_| empty())
}

View File

@@ -190,6 +190,21 @@ def test_regexp(pipe):
run_test(
pipe, "SELECT regexp_substr('the year is unknow', '[0-9]+');", returns_null
)
run_test(
pipe,
"select regexp_replace('the year is 2021', '[0-9]+', '2050') = 'the year is 2050';",
validate_true,
)
run_test(
pipe,
"select regexp_replace('the year is 2021', '2k21', '2050') = 'the year is 2021';",
validate_true,
)
run_test(
pipe,
"select regexp_replace('the year is 2021', '([0-9]+)', '$1 or 2050') = 'the year is 2021 or 2050';",
validate_true,
)
def validate_median(res):