From 6bd40f05077bbd61088510a4c12bb32931831457 Mon Sep 17 00:00:00 2001
From: "l.gualtieri"
Date: Tue, 11 Feb 2025 19:03:17 +0100
Subject: [PATCH] Add support for REGEXP_REPLACE in limbo extension #740

---
 extensions/regexp/src/lib.rs | 38 +++++++++++++++++++++++++++++++++++++-
 testing/extensions.py        | 20 ++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/extensions/regexp/src/lib.rs b/extensions/regexp/src/lib.rs
index 6f037e4d4..ce9b6d2ee 100644
--- a/extensions/regexp/src/lib.rs
+++ b/extensions/regexp/src/lib.rs
@@ -2,7 +2,7 @@ use limbo_ext::{register_extension, scalar, Value, ValueType};
 use regex::Regex;
 
 register_extension! {
-    scalars: { regexp, regexp_like, regexp_substr }
+    scalars: { regexp, regexp_like, regexp_substr, regexp_replace }
 }
 
 #[scalar(name = "regexp")]
@@ -56,3 +56,39 @@ fn regexp_substr(&self, args: &[Value]) -> Value {
         _ => Value::null(),
     }
 }
+
+// SQL scalar `regexp_replace(haystack, pattern, replacement)`.
+//
+// Replaces every non-overlapping match of `pattern` in `haystack` with
+// `replacement`; `$1`-style group references in `replacement` are expanded
+// (covered by testing/extensions.py). A missing third argument, or one for
+// which `to_text()` yields nothing, is treated as an empty replacement.
+// Returns an empty string when either of the first two arguments is missing
+// or has no text form, or when `pattern` fails to compile.
+#[scalar(name = "regexp_replace")]
+fn regexp_replace(&self, args: &[Value]) -> Value {
+    let replacement = match args.get(2) {
+        Some(repl) => repl.to_text().unwrap_or_default(),
+        None => "", // If args[2] does not exist, use an empty string
+    };
+
+    match (args.first(), args.get(1)) {
+        (Some(haystack), Some(pattern)) => {
+            let Some(haystack_text) = haystack.to_text() else {
+                return Value::from_text("".to_string()); // Return an empty string if haystack is not valid
+            };
+            let Some(pattern_text) = pattern.to_text() else {
+                return Value::from_text("".to_string()); // Return an empty string if pattern is not valid
+            };
+
+            let re = match Regex::new(&pattern_text) {
+                Ok(re) => re,
+                Err(_) => return Value::from_text("".to_string()), // Return an empty string if regex compilation fails
+            };
+            // `replace` would stop after the first match; use `replace_all` so a
+            // haystack containing several matches is rewritten throughout.
+            Value::from_text(re.replace_all(&haystack_text, replacement).to_string())
+        }
+        _ => Value::from_text("".to_string()), // Return an empty string for invalid value types
+    }
+}
diff --git a/testing/extensions.py b/testing/extensions.py
index 77ecbc703..76af242dd 100755
--- a/testing/extensions.py
+++ b/testing/extensions.py
@@ -190,6 +190,26 @@ def test_regexp(pipe):
     run_test(
         pipe, "SELECT regexp_substr('the year is unknow', '[0-9]+');", returns_null
     )
+    run_test(
+        pipe,
+        "select regexp_replace('the year is 2021', '[0-9]+', '2050') = 'the year is 2050';",
+        validate_true,
+    )
+    run_test(
+        pipe,
+        "select regexp_replace('the year is 2021', '2k21', '2050') = 'the year is 2021';",
+        validate_true,
+    )
+    run_test(
+        pipe,
+        "select regexp_replace('the year is 2021', '([0-9]+)', '$1 or 2050') = 'the year is 2021 or 2050';",
+        validate_true,
+    )
+    run_test(
+        pipe,
+        "select regexp_replace('2021 and 2022', '[0-9]+', 'X') = 'X and X';",
+        validate_true,
+    )
 
 
 def validate_median(res):