From 0fcb302d8f5d11edbd5df8e343fd984ccb6850df Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 5 Aug 2025 20:18:14 +0530 Subject: [PATCH 1/7] add regexp_capture --- extensions/regexp/src/lib.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/extensions/regexp/src/lib.rs b/extensions/regexp/src/lib.rs index f388f5698..70e08ce24 100644 --- a/extensions/regexp/src/lib.rs +++ b/extensions/regexp/src/lib.rs @@ -82,3 +82,36 @@ fn regexp_replace(&self, args: &[Value]) -> Value { _ => Value::from_text("".to_string()), // Return an empty string for invalid value types } } + + +#[scalar(name = "regexp_capture")] +fn regexp_capture(args: &[Value]) -> Value { + match (args.get(0), args.get(1)) { + (Some(source), Some(pattern)) if source.value_type() == ValueType::Text && pattern.value_type() == ValueType::Text => { + let Some(source_text) = source.to_text() else { + return Value::null(); + }; + + let Some(pattern_text) = pattern.to_text() else { + return Value::null(); + }; + + let group_index: usize = args.get(2).and_then(|v| v.to_integer()).map(|n| n as usize).unwrap_or(1); + + + let re = match Regex::new(pattern_text) { + Ok(re) => re, + Err(_) => return Value::null() + }; + + if let Some(caps) = re.captures(source_text) { + if let Some(m) = caps.get(group_index) { + return Value::from_text(m.as_str().to_string()); + } + } + + Value::null() + } + _ => Value::null() + } +} \ No newline at end of file From 20d4da705421d7fdb8d449a47237e1cc22ab7941 Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 5 Aug 2025 20:19:48 +0530 Subject: [PATCH 2/7] register regexp_capture with register_extension --- extensions/regexp/src/lib.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/extensions/regexp/src/lib.rs b/extensions/regexp/src/lib.rs index 70e08ce24..63903dc6f 100644 --- a/extensions/regexp/src/lib.rs +++ b/extensions/regexp/src/lib.rs @@ -2,7 +2,7 @@ use regex::Regex; use turso_ext::{register_extension, scalar, Value, ValueType}; register_extension! { - scalars: { regexp, regexp_like, regexp_substr, regexp_replace } + scalars: { regexp, regexp_like, regexp_substr, regexp_replace, regexp_capture } } #[scalar(name = "regexp")] @@ -83,11 +83,13 @@ fn regexp_replace(&self, args: &[Value]) -> Value { } } - #[scalar(name = "regexp_capture")] fn regexp_capture(args: &[Value]) -> Value { match (args.get(0), args.get(1)) { - (Some(source), Some(pattern)) if source.value_type() == ValueType::Text && pattern.value_type() == ValueType::Text => { + (Some(source), Some(pattern)) + if source.value_type() == ValueType::Text + && pattern.value_type() == ValueType::Text => + { let Some(source_text) = source.to_text() else { return Value::null(); }; @@ -96,12 +98,15 @@ fn regexp_capture(args: &[Value]) -> Value { return Value::null(); }; - let group_index: usize = args.get(2).and_then(|v| v.to_integer()).map(|n| n as usize).unwrap_or(1); - + let group_index: usize = args + .get(2) + .and_then(|v| v.to_integer()) + .map(|n| n as usize) + .unwrap_or(1); let re = match Regex::new(pattern_text) { Ok(re) => re, - Err(_) => return Value::null() + Err(_) => return Value::null(), }; if let Some(caps) = re.captures(source_text) { @@ -112,6 +117,6 @@ fn regexp_capture(args: &[Value]) -> Value { Value::null() } - _ => Value::null() + _ => Value::null(), } -} \ No newline at end of file +} From b26a58f652f0d778743530c4bfd7eb294d096ccb Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 5 Aug 2025 20:36:09 +0530 Subject: [PATCH 3/7] update extension.py with regexp_replace test --- testing/cli_tests/extensions.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index f76896ae7..de5592bdf 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -103,6 +103,18 @@ def test_regexp(): "select regexp_replace('the year is 2021', '([0-9]+)', '$1 or 2050') = 'the year is 2021 or 2050';", true, ) + limbo.run_test_fn( + "select regexp_capture('the year is 2021', '([0-9]+)') = '2021';", + true, + ); + limbo.run_test_fn( + "select regexp_capture('abc 123 def', '([a-z]+) ([0-9]+) ([a-z]+)', 2) = '123';", + true, + ); + limbo.run_test_fn( + "select regexp_capture('no digits here', '([0-9]+)');", + null, + ); limbo.quit() From db17a195f3f89a48aabf90f6e4145eccc2645c0d Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 5 Aug 2025 20:48:47 +0530 Subject: [PATCH 4/7] refactor regexp capture --- extensions/regexp/src/lib.rs | 60 ++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/extensions/regexp/src/lib.rs b/extensions/regexp/src/lib.rs index 63903dc6f..343350269 100644 --- a/extensions/regexp/src/lib.rs +++ b/extensions/regexp/src/lib.rs @@ -85,38 +85,32 @@ fn regexp_replace(&self, args: &[Value]) -> Value { #[scalar(name = "regexp_capture")] fn regexp_capture(args: &[Value]) -> Value { - match (args.get(0), args.get(1)) { - (Some(source), Some(pattern)) - if source.value_type() == ValueType::Text - && pattern.value_type() == ValueType::Text => - { - let Some(source_text) = source.to_text() else { - return Value::null(); - }; - - let Some(pattern_text) = pattern.to_text() else { - return Value::null(); - }; - - let group_index: usize = args - .get(2) - .and_then(|v| v.to_integer()) - .map(|n| n as usize) - .unwrap_or(1); - - let re = match Regex::new(pattern_text) { - Ok(re) => re, - Err(_) => return Value::null(), - }; - - if let Some(caps) = re.captures(source_text) { - if let Some(m) = caps.get(group_index) { - return Value::from_text(m.as_str().to_string()); - } - } - - Value::null() - } - _ => Value::null(), + if args.len() < 2 { + return Value::from_text("".to_string()); } + let Some(source_text) = args[0].to_text() else { + return Value::null(); + }; + let Some(pattern_text) = args[1].to_text() else { + return Value::null(); + }; + + let group_index: usize = args + .get(2) + .and_then(|v| v.to_integer()) + .map(|n| n as usize) + .unwrap_or(1); + + let re = match Regex::new(pattern_text) { + Ok(re) => re, + Err(_) => return Value::null(), + }; + + if let Some(caps) = re.captures(source_text) { + if let Some(m) = caps.get(group_index) { + return Value::from_text(m.as_str().to_string()); + } + } + + Value::null() } From e92fdfca9f394b18703c63b49c81f9540e59ddc0 Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 5 Aug 2025 20:53:24 +0530 Subject: [PATCH 5/7] refactir regexp replace --- extensions/regexp/src/lib.rs | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/extensions/regexp/src/lib.rs b/extensions/regexp/src/lib.rs index 343350269..e7b448eae 100644 --- a/extensions/regexp/src/lib.rs +++ b/extensions/regexp/src/lib.rs @@ -59,28 +59,26 @@ fn regexp_substr(&self, args: &[Value]) -> Value { #[scalar(name = "regexp_replace")] fn regexp_replace(&self, args: &[Value]) -> Value { - let replacement = match args.get(2) { - Some(repl) => repl.to_text().unwrap_or_default(), - None => "", // If args[2] does not exist, use an empty string + if args.len() < 2 { + return Value::from_text("".to_string()); + } + + let Some(source_text) = args[0].to_text() else { + return Value::from_text("".to_string()); }; - match (args.first(), args.get(1)) { - (Some(haystack), Some(pattern)) => { - let Some(haystack_text) = haystack.to_text() else { - return Value::from_text("".to_string()); // Return an empty string if haystack is not valid - }; - let Some(pattern_text) = pattern.to_text() else { - return Value::from_text("".to_string()); // Return an empty string if pattern is not valid - }; + let Some(pattern_text) = args[1].to_text() else { + return Value::from_text("".to_string()); + }; - let re = match Regex::new(pattern_text) { - Ok(re) => re, - Err(_) => return Value::from_text("".to_string()), // Return an empty string if regex compilation fails - }; - Value::from_text(re.replace(haystack_text, replacement).to_string()) - } - _ => Value::from_text("".to_string()), // Return an empty string for invalid value types - } + let replacement = args.get(2).and_then(|v| v.to_text()).unwrap_or(""); + + let re = match Regex::new(pattern_text) { + Ok(re) => re, + Err(_) => return Value::from_text("".to_string()), + }; + + Value::from_text(re.replace(source_text, replacement).to_string()) } #[scalar(name = "regexp_capture")] From eff096273c5bb28910b0ec74abc14a2f6a63fe2a Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 5 Aug 2025 20:54:55 +0530 Subject: [PATCH 6/7] update compat.md --- COMPAT.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index a988ca829..eb844488b 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -610,8 +610,8 @@ The `regexp` extension is compatible with [sqlean-regexp](https://github.com/nal | regexp(pattern, source) | Yes | | | regexp_like(source, pattern) | Yes | | | regexp_substr(source, pattern) | Yes | | -| regexp_capture(source, pattern[, n]) | No | | -| regexp_replace(source, pattern, replacement) | No | | +| regexp_capture(source, pattern[, n]) | Yes | | +| regexp_replace(source, pattern, replacement) | Yes | | ### Vector From 84316d920635364c8e2e1df4d9e0881fa287fd0a Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Wed, 6 Aug 2025 12:02:20 +0530 Subject: [PATCH 7/7] fix python lint --- testing/cli_tests/extensions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index de5592bdf..361ddbf31 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -106,15 +106,15 @@ def test_regexp(): limbo.run_test_fn( "select regexp_capture('the year is 2021', '([0-9]+)') = '2021';", true, - ); + ) limbo.run_test_fn( "select regexp_capture('abc 123 def', '([a-z]+) ([0-9]+) ([a-z]+)', 2) = '123';", true, - ); + ) limbo.run_test_fn( "select regexp_capture('no digits here', '([0-9]+)');", null, - ); + ) limbo.quit()