mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-02 06:44:23 +01:00
Implement regexp extension
This commit is contained in:
10
COMPAT.md
10
COMPAT.md
@@ -500,3 +500,13 @@ UUID's in Limbo are `blobs` by default.
|
||||
| uuid7_timestamp_ms(X) | Yes | Convert a UUID v7 to milliseconds since epoch |
|
||||
| uuid_str(X) | Yes | Convert a valid UUID to string |
|
||||
| uuid_blob(X) | Yes | Convert a valid UUID to blob |
|
||||
|
||||
### REGEXP
|
||||
|
||||
| Function | Status | Comment |
|
||||
|------------------------------------------------|--------|---------|
|
||||
| regexp(pattern, source) | Yes | |
|
||||
| regexp_like(source, pattern) | Yes | |
|
||||
| regexp_substr(source, pattern) | Yes | |
|
||||
| regexp_capture(source, pattern[, n]) | No | |
|
||||
| regexp_replace(source, pattern, replacement) | No | |
|
||||
9
Cargo.lock
generated
9
Cargo.lock
generated
@@ -1274,6 +1274,15 @@ dependencies = [
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "limbo_regexp"
|
||||
version = "0.0.12"
|
||||
dependencies = [
|
||||
"limbo_ext",
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "limbo_sim"
|
||||
version = "0.0.12"
|
||||
|
||||
@@ -11,6 +11,7 @@ members = [
|
||||
"core",
|
||||
"extensions/core",
|
||||
"extensions/uuid",
|
||||
"extensions/regexp",
|
||||
"macros",
|
||||
"simulator",
|
||||
"sqlite3",
|
||||
|
||||
16
extensions/regexp/Cargo.toml
Normal file
16
extensions/regexp/Cargo.toml
Normal file
@@ -0,0 +1,16 @@
|
||||
[package]
|
||||
name = "limbo_regexp"
|
||||
version.workspace = true
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "lib"]
|
||||
|
||||
|
||||
[dependencies]
|
||||
limbo_ext = { path = "../core"}
|
||||
regex = "1.11.1"
|
||||
log = "0.4.20"
|
||||
65
extensions/regexp/src/lib.rs
Normal file
65
extensions/regexp/src/lib.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
use limbo_ext::{export_scalar, register_extension, register_scalar_functions, Value, ValueType};
|
||||
use regex::Regex;
|
||||
|
||||
register_extension! {
|
||||
scalars: {
|
||||
"regexp" => regexp,
|
||||
"regexp_like" => regexp_like,
|
||||
"regexp_substr" => regexp_substr,
|
||||
},
|
||||
}
|
||||
|
||||
#[export_scalar]
|
||||
#[args(2)]
|
||||
fn regexp(args: &[Value]) -> Value {
|
||||
regex(&args[0], &args[1])
|
||||
}
|
||||
|
||||
#[export_scalar]
|
||||
#[args(2)]
|
||||
fn regexp_like(args: &[Value]) -> Value {
|
||||
regex(&args[1], &args[0])
|
||||
}
|
||||
|
||||
fn regex(pattern: &Value, haystack: &Value) -> Value {
|
||||
match (pattern.value_type(), haystack.value_type()) {
|
||||
(ValueType::Text, ValueType::Text) => {
|
||||
let Some(pattern) = pattern.to_text() else {
|
||||
return Value::null();
|
||||
};
|
||||
let Some(haystack) = haystack.to_text() else {
|
||||
return Value::null();
|
||||
};
|
||||
let re = match Regex::new(&pattern) {
|
||||
Ok(re) => re,
|
||||
Err(_) => return Value::null(),
|
||||
};
|
||||
Value::from_integer(re.is_match(&haystack) as i64)
|
||||
}
|
||||
_ => Value::null(),
|
||||
}
|
||||
}
|
||||
|
||||
#[export_scalar]
|
||||
#[args(2)]
|
||||
fn regexp_substr(args: &[Value]) -> Value {
|
||||
return match (args[0].value_type(), args[1].value_type()) {
|
||||
(ValueType::Text, ValueType::Text) => {
|
||||
let Some(haystack) = &args[0].to_text() else {
|
||||
return Value::null();
|
||||
};
|
||||
let Some(pattern) = &args[1].to_text() else {
|
||||
return Value::null();
|
||||
};
|
||||
let re = match Regex::new(pattern) {
|
||||
Ok(re) => re,
|
||||
Err(_) => return Value::null(),
|
||||
};
|
||||
match re.find(haystack) {
|
||||
Some(mat) => Value::from_text(mat.as_str().to_string()),
|
||||
None => Value::null(),
|
||||
}
|
||||
}
|
||||
_ => Value::null(),
|
||||
};
|
||||
}
|
||||
@@ -77,6 +77,11 @@ def run_test(pipe, sql, validator=None):
|
||||
raise Exception("Validation failed")
|
||||
print("Test PASSED")
|
||||
|
||||
def validate_true(result):
    """Validator for run_test: passes only when `result` is exactly the string "1"."""
    expected = "1"
    return result == expected
|
||||
|
||||
def validate_false(result):
    """Validator for run_test: passes only when `result` is exactly the string "0"."""
    expected = "0"
    return result == expected
|
||||
|
||||
def validate_blob(result):
|
||||
# HACK: blobs are difficult to test because the shell
|
||||
@@ -100,33 +105,54 @@ def assert_now_unixtime(result):
|
||||
def assert_specific_time(result):
    """Validator for run_test: passes only when `result` is the string "1736720789"."""
    expected = "1736720789"
    return result == expected
|
||||
|
||||
|
||||
def main():
|
||||
def test_uuid(pipe):
    """Run the uuid extension checks through `pipe`.

    First asserts that the uuid functions return null before the extension is
    loaded, then loads the shared library and runs each query with its
    validator, in order.
    """
    specific_time = "01945ca0-3189-76c0-9a8f-caf310fc8b8e"
    extension_path = "./target/debug/liblimbo_uuid.so"

    # before extension loads, assert no function
    for sql in ("SELECT uuid4();", "SELECT uuid4_str();"):
        run_test(pipe, sql, returns_null)

    run_test(pipe, f".load {extension_path}", returns_null)
    print(f"Extension {extension_path} loaded successfully.")

    # (query, validator) pairs executed in order once the extension is loaded.
    # A validator of None is run_test's default, i.e. no validator is supplied.
    post_load_cases = [
        ("SELECT hex(uuid4());", validate_blob),
        ("SELECT uuid4_str();", validate_string_uuid),
        ("SELECT hex(uuid7());", validate_blob),
        ("SELECT uuid7_timestamp_ms(uuid7()) / 1000;", None),
        ("SELECT uuid7_str();", validate_string_uuid),
        ("SELECT uuid_str(uuid7());", validate_string_uuid),
        ("SELECT hex(uuid_blob(uuid7_str()));", validate_blob),
        ("SELECT uuid_str(uuid_blob(uuid7_str()));", validate_string_uuid),
        (
            f"SELECT uuid7_timestamp_ms('{specific_time}') / 1000;",
            assert_specific_time,
        ),
    ]
    for sql, validator in post_load_cases:
        run_test(pipe, sql, validator)
|
||||
|
||||
def test_regexp(pipe):
    """Run the regexp extension checks through `pipe`.

    First asserts that `regexp` returns null before the extension is loaded,
    then loads the shared library and runs each query with its validator, in
    order.
    """
    extension_path = "./target/debug/liblimbo_regexp.so"

    # before extension loads, assert no function
    run_test(pipe, "SELECT regexp('a.c', 'abc');", returns_null)

    run_test(pipe, f".load {extension_path}", returns_null)
    print(f"Extension {extension_path} loaded successfully.")

    # (query, validator) pairs executed in order once the extension is loaded.
    post_load_cases = [
        ("SELECT regexp('a.c', 'abc');", validate_true),
        ("SELECT regexp('a.c', 'ac');", validate_false),
        ("SELECT regexp('[0-9]+', 'the year is 2021');", validate_true),
        ("SELECT regexp('[0-9]+', 'the year is unknow');", validate_false),
        ("SELECT regexp_like('the year is 2021', '[0-9]+');", validate_true),
        ("SELECT regexp_like('the year is unknow', '[0-9]+');", validate_false),
        (
            "SELECT regexp_substr('the year is 2021', '[0-9]+') = '2021';",
            validate_true,
        ),
        ("SELECT regexp_substr('the year is unknow', '[0-9]+');", returns_null),
    ]
    for sql, validator in post_load_cases:
        run_test(pipe, sql, validator)
|
||||
|
||||
|
||||
def main():
|
||||
pipe = init_limbo()
|
||||
try:
|
||||
# before extension loads, assert no function
|
||||
run_test(pipe, "SELECT uuid4();", returns_null)
|
||||
run_test(pipe, "SELECT uuid4_str();", returns_null)
|
||||
run_test(pipe, f".load {extension_path}", returns_null)
|
||||
print("Extension loaded successfully.")
|
||||
run_test(pipe, "SELECT hex(uuid4());", validate_blob)
|
||||
run_test(pipe, "SELECT uuid4_str();", validate_string_uuid)
|
||||
run_test(pipe, "SELECT hex(uuid7());", validate_blob)
|
||||
run_test(
|
||||
pipe,
|
||||
"SELECT uuid7_timestamp_ms(uuid7()) / 1000;",
|
||||
)
|
||||
run_test(pipe, "SELECT uuid7_str();", validate_string_uuid)
|
||||
run_test(pipe, "SELECT uuid_str(uuid7());", validate_string_uuid)
|
||||
run_test(pipe, "SELECT hex(uuid_blob(uuid7_str()));", validate_blob)
|
||||
run_test(pipe, "SELECT uuid_str(uuid_blob(uuid7_str()));", validate_string_uuid)
|
||||
run_test(
|
||||
pipe,
|
||||
f"SELECT uuid7_timestamp_ms('{specific_time}') / 1000;",
|
||||
assert_specific_time,
|
||||
)
|
||||
test_regexp(pipe)
|
||||
test_uuid(pipe)
|
||||
except Exception as e:
|
||||
print(f"Test FAILED: {e}")
|
||||
pipe.terminate()
|
||||
|
||||
Reference in New Issue
Block a user