From dd58be3b602dfa78c3e95d77f8332f9b15911a8a Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Wed, 5 Feb 2025 23:08:20 -0300 Subject: [PATCH 1/3] Add basic structure for crypto extension --- Cargo.lock | 63 ++++++++++++++++++++++++++ Cargo.toml | 1 + core/Cargo.toml | 4 +- core/ext/mod.rs | 4 ++ extensions/core/src/types.rs | 32 +++++++++++++- extensions/crypto/Cargo.toml | 21 +++++++++ extensions/crypto/src/crypto.rs | 55 +++++++++++++++++++++++ extensions/crypto/src/lib.rs | 78 +++++++++++++++++++++++++++++++++ testing/extensions.py | 1 - 9 files changed, 256 insertions(+), 3 deletions(-) create mode 100644 extensions/crypto/Cargo.toml create mode 100644 extensions/crypto/src/crypto.rs create mode 100644 extensions/crypto/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index cb6dda8d8..700451334 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -140,6 +140,12 @@ version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.6" @@ -204,6 +210,19 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +[[package]] +name = "blake3" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -455,6 +474,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -1590,6 +1615,7 @@ dependencies = [ "julian_day_converter", "libc", "libloading", + "limbo_crypto", "limbo_ext", "limbo_macros", "limbo_percentile", @@ -1621,6 +1647,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "limbo_crypto" +version = "0.0.14" +dependencies = [ + "blake3", + "limbo_ext", + "mimalloc", + "ring", +] + [[package]] name = "limbo_ext" version = "0.0.14" @@ -2538,6 +2574,21 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.15", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rstest" version = "0.18.2" @@ -2761,6 +2812,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "sqlite3-parser" version = "0.13.0" @@ -3130,6 +3187,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.5.4" diff --git a/Cargo.toml b/Cargo.toml index 85b8c46d7..c6b4137fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ members = [ "extensions/percentile", "extensions/vector", "extensions/time", + "extensions/crypto", ] exclude = ["perf/latency/limbo"] diff --git a/core/Cargo.toml b/core/Cargo.toml index f2bba5f59..12f827ea2 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -14,7 +14,7 @@ name = "limbo_core" path = "lib.rs" [features] -default = ["fs", "json", "uuid", "vector", "io_uring", "time"] +default = ["fs", "json", "uuid", "vector", "io_uring", "time", "crypto"] fs = [] json = [ "dep:jsonb", @@ -27,6 +27,7 @@ io_uring = ["dep:io-uring", "rustix/io_uring"] percentile = ["limbo_percentile/static"] regexp = ["limbo_regexp/static"] time = ["limbo_time/static"] +crypto = ["limbo_crypto/static"] [target.'cfg(target_os = "linux")'.dependencies] io-uring = { version = "0.6.1", optional = true } @@ -67,6 +68,7 @@ limbo_vector = { path = "../extensions/vector", optional = true, features = ["st limbo_regexp = { path = "../extensions/regexp", optional = true, features = ["static"] } limbo_percentile = { path = "../extensions/percentile", optional = true, features = ["static"] } limbo_time = { path = "../extensions/time", optional = true, features = ["static"] } +limbo_crypto = { path = "../extensions/crypto", optional = true, features = ["static"] } miette = "7.4.0" strum = "0.26" parking_lot = "0.12.3" diff --git a/core/ext/mod.rs b/core/ext/mod.rs index 8a9212556..06ca4d7fb 100644 --- a/core/ext/mod.rs +++ b/core/ext/mod.rs @@ -96,6 +96,10 @@ impl Database { if unsafe { !limbo_time::register_extension_static(&ext_api).is_ok() } { return Err("Failed to register time extension".to_string()); } + #[cfg(feature = "crypto")] + if unsafe { !limbo_crypto::register_extension_static(&ext_api).is_ok() } { + return Err("Failed to register crypto extension".to_string()); + } Ok(()) } } diff --git a/extensions/core/src/types.rs b/extensions/core/src/types.rs index 74fa670ad..63c9a3b54 100644 --- a/extensions/core/src/types.rs +++ b/extensions/core/src/types.rs @@ -1,4 +1,4 @@ -use std::fmt::Display; +use std::{fmt::Display, mem}; /// Error type is of type ExtError which can be /// either a user defined error or an error code @@ -204,6 +204,13 @@ impl Blob { pub fn new(data: *const u8, size: u64) -> Self { Self { data, size } } + + pub fn as_bytes(&self) -> &[u8] { + if self.data.is_null() { + return &[]; + } + unsafe { std::slice::from_raw_parts(self.data, self.size as usize) } + } } impl Value { @@ -303,6 +310,29 @@ impl Value { } } + // Return ValueData as raw bytes + pub fn as_bytes(&self) -> Vec { + let mut bytes = vec![]; + + unsafe { + match self.value_type { + ValueType::Integer => bytes.extend_from_slice(&self.value.int.to_le_bytes()), + ValueType::Float => bytes.extend_from_slice(&self.value.float.to_le_bytes()), + ValueType::Text => { + let text = self.value.text.as_ref().expect("Invalid text pointer"); + bytes.extend_from_slice(text.as_str().as_bytes()); + } + ValueType::Blob => { + let blob = self.value.blob.as_ref().expect("Invalid blob pointer"); + bytes.extend_from_slice(blob.as_bytes()); + } + ValueType::Error | ValueType::Null => {} + } + } + + bytes + } + /// Creates a new integer Value from an i64 pub fn from_integer(i: i64) -> Self { Self { diff --git a/extensions/crypto/Cargo.toml b/extensions/crypto/Cargo.toml new file mode 100644 index 000000000..9cd714156 --- /dev/null +++ b/extensions/crypto/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "limbo_crypto" +version.workspace = true +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true + +[lib] +crate-type = ["cdylib", "lib"] + +[features] +static= [ "limbo_ext/static" ] + +[dependencies] +blake3 = "1.5.5" +limbo_ext = { path = "../core", features = ["static"] } +ring = "0.17.8" + +[target.'cfg(not(target_family = "wasm"))'.dependencies] +mimalloc = { version = "*", default-features = false } diff --git a/extensions/crypto/src/crypto.rs b/extensions/crypto/src/crypto.rs new file mode 100644 index 000000000..472b8dac1 --- /dev/null +++ b/extensions/crypto/src/crypto.rs @@ -0,0 +1,55 @@ +use crate::Error; +use blake3::Hasher; +use limbo_ext::{Value, ValueType}; +use ring::digest::{self, digest}; + +pub fn sha256(data: &Value) -> Result, Error> { + match data.value_type() { + ValueType::Error | ValueType::Null => Err(Error::InvalidType), + _ => { + let hash = digest(&digest::SHA256, &data.as_bytes()); + Ok(hash.as_ref().to_vec()) + } + } +} + +pub fn sha512(data: &Value) -> Result, Error> { + match data.value_type() { + ValueType::Error | ValueType::Null => Err(Error::InvalidType), + _ => { + let hash = digest(&digest::SHA512, &data.as_bytes()); + Ok(hash.as_ref().to_vec()) + } + } +} + +pub fn sha384(data: &Value) -> Result, Error> { + match data.value_type() { + ValueType::Error | ValueType::Null => Err(Error::InvalidType), + _ => { + let hash = digest(&digest::SHA384, &data.as_bytes()); + Ok(hash.as_ref().to_vec()) + } + } +} + +pub fn blake3(data: &Value) -> Result, Error> { + match data.value_type() { + ValueType::Error | ValueType::Null => Err(Error::InvalidType), + _ => { + let mut hasher = Hasher::new(); + hasher.update(data.as_bytes().as_ref()); + Ok(hasher.finalize().as_bytes().to_vec()) + } + } +} + +pub fn sha1(data: &Value) -> Result, Error> { + match data.value_type() { + ValueType::Error | ValueType::Null => Err(Error::InvalidType), + _ => { + let hash = digest(&digest::SHA1_FOR_LEGACY_USE_ONLY, &data.as_bytes()); + Ok(hash.as_ref().to_vec()) + } + } +} diff --git a/extensions/crypto/src/lib.rs b/extensions/crypto/src/lib.rs new file mode 100644 index 000000000..49f6d3e9b --- /dev/null +++ b/extensions/crypto/src/lib.rs @@ -0,0 +1,78 @@ +use crypto::{blake3, sha1, sha256, sha384, sha512}; +use limbo_ext::{register_extension, scalar, ResultCode, Value}; + +mod crypto; + +#[derive(Debug)] +enum Error { + InvalidType, +} + +#[scalar(name = "crypto_sha256", alias = "crypto_sha256")] +fn crypto_sha256(args: &[Value]) -> Value { + if args.len() != 1 { + return Value::error(ResultCode::Error); + } + + let Ok(hash) = sha256(&args[0]) else { + return Value::error(ResultCode::Error); + }; + + Value::from_blob(hash) +} + +#[scalar(name = "crypto_sha512", alias = "crypto_sha512")] +fn crypto_sha512(args: &[Value]) -> Value { + if args.len() != 1 { + return Value::error(ResultCode::Error); + } + + let Ok(hash) = sha512(&args[0]) else { + return Value::error(ResultCode::Error); + }; + + Value::from_blob(hash) +} + +#[scalar(name = "crypto_sha384", alias = "crypto_sha384")] +fn crypto_sha384(args: &[Value]) -> Value { + if args.len() != 1 { + return Value::error(ResultCode::Error); + } + + let Ok(hash) = sha384(&args[0]) else { + return Value::error(ResultCode::Error); + }; + + Value::from_blob(hash) +} + +#[scalar(name = "crypto_blake3", alias = "crypto_blake3")] +fn crypto_blake3(args: &[Value]) -> Value { + if args.len() != 1 { + return Value::error(ResultCode::Error); + } + + let Ok(hash) = blake3(&args[0]) else { + return Value::error(ResultCode::Error); + }; + + Value::from_blob(hash) +} + +#[scalar(name = "crypto_sha1", alias = "crypto_sha1")] +fn crypto_sha1(args: &[Value]) -> Value { + if args.len() != 1 { + return Value::error(ResultCode::Error); + } + + let Ok(hash) = sha1(&args[0]) else { + return Value::error(ResultCode::Error); + }; + + Value::from_blob(hash) +} + +register_extension! { + scalars: { crypto_sha256, crypto_sha512, crypto_sha384, crypto_blake3, crypto_sha1 }, +} diff --git a/testing/extensions.py b/testing/extensions.py index 74755a012..a1094e865 100755 --- a/testing/extensions.py +++ b/testing/extensions.py @@ -255,7 +255,6 @@ def test_aggregates(pipe): pipe, "SELECT percentile_disc(value, 0.55) from test;", validate_percentile_disc ) - def main(): pipe = init_limbo() try: From 846d5ed4141666bcd4a950c9d34e3ada4f4aafa1 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Thu, 6 Feb 2025 00:04:36 -0300 Subject: [PATCH 2/3] add md5 and encode to extension --- extensions/crypto/Cargo.toml | 4 ++++ extensions/crypto/src/crypto.rs | 36 +++++++++++++++++++++++++++++++++ extensions/crypto/src/lib.rs | 33 +++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/extensions/crypto/Cargo.toml b/extensions/crypto/Cargo.toml index 9cd714156..84bd10efc 100644 --- a/extensions/crypto/Cargo.toml +++ b/extensions/crypto/Cargo.toml @@ -13,9 +13,13 @@ crate-type = ["cdylib", "lib"] static= [ "limbo_ext/static" ] [dependencies] +ascii85 = "0.2.1" blake3 = "1.5.5" +data-encoding = "2.7.0" limbo_ext = { path = "../core", features = ["static"] } +md5 = "0.7.0" ring = "0.17.8" +urlencoding = "2.1.3" [target.'cfg(not(target_family = "wasm"))'.dependencies] mimalloc = { version = "*", default-features = false } diff --git a/extensions/crypto/src/crypto.rs b/extensions/crypto/src/crypto.rs index 472b8dac1..654307bd4 100644 --- a/extensions/crypto/src/crypto.rs +++ b/extensions/crypto/src/crypto.rs @@ -1,5 +1,6 @@ use crate::Error; use blake3::Hasher; +use data_encoding::{BASE32, BASE64, HEXLOWER}; use limbo_ext::{Value, ValueType}; use ring::digest::{self, digest}; @@ -53,3 +54,38 @@ pub fn sha1(data: &Value) -> Result, Error> { } } } + +pub fn md5(data: &Value) -> Result, Error> { + match data.value_type() { + ValueType::Error | ValueType::Null => Err(Error::InvalidType), + _ => { + let digest = md5::compute::<&Vec>(data.as_bytes().as_ref()); + + Ok(digest.as_ref().to_vec()) + } + } +} + +pub fn encode(data: &Value, format: &Value) -> Result { + match (data.value_type(), format.value_type()) { + (ValueType::Error, _) | (ValueType::Null, _) => Err(Error::InvalidType), + (_, ValueType::Text) => match format.to_text().unwrap().to_lowercase().as_str() { + "base32" => Ok(Value::from_text(BASE32.encode(data.as_bytes().as_ref()))), + "base64" => Ok(Value::from_text(BASE64.encode(data.as_bytes().as_ref()))), + "hex" => Ok(Value::from_text(HEXLOWER.encode(data.as_bytes().as_ref()))), + "base85" => { + let result = ascii85::encode(data.as_bytes().as_ref()) + .replace("<~", "") + .replace("~>", ""); + Ok(Value::from_text(result)) + } + "url" => { + let data = data.as_bytes(); + let url = urlencoding::encode_binary(&data); + Ok(Value::from_text(url.to_string())) + } + _ => Err(Error::UnknownOperation), + }, + _ => Err(Error::InvalidType), + } +} diff --git a/extensions/crypto/src/lib.rs b/extensions/crypto/src/lib.rs index 49f6d3e9b..09fe28c38 100644 --- a/extensions/crypto/src/lib.rs +++ b/extensions/crypto/src/lib.rs @@ -1,4 +1,4 @@ -use crypto::{blake3, sha1, sha256, sha384, sha512}; +use crypto::{blake3, encode, md5, sha1, sha256, sha384, sha512}; use limbo_ext::{register_extension, scalar, ResultCode, Value}; mod crypto; @@ -6,6 +6,7 @@ mod crypto; #[derive(Debug)] enum Error { InvalidType, + UnknownOperation, } #[scalar(name = "crypto_sha256", alias = "crypto_sha256")] @@ -73,6 +74,32 @@ fn crypto_sha1(args: &[Value]) -> Value { Value::from_blob(hash) } -register_extension! { - scalars: { crypto_sha256, crypto_sha512, crypto_sha384, crypto_blake3, crypto_sha1 }, +#[scalar(name = "crypto_md5", alias = "crypto_md5")] +fn crypto_md5(args: &[Value]) -> Value { + if args.len() != 1 { + return Value::error(ResultCode::Error); + } + + let Ok(hash) = md5(&args[0]) else { + return Value::error(ResultCode::Error); + }; + + Value::from_blob(hash) +} + +#[scalar(name = "crypto_encode", alias = "crypto_encode")] +fn crypto_encode(args: &[Value]) -> Value { + if args.len() != 2 { + return Value::error(ResultCode::Error); + } + + let Ok(payload) = encode(&args[0], &args[1]) else { + return Value::error(ResultCode::Error); + }; + + payload +} + +register_extension! { + scalars: { crypto_sha256, crypto_sha512, crypto_sha384, crypto_blake3, crypto_sha1, crypto_md5, crypto_encode }, } From 05057a04ac9ae0afb3269a580961a20d0cab4567 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Thu, 6 Feb 2025 01:42:47 -0300 Subject: [PATCH 3/3] completes crypto extension It aims to be compatible with https://github.com/nalgeon/sqlean/blob/main/docs/crypto.md --- Cargo.lock | 21 ++++ core/Cargo.toml | 2 +- extensions/core/src/types.rs | 2 +- extensions/crypto/Cargo.toml | 1 - extensions/crypto/src/crypto.rs | 136 +++++++++++++++++++++++-- extensions/crypto/src/lib.rs | 21 +++- testing/extensions.py | 171 +++++++++++++++++++++++++++++++- 7 files changed, 339 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 700451334..ee60a825c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -642,6 +642,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "data-encoding" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e60eed09d8c01d3cee5b7d30acb059b76614c918fa0f992e0dd6eeb10daad6f" + [[package]] name = "debugid" version = "0.8.0" @@ -1652,9 +1658,12 @@ name = "limbo_crypto" version = "0.0.14" dependencies = [ "blake3", + "data-encoding", "limbo_ext", + "md5", "mimalloc", "ring", + "urlencoding", ] [[package]] @@ -1787,6 +1796,12 @@ version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name = "memchr" version = "2.7.4" @@ -3204,6 +3219,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf16_iter" version = "1.0.5" diff --git a/core/Cargo.toml b/core/Cargo.toml index 12f827ea2..97406179f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -14,7 +14,7 @@ name = "limbo_core" path = "lib.rs" [features] -default = ["fs", "json", "uuid", "vector", "io_uring", "time", "crypto"] +default = ["fs", "json", "uuid", "vector", "io_uring", "time"] fs = [] json = [ "dep:jsonb", diff --git a/extensions/core/src/types.rs b/extensions/core/src/types.rs index 63c9a3b54..464e07bfd 100644 --- a/extensions/core/src/types.rs +++ b/extensions/core/src/types.rs @@ -1,4 +1,4 @@ -use std::{fmt::Display, mem}; +use std::fmt::Display; /// Error type is of type ExtError which can be /// either a user defined error or an error code diff --git a/extensions/crypto/Cargo.toml b/extensions/crypto/Cargo.toml index 84bd10efc..7aa8cc5e6 100644 --- a/extensions/crypto/Cargo.toml +++ b/extensions/crypto/Cargo.toml @@ -13,7 +13,6 @@ crate-type = ["cdylib", "lib"] static= [ "limbo_ext/static" ] [dependencies] -ascii85 = "0.2.1" blake3 = "1.5.5" data-encoding = "2.7.0" limbo_ext = { path = "../core", features = ["static"] } diff --git a/extensions/crypto/src/crypto.rs b/extensions/crypto/src/crypto.rs index 654307bd4..ddebbd0a6 100644 --- a/extensions/crypto/src/crypto.rs +++ b/extensions/crypto/src/crypto.rs @@ -3,6 +3,7 @@ use blake3::Hasher; use data_encoding::{BASE32, BASE64, HEXLOWER}; use limbo_ext::{Value, ValueType}; use ring::digest::{self, digest}; +use std::{borrow::Cow, error::Error as StdError}; pub fn sha256(data: &Value) -> Result, Error> { match data.value_type() { @@ -73,12 +74,7 @@ pub fn encode(data: &Value, format: &Value) -> Result { "base32" => Ok(Value::from_text(BASE32.encode(data.as_bytes().as_ref()))), "base64" => Ok(Value::from_text(BASE64.encode(data.as_bytes().as_ref()))), "hex" => Ok(Value::from_text(HEXLOWER.encode(data.as_bytes().as_ref()))), - "base85" => { - let result = ascii85::encode(data.as_bytes().as_ref()) - .replace("<~", "") - .replace("~>", ""); - Ok(Value::from_text(result)) - } + "base85" => Ok(Value::from_text(encode_ascii85(data.as_bytes().as_ref()))), "url" => { let data = data.as_bytes(); let url = urlencoding::encode_binary(&data); @@ -89,3 +85,131 @@ pub fn encode(data: &Value, format: &Value) -> Result { _ => Err(Error::InvalidType), } } + +pub fn decode(data: &Value, format: &Value) -> Result { + match (data.value_type(), format.value_type()) { + (ValueType::Error, _) | (ValueType::Null, _) => Err(Error::InvalidType), + (ValueType::Text, ValueType::Text) => { + let format_str = format.to_text().ok_or(Error::InvalidType)?.to_lowercase(); + let input_text = data.to_text().ok_or(Error::InvalidType)?; + + match format_str.as_str() { + "base32" => { + let payload = BASE32 + .decode(input_text.as_bytes()) + .map_err(|_| Error::DecodeFailed)?; + Ok(Value::from_text( + String::from_utf8(payload).map_err(|_| Error::InvalidUtf8)?, + )) + } + "base64" => { + let payload = BASE64 + .decode(input_text.as_bytes()) + .map_err(|_| Error::DecodeFailed)?; + Ok(Value::from_text( + String::from_utf8(payload).map_err(|_| Error::InvalidUtf8)?, + )) + } + "hex" => { + let payload = HEXLOWER + .decode(input_text.to_lowercase().as_bytes()) + .map_err(|_| Error::DecodeFailed)?; + Ok(Value::from_text( + String::from_utf8(payload).map_err(|_| Error::InvalidUtf8)?, + )) + } + "base85" => { + let decoded = decode_ascii85(&input_text).map_err(|_| Error::DecodeFailed)?; + + Ok(Value::from_text( + String::from_utf8(decoded).map_err(|_| Error::InvalidUtf8)?, + )) + } + "url" => { + let decoded = urlencoding::decode_binary(input_text.as_bytes()); + Ok(Value::from_text( + String::from_utf8(decoded.to_vec()).map_err(|_| Error::InvalidUtf8)?, + )) + } + _ => Err(Error::UnknownOperation), + } + } + _ => Err(Error::InvalidType), + } +} + +// Ascii85 functions to avoid +1 dependency and to remove '~>' '<~' + +const TABLE: [u32; 5] = [85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1]; + +fn decode_ascii85(input: &str) -> Result, Box> { + let mut result = Vec::with_capacity(4 * (input.len() / 5 + 16)); + + let mut counter = 0; + let mut chunk = 0; + + for digit in input.trim().bytes().filter(|c| !c.is_ascii_whitespace()) { + if digit == b'z' { + if counter == 0 { + result.extend_from_slice(&[0, 0, 0, 0]); + } else { + return Err("Missaligned z in input".into()); + } + } + + if digit < 33 || digit > 117 { + return Err("Input char is out of range for Ascii85".into()); + } + + decode_digit(digit, &mut counter, &mut chunk, &mut result); + } + + let mut to_remove = 0; + + while counter != 0 { + decode_digit(b'u', &mut counter, &mut chunk, &mut result); + to_remove += 1; + } + + result.drain((result.len() - to_remove)..result.len()); + + Ok(result) +} + +fn decode_digit(digit: u8, counter: &mut usize, chunk: &mut u32, result: &mut Vec) { + let byte = digit - 33; + + *chunk += byte as u32 * TABLE[*counter]; + + if *counter == 4 { + result.extend_from_slice(&chunk.to_be_bytes()); + *chunk = 0; + *counter = 0; + } else { + *counter += 1; + } +} + +fn encode_ascii85(input: &[u8]) -> String { + let mut result = String::with_capacity(5 * (input.len() / 4 + 16)); + + for chunk in input.chunks(4) { + let (chunk, count) = if chunk.len() == 4 { + (Cow::from(chunk), 5) + } else { + let mut new_chunk = Vec::new(); + new_chunk.resize_with(4, || 0); + new_chunk[..chunk.len()].copy_from_slice(chunk); + (Cow::from(new_chunk), 5 - (4 - chunk.len())) + }; + + let number = u32::from_be_bytes(chunk.as_ref().try_into().expect("Internal Error")); + + for i in 0..count { + let digit = (((number / TABLE[i]) % 85) + 33) as u8; + result.push(digit as char); + } + } + + result +} diff --git a/extensions/crypto/src/lib.rs b/extensions/crypto/src/lib.rs index 09fe28c38..604f313e0 100644 --- a/extensions/crypto/src/lib.rs +++ b/extensions/crypto/src/lib.rs @@ -1,4 +1,4 @@ -use crypto::{blake3, encode, md5, sha1, sha256, sha384, sha512}; +use crypto::{blake3, decode, encode, md5, sha1, sha256, sha384, sha512}; use limbo_ext::{register_extension, scalar, ResultCode, Value}; mod crypto; @@ -7,6 +7,8 @@ mod crypto; enum Error { InvalidType, UnknownOperation, + DecodeFailed, + InvalidUtf8, } #[scalar(name = "crypto_sha256", alias = "crypto_sha256")] @@ -100,6 +102,19 @@ fn crypto_encode(args: &[Value]) -> Value { payload } -register_extension! { - scalars: { crypto_sha256, crypto_sha512, crypto_sha384, crypto_blake3, crypto_sha1, crypto_md5, crypto_encode }, +#[scalar(name = "crypto_decode", alias = "crypto_decode")] +fn crypto_decode(args: &[Value]) -> Value { + if args.len() != 2 { + return Value::error(ResultCode::Error); + } + + let Ok(payload) = decode(&args[0], &args[1]) else { + return Value::error(ResultCode::Error); + }; + + payload +} + +register_extension! { + scalars: { crypto_sha256, crypto_sha512, crypto_sha384, crypto_blake3, crypto_sha1, crypto_md5, crypto_encode, crypto_decode }, } diff --git a/testing/extensions.py b/testing/extensions.py index a1094e865..d4a0a69c0 100755 --- a/testing/extensions.py +++ b/testing/extensions.py @@ -110,7 +110,6 @@ def validate_blob(result): # and assert they are valid hex digits return int(result, 16) is not None - def validate_string_uuid(result): return len(result) == 36 and result.count("-") == 4 @@ -130,7 +129,6 @@ def assert_now_unixtime(result): def assert_specific_time(result): return result == "1736720789" - def test_uuid(pipe): specific_time = "01945ca0-3189-76c0-9a8f-caf310fc8b8e" # these are built into the binary, so we just test they work @@ -207,7 +205,6 @@ def validate_percentile2(res): def validate_percentile_disc(res): return res == "40.0" - def test_aggregates(pipe): extension_path = "./target/debug/liblimbo_percentile.so" # assert no function before extension loads @@ -255,12 +252,180 @@ def test_aggregates(pipe): pipe, "SELECT percentile_disc(value, 0.55) from test;", validate_percentile_disc ) +# Hashes +def validate_blake3(a): + return a == "6437b3ac38465133ffb63b75273a8db548c558465d79db03fd359c6cd5bd9d85" + +def validate_md5(a): + return a == "900150983cd24fb0d6963f7d28e17f72" + +def validate_sha1(a): + return a == "a9993e364706816aba3e25717850c26c9cd0d89d" + +def validate_sha256(a): + return a == "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" + +def validate_sha384(a): + return a == "cb00753f45a35e8bb5a03d699ac65007272c32ab0eded1631a8b605a43ff5bed8086072ba1e7cc2358baeca134c825a7" + +def validate_sha512(a): + return a == "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f" + +# Encoders and decoders +def validate_url_encode(a): + return a == f"%2Fhello%3Ftext%3D%28%E0%B2%A0_%E0%B2%A0%29" + +def validate_url_decode(a): + return a == "/hello?text=(ಠ_ಠ)" + +def validate_hex_encode(a): + return a == "68656c6c6f" + +def validate_hex_decode(a): + return a == "hello" + +def validate_base85_encode(a): + return a == "BOu!rDZ" + +def validate_base85_decode(a): + return a == "hello" + +def validate_base32_encode(a): + return a == "NBSWY3DP" + +def validate_base32_decode(a): + return a == "hello" + +def validate_base64_encode(a): + return a == "aGVsbG8=" + +def validate_base64_decode(a): + return a == "hello" + +def test_crypto(pipe): + extension_path = "./target/debug/liblimbo_crypto.so" + # assert no function before extension loads + run_test( + pipe, + "SELECT crypto_blake('a');", + returns_error, + "crypto_blake3 returns null when ext not loaded", + ) + run_test( + pipe, + f".load {extension_path}", + returns_null, + "load extension command works properly", + ) + # Hashing and Decode + run_test( + pipe, + "SELECT crypto_encode(crypto_blake3('abc'), 'hex');", + validate_blake3, + "blake3 should encrypt correctly" + ) + run_test( + pipe, + "SELECT crypto_encode(crypto_md5('abc'), 'hex');", + validate_md5, + "md5 should encrypt correctly" + ) + run_test( + pipe, + "SELECT crypto_encode(crypto_sha1('abc'), 'hex');", + validate_sha1, + "sha1 should encrypt correctly" + ) + run_test( + pipe, + "SELECT crypto_encode(crypto_sha256('abc'), 'hex');", + validate_sha256, + "sha256 should encrypt correctly" + ) + run_test( + pipe, + "SELECT crypto_encode(crypto_sha384('abc'), 'hex');", + validate_sha384, + "sha384 should encrypt correctly" + ) + run_test( + pipe, + "SELECT crypto_encode(crypto_sha512('abc'), 'hex');", + validate_sha512, + "sha512 should encrypt correctly" + ) + + # Encoding and Decoding + run_test( + pipe, + "SELECT crypto_encode('hello', 'base32');", + validate_base32_encode, + "base32 should encode correctly" + ) + run_test( + pipe, + "SELECT crypto_decode('NBSWY3DP', 'base32');", + validate_base32_decode, + "base32 should decode correctly" + ) + run_test( + pipe, + "SELECT crypto_encode('hello', 'base64');", + validate_base64_encode, + "base64 should encode correctly" + ) + run_test( + pipe, + "SELECT crypto_decode('aGVsbG8=', 'base64');", + validate_base64_decode, + "base64 should decode correctly" + ) + run_test( + pipe, + "SELECT crypto_encode('hello', 'base85');", + validate_base85_encode, + "base85 should encode correctly" + ) + run_test( + pipe, + "SELECT crypto_decode('BOu!rDZ', 'base85');", + validate_base85_decode, + "base85 should decode correctly" + ) + + run_test( + pipe, + "SELECT crypto_encode('hello', 'hex');", + validate_hex_encode, + "hex should encode correctly" + ) + run_test( + pipe, + "SELECT crypto_decode('68656c6c6f', 'hex');", + validate_hex_decode, + "hex should decode correctly" + ) + + run_test( + pipe, + "SELECT crypto_encode('/hello?text=(ಠ_ಠ)', 'url');", + validate_url_encode, + "url should encode correctly" + ) + run_test( + pipe, + f"SELECT crypto_decode('%2Fhello%3Ftext%3D%28%E0%B2%A0_%E0%B2%A0%29', 'url');", + validate_url_decode, + "url should decode correctly" + ) + def main(): pipe = init_limbo() try: test_regexp(pipe) test_uuid(pipe) test_aggregates(pipe) + test_crypto(pipe) except Exception as e: print(f"Test FAILED: {e}") pipe.terminate()