Merge 'Sqlean Crypto extension' from Diego Reis

Introduces a new `crypto` extension, compatible with the Sqlean [crypto
extension](https://github.com/nalgeon/sqlean/blob/main/docs/crypto.md).

Closes #903
This commit is contained in:
Pekka Enberg
2025-02-06 13:46:01 +02:00
9 changed files with 647 additions and 3 deletions

84
Cargo.lock generated
View File

@@ -140,6 +140,12 @@ version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
[[package]]
name = "arrayref"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
[[package]]
name = "arrayvec"
version = "0.7.6"
@@ -204,6 +210,19 @@ version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
[[package]]
name = "blake3"
version = "1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
@@ -455,6 +474,12 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "constant_time_eq"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
@@ -617,6 +642,12 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "data-encoding"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e60eed09d8c01d3cee5b7d30acb059b76614c918fa0f992e0dd6eeb10daad6f"
[[package]]
name = "debugid"
version = "0.8.0"
@@ -1580,6 +1611,7 @@ dependencies = [
"julian_day_converter",
"libc",
"libloading",
"limbo_crypto",
"limbo_ext",
"limbo_macros",
"limbo_percentile",
@@ -1612,6 +1644,19 @@ dependencies = [
"tracing",
]
[[package]]
name = "limbo_crypto"
version = "0.0.14"
dependencies = [
"blake3",
"data-encoding",
"limbo_ext",
"md5",
"mimalloc",
"ring",
"urlencoding",
]
[[package]]
name = "limbo_ext"
version = "0.0.14"
@@ -1732,6 +1777,12 @@ version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
[[package]]
name = "md5"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "memchr"
version = "2.7.4"
@@ -2517,6 +2568,21 @@ dependencies = [
"bytemuck",
]
[[package]]
name = "ring"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
dependencies = [
"cc",
"cfg-if",
"getrandom 0.2.15",
"libc",
"spin",
"untrusted",
"windows-sys 0.52.0",
]
[[package]]
name = "rstest"
version = "0.18.2"
@@ -2740,6 +2806,12 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "sqlite3-parser"
version = "0.13.0"
@@ -3109,6 +3181,12 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
[[package]]
name = "untrusted"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "url"
version = "2.5.4"
@@ -3120,6 +3198,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf16_iter"
version = "1.0.5"

View File

@@ -19,6 +19,7 @@ members = [
"tests",
"extensions/percentile",
"extensions/time",
"extensions/crypto",
]
exclude = ["perf/latency/limbo"]

View File

@@ -26,6 +26,7 @@ io_uring = ["dep:io-uring", "rustix/io_uring"]
percentile = ["limbo_percentile/static"]
regexp = ["limbo_regexp/static"]
time = ["limbo_time/static"]
crypto = ["limbo_crypto/static"]
[target.'cfg(target_os = "linux")'.dependencies]
io-uring = { version = "0.6.1", optional = true }
@@ -65,6 +66,7 @@ limbo_uuid = { path = "../extensions/uuid", optional = true, features = ["static
limbo_regexp = { path = "../extensions/regexp", optional = true, features = ["static"] }
limbo_percentile = { path = "../extensions/percentile", optional = true, features = ["static"] }
limbo_time = { path = "../extensions/time", optional = true, features = ["static"] }
limbo_crypto = { path = "../extensions/crypto", optional = true, features = ["static"] }
miette = "7.4.0"
strum = "0.26"
parking_lot = "0.12.3"

View File

@@ -92,6 +92,10 @@ impl Database {
if unsafe { !limbo_time::register_extension_static(&ext_api).is_ok() } {
return Err("Failed to register time extension".to_string());
}
#[cfg(feature = "crypto")]
if unsafe { !limbo_crypto::register_extension_static(&ext_api).is_ok() } {
return Err("Failed to register crypto extension".to_string());
}
Ok(())
}
}

View File

@@ -204,6 +204,13 @@ impl Blob {
pub fn new(data: *const u8, size: u64) -> Self {
Self { data, size }
}
/// Borrows the blob's contents as a byte slice.
///
/// A null `data` pointer yields an empty slice instead of being dereferenced.
pub fn as_bytes(&self) -> &[u8] {
    match self.data.is_null() {
        true => &[],
        // SAFETY: NOTE(review) assumes a non-null `data` points at `size`
        // readable bytes that stay alive for as long as `self` is borrowed;
        // confirm against whoever constructs the `Blob`.
        false => unsafe { std::slice::from_raw_parts(self.data, self.size as usize) },
    }
}
}
impl Value {
@@ -303,6 +310,29 @@ impl Value {
}
}
// Return ValueData as raw bytes
pub fn as_bytes(&self) -> Vec<u8> {
let mut bytes = vec![];
unsafe {
match self.value_type {
ValueType::Integer => bytes.extend_from_slice(&self.value.int.to_le_bytes()),
ValueType::Float => bytes.extend_from_slice(&self.value.float.to_le_bytes()),
ValueType::Text => {
let text = self.value.text.as_ref().expect("Invalid text pointer");
bytes.extend_from_slice(text.as_str().as_bytes());
}
ValueType::Blob => {
let blob = self.value.blob.as_ref().expect("Invalid blob pointer");
bytes.extend_from_slice(blob.as_bytes());
}
ValueType::Error | ValueType::Null => {}
}
}
bytes
}
/// Creates a new integer Value from an i64
pub fn from_integer(i: i64) -> Self {
Self {

View File

@@ -0,0 +1,24 @@
[package]
name = "limbo_crypto"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
[lib]
crate-type = ["cdylib", "lib"]
[features]
static= [ "limbo_ext/static" ]
[dependencies]
blake3 = "1.5.5"
data-encoding = "2.7.0"
limbo_ext = { path = "../core", features = ["static"] }
md5 = "0.7.0"
ring = "0.17.8"
urlencoding = "2.1.3"
[target.'cfg(not(target_family = "wasm"))'.dependencies]
mimalloc = { version = "*", default-features = false }

View File

@@ -0,0 +1,215 @@
use crate::Error;
use blake3::Hasher;
use data_encoding::{BASE32, BASE64, HEXLOWER};
use limbo_ext::{Value, ValueType};
use ring::digest::{self, digest};
use std::{borrow::Cow, error::Error as StdError};
pub fn sha256(data: &Value) -> Result<Vec<u8>, Error> {
match data.value_type() {
ValueType::Error | ValueType::Null => Err(Error::InvalidType),
_ => {
let hash = digest(&digest::SHA256, &data.as_bytes());
Ok(hash.as_ref().to_vec())
}
}
}
pub fn sha512(data: &Value) -> Result<Vec<u8>, Error> {
match data.value_type() {
ValueType::Error | ValueType::Null => Err(Error::InvalidType),
_ => {
let hash = digest(&digest::SHA512, &data.as_bytes());
Ok(hash.as_ref().to_vec())
}
}
}
pub fn sha384(data: &Value) -> Result<Vec<u8>, Error> {
match data.value_type() {
ValueType::Error | ValueType::Null => Err(Error::InvalidType),
_ => {
let hash = digest(&digest::SHA384, &data.as_bytes());
Ok(hash.as_ref().to_vec())
}
}
}
pub fn blake3(data: &Value) -> Result<Vec<u8>, Error> {
match data.value_type() {
ValueType::Error | ValueType::Null => Err(Error::InvalidType),
_ => {
let mut hasher = Hasher::new();
hasher.update(data.as_bytes().as_ref());
Ok(hasher.finalize().as_bytes().to_vec())
}
}
}
pub fn sha1(data: &Value) -> Result<Vec<u8>, Error> {
match data.value_type() {
ValueType::Error | ValueType::Null => Err(Error::InvalidType),
_ => {
let hash = digest(&digest::SHA1_FOR_LEGACY_USE_ONLY, &data.as_bytes());
Ok(hash.as_ref().to_vec())
}
}
}
pub fn md5(data: &Value) -> Result<Vec<u8>, Error> {
match data.value_type() {
ValueType::Error | ValueType::Null => Err(Error::InvalidType),
_ => {
let digest = md5::compute::<&Vec<u8>>(data.as_bytes().as_ref());
Ok(digest.as_ref().to_vec())
}
}
}
pub fn encode(data: &Value, format: &Value) -> Result<Value, Error> {
match (data.value_type(), format.value_type()) {
(ValueType::Error, _) | (ValueType::Null, _) => Err(Error::InvalidType),
(_, ValueType::Text) => match format.to_text().unwrap().to_lowercase().as_str() {
"base32" => Ok(Value::from_text(BASE32.encode(data.as_bytes().as_ref()))),
"base64" => Ok(Value::from_text(BASE64.encode(data.as_bytes().as_ref()))),
"hex" => Ok(Value::from_text(HEXLOWER.encode(data.as_bytes().as_ref()))),
"base85" => Ok(Value::from_text(encode_ascii85(data.as_bytes().as_ref()))),
"url" => {
let data = data.as_bytes();
let url = urlencoding::encode_binary(&data);
Ok(Value::from_text(url.to_string()))
}
_ => Err(Error::UnknownOperation),
},
_ => Err(Error::InvalidType),
}
}
pub fn decode(data: &Value, format: &Value) -> Result<Value, Error> {
match (data.value_type(), format.value_type()) {
(ValueType::Error, _) | (ValueType::Null, _) => Err(Error::InvalidType),
(ValueType::Text, ValueType::Text) => {
let format_str = format.to_text().ok_or(Error::InvalidType)?.to_lowercase();
let input_text = data.to_text().ok_or(Error::InvalidType)?;
match format_str.as_str() {
"base32" => {
let payload = BASE32
.decode(input_text.as_bytes())
.map_err(|_| Error::DecodeFailed)?;
Ok(Value::from_text(
String::from_utf8(payload).map_err(|_| Error::InvalidUtf8)?,
))
}
"base64" => {
let payload = BASE64
.decode(input_text.as_bytes())
.map_err(|_| Error::DecodeFailed)?;
Ok(Value::from_text(
String::from_utf8(payload).map_err(|_| Error::InvalidUtf8)?,
))
}
"hex" => {
let payload = HEXLOWER
.decode(input_text.to_lowercase().as_bytes())
.map_err(|_| Error::DecodeFailed)?;
Ok(Value::from_text(
String::from_utf8(payload).map_err(|_| Error::InvalidUtf8)?,
))
}
"base85" => {
let decoded = decode_ascii85(&input_text).map_err(|_| Error::DecodeFailed)?;
Ok(Value::from_text(
String::from_utf8(decoded).map_err(|_| Error::InvalidUtf8)?,
))
}
"url" => {
let decoded = urlencoding::decode_binary(input_text.as_bytes());
Ok(Value::from_text(
String::from_utf8(decoded.to_vec()).map_err(|_| Error::InvalidUtf8)?,
))
}
_ => Err(Error::UnknownOperation),
}
}
_ => Err(Error::InvalidType),
}
}
// Ascii85 functions to avoid +1 dependency and to remove '~>' '<~'
/// Place values of the five base-85 digits in a group, most significant first.
const TABLE: [u32; 5] = [85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1];

/// Decodes Ascii85 text (without the `<~` / `~>` delimiters) into bytes.
///
/// ASCII whitespace anywhere in the input is ignored. A `z` at a group
/// boundary expands to four zero bytes. A trailing partial group is padded
/// with `u` digits and the padding bytes are dropped from the output.
///
/// # Errors
///
/// Returns an error when a `z` appears in the middle of a group, when a
/// character is outside the `!`..=`u` range, or when a group encodes a value
/// that does not fit in 32 bits.
fn decode_ascii85(input: &str) -> Result<Vec<u8>, Box<dyn StdError>> {
    let mut result = Vec::with_capacity(4 * (input.len() / 5 + 16));
    let mut counter = 0usize;
    let mut chunk = 0u32;

    for digit in input.trim().bytes().filter(|c| !c.is_ascii_whitespace()) {
        if digit == b'z' {
            if counter != 0 {
                return Err("Missaligned z in input".into());
            }
            result.extend_from_slice(&[0, 0, 0, 0]);
            // `z` is a complete group on its own; it must not reach the range
            // check below, which would reject it (b'z' > b'u').
            continue;
        }
        if !(33..=117).contains(&digit) {
            return Err("Input char is out of range for Ascii85".into());
        }
        decode_digit(digit, &mut counter, &mut chunk, &mut result)?;
    }

    // Pad the final partial group with the highest digit, then discard one
    // output byte per padding digit.
    let mut to_remove = 0;
    while counter != 0 {
        decode_digit(b'u', &mut counter, &mut chunk, &mut result)?;
        to_remove += 1;
    }
    result.truncate(result.len() - to_remove);

    Ok(result)
}

/// Folds one Ascii85 digit into the group being accumulated in `chunk`.
///
/// `digit` must already be validated to lie in `33..=117`. `counter` is the
/// digit's position within the group; after the fifth digit the four decoded
/// bytes are appended to `result` and both `chunk` and `counter` are reset.
///
/// # Errors
///
/// Returns an error when the accumulated group value exceeds `u32::MAX`
/// (for example any group starting with `t` or `u`).
fn decode_digit(
    digit: u8,
    counter: &mut usize,
    chunk: &mut u32,
    result: &mut Vec<u8>,
) -> Result<(), Box<dyn StdError>> {
    const OVERFLOW: &str = "Ascii85 group does not fit in 32 bits";

    let value = u32::from(digit - 33);
    let term = value.checked_mul(TABLE[*counter]).ok_or(OVERFLOW)?;
    *chunk = chunk.checked_add(term).ok_or(OVERFLOW)?;

    if *counter == 4 {
        result.extend_from_slice(&chunk.to_be_bytes());
        *chunk = 0;
        *counter = 0;
    } else {
        *counter += 1;
    }
    Ok(())
}
/// Encodes `input` as Ascii85 text without the `<~` / `~>` delimiters.
///
/// The input is consumed in groups of four bytes. A full group produces five
/// digits; a final group of `k < 4` bytes is zero-padded and produces only
/// its first `k + 1` digits. All-zero groups are written out in full; no `z`
/// shorthand is emitted.
fn encode_ascii85(input: &[u8]) -> String {
    let mut encoded = String::with_capacity(5 * (input.len() / 4 + 16));

    for group in input.chunks(4) {
        // Borrow complete groups as-is; only a short tail needs a padded copy.
        let padded: Cow<[u8]> = match group.len() {
            4 => Cow::Borrowed(group),
            short => {
                let mut buf = vec![0u8; 4];
                buf[..short].copy_from_slice(group);
                Cow::Owned(buf)
            }
        };
        let word = u32::from_be_bytes(padded.as_ref().try_into().expect("Internal Error"));

        let digit_count = group.len() + 1;
        encoded.extend(
            TABLE
                .iter()
                .take(digit_count)
                .map(|&power| char::from(((word / power) % 85 + 33) as u8)),
        );
    }

    encoded
}

View File

@@ -0,0 +1,120 @@
use crypto::{blake3, decode, encode, md5, sha1, sha256, sha384, sha512};
use limbo_ext::{register_extension, scalar, ResultCode, Value};
mod crypto;
/// Failure modes of the hashing, encoding and decoding helpers in `crypto`.
#[derive(Debug)]
enum Error {
    /// An argument had a value type the operation does not accept
    /// (e.g. an error/NULL input, or a non-text format argument).
    InvalidType,
    /// The requested encoding/decoding format name is not supported.
    UnknownOperation,
    /// The input text could not be decoded with the requested format.
    DecodeFailed,
    /// Decoding succeeded but the resulting bytes are not valid UTF-8.
    InvalidUtf8,
}
#[scalar(name = "crypto_sha256", alias = "crypto_sha256")]
fn crypto_sha256(args: &[Value]) -> Value {
if args.len() != 1 {
return Value::error(ResultCode::Error);
}
let Ok(hash) = sha256(&args[0]) else {
return Value::error(ResultCode::Error);
};
Value::from_blob(hash)
}
#[scalar(name = "crypto_sha512", alias = "crypto_sha512")]
fn crypto_sha512(args: &[Value]) -> Value {
if args.len() != 1 {
return Value::error(ResultCode::Error);
}
let Ok(hash) = sha512(&args[0]) else {
return Value::error(ResultCode::Error);
};
Value::from_blob(hash)
}
#[scalar(name = "crypto_sha384", alias = "crypto_sha384")]
fn crypto_sha384(args: &[Value]) -> Value {
if args.len() != 1 {
return Value::error(ResultCode::Error);
}
let Ok(hash) = sha384(&args[0]) else {
return Value::error(ResultCode::Error);
};
Value::from_blob(hash)
}
#[scalar(name = "crypto_blake3", alias = "crypto_blake3")]
fn crypto_blake3(args: &[Value]) -> Value {
if args.len() != 1 {
return Value::error(ResultCode::Error);
}
let Ok(hash) = blake3(&args[0]) else {
return Value::error(ResultCode::Error);
};
Value::from_blob(hash)
}
#[scalar(name = "crypto_sha1", alias = "crypto_sha1")]
fn crypto_sha1(args: &[Value]) -> Value {
if args.len() != 1 {
return Value::error(ResultCode::Error);
}
let Ok(hash) = sha1(&args[0]) else {
return Value::error(ResultCode::Error);
};
Value::from_blob(hash)
}
#[scalar(name = "crypto_md5", alias = "crypto_md5")]
fn crypto_md5(args: &[Value]) -> Value {
if args.len() != 1 {
return Value::error(ResultCode::Error);
}
let Ok(hash) = md5(&args[0]) else {
return Value::error(ResultCode::Error);
};
Value::from_blob(hash)
}
#[scalar(name = "crypto_encode", alias = "crypto_encode")]
fn crypto_encode(args: &[Value]) -> Value {
if args.len() != 2 {
return Value::error(ResultCode::Error);
}
let Ok(payload) = encode(&args[0], &args[1]) else {
return Value::error(ResultCode::Error);
};
payload
}
#[scalar(name = "crypto_decode", alias = "crypto_decode")]
fn crypto_decode(args: &[Value]) -> Value {
if args.len() != 2 {
return Value::error(ResultCode::Error);
}
let Ok(payload) = decode(&args[0], &args[1]) else {
return Value::error(ResultCode::Error);
};
payload
}
// Hands every scalar function defined above to limbo_ext's
// `register_extension!` macro.
// NOTE(review): presumably this generates the extension entry points
// (including the static registration used by the core crate) — confirm
// against the macro definition.
register_extension! {
scalars: { crypto_sha256, crypto_sha512, crypto_sha384, crypto_blake3, crypto_sha1, crypto_md5, crypto_encode, crypto_decode },
}

View File

@@ -110,7 +110,6 @@ def validate_blob(result):
# and assert they are valid hex digits
return int(result, 16) is not None
def validate_string_uuid(result):
return len(result) == 36 and result.count("-") == 4
@@ -130,7 +129,6 @@ def assert_now_unixtime(result):
def assert_specific_time(result):
return result == "1736720789"
def test_uuid(pipe):
specific_time = "01945ca0-3189-76c0-9a8f-caf310fc8b8e"
# these are built into the binary, so we just test they work
@@ -207,7 +205,6 @@ def validate_percentile2(res):
def validate_percentile_disc(res):
return res == "40.0"
def test_aggregates(pipe):
extension_path = "./target/debug/liblimbo_percentile.so"
# assert no function before extension loads
@@ -255,6 +252,172 @@ def test_aggregates(pipe):
pipe, "SELECT percentile_disc(value, 0.55) from test;", validate_percentile_disc
)
# Hashes
# Each validator compares the shell output with the hex-encoded digest that
# test_crypto expects for the input 'abc'.


def validate_blake3(a):
    """Return True when `a` is the expected hex BLAKE3 hash."""
    expected = "6437b3ac38465133ffb63b75273a8db548c558465d79db03fd359c6cd5bd9d85"
    return a == expected


def validate_md5(a):
    """Return True when `a` is the expected hex MD5 digest."""
    expected = "900150983cd24fb0d6963f7d28e17f72"
    return a == expected


def validate_sha1(a):
    """Return True when `a` is the expected hex SHA-1 digest."""
    expected = "a9993e364706816aba3e25717850c26c9cd0d89d"
    return a == expected


def validate_sha256(a):
    """Return True when `a` is the expected hex SHA-256 digest."""
    expected = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
    return a == expected


def validate_sha384(a):
    """Return True when `a` is the expected hex SHA-384 digest."""
    expected = "cb00753f45a35e8bb5a03d699ac65007272c32ab0eded1631a8b605a43ff5bed8086072ba1e7cc2358baeca134c825a7"
    return a == expected


def validate_sha512(a):
    """Return True when `a` is the expected hex SHA-512 digest."""
    expected = "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"
    return a == expected
# Encoders and decoders
# Each validator compares the shell output with the literal result that
# test_crypto expects for its fixed sample input.


def validate_url_encode(a):
    """Return True when `a` is the expected URL-encoded sample."""
    expected = "%2Fhello%3Ftext%3D%28%E0%B2%A0_%E0%B2%A0%29"
    return a == expected


def validate_url_decode(a):
    """Return True when `a` is the expected URL-decoded sample."""
    expected = "/hello?text=(ಠ_ಠ)"
    return a == expected


def validate_hex_encode(a):
    """Return True when `a` is the hex encoding expected for 'hello'."""
    expected = "68656c6c6f"
    return a == expected


def validate_hex_decode(a):
    """Return True when the hex sample decoded to 'hello'."""
    expected = "hello"
    return a == expected


def validate_base85_encode(a):
    """Return True when `a` is the base85 encoding expected for 'hello'."""
    expected = "BOu!rDZ"
    return a == expected


def validate_base85_decode(a):
    """Return True when the base85 sample decoded to 'hello'."""
    expected = "hello"
    return a == expected


def validate_base32_encode(a):
    """Return True when `a` is the base32 encoding expected for 'hello'."""
    expected = "NBSWY3DP"
    return a == expected


def validate_base32_decode(a):
    """Return True when the base32 sample decoded to 'hello'."""
    expected = "hello"
    return a == expected


def validate_base64_encode(a):
    """Return True when `a` is the base64 encoding expected for 'hello'."""
    expected = "aGVsbG8="
    return a == expected


def validate_base64_decode(a):
    """Return True when the base64 sample decoded to 'hello'."""
    expected = "hello"
    return a == expected
def test_crypto(pipe):
    """Exercise the crypto extension through the shell attached to `pipe`.

    First checks that `crypto_blake3` is rejected before the extension is
    loaded, then loads the shared library and runs every hash, encode and
    decode query against its validator, in a fixed order.
    """
    extension_path = "./target/debug/liblimbo_crypto.so"
    # assert no function before extension loads
    # The probe must name a function the extension really provides
    # (crypto_blake3); a misspelled name would error out even after loading
    # and make this check pass vacuously.
    run_test(
        pipe,
        "SELECT crypto_blake3('a');",
        returns_error,
        "crypto_blake3 returns null when ext not loaded",
    )
    run_test(
        pipe,
        f".load {extension_path}",
        returns_null,
        "load extension command works properly",
    )

    # (query, validator, description) triples, executed in order.
    cases = [
        # Hashing and Decode
        (
            "SELECT crypto_encode(crypto_blake3('abc'), 'hex');",
            validate_blake3,
            "blake3 should encrypt correctly",
        ),
        (
            "SELECT crypto_encode(crypto_md5('abc'), 'hex');",
            validate_md5,
            "md5 should encrypt correctly",
        ),
        (
            "SELECT crypto_encode(crypto_sha1('abc'), 'hex');",
            validate_sha1,
            "sha1 should encrypt correctly",
        ),
        (
            "SELECT crypto_encode(crypto_sha256('abc'), 'hex');",
            validate_sha256,
            "sha256 should encrypt correctly",
        ),
        (
            "SELECT crypto_encode(crypto_sha384('abc'), 'hex');",
            validate_sha384,
            "sha384 should encrypt correctly",
        ),
        (
            "SELECT crypto_encode(crypto_sha512('abc'), 'hex');",
            validate_sha512,
            "sha512 should encrypt correctly",
        ),
        # Encoding and Decoding
        (
            "SELECT crypto_encode('hello', 'base32');",
            validate_base32_encode,
            "base32 should encode correctly",
        ),
        (
            "SELECT crypto_decode('NBSWY3DP', 'base32');",
            validate_base32_decode,
            "base32 should decode correctly",
        ),
        (
            "SELECT crypto_encode('hello', 'base64');",
            validate_base64_encode,
            "base64 should encode correctly",
        ),
        (
            "SELECT crypto_decode('aGVsbG8=', 'base64');",
            validate_base64_decode,
            "base64 should decode correctly",
        ),
        (
            "SELECT crypto_encode('hello', 'base85');",
            validate_base85_encode,
            "base85 should encode correctly",
        ),
        (
            "SELECT crypto_decode('BOu!rDZ', 'base85');",
            validate_base85_decode,
            "base85 should decode correctly",
        ),
        (
            "SELECT crypto_encode('hello', 'hex');",
            validate_hex_encode,
            "hex should encode correctly",
        ),
        (
            "SELECT crypto_decode('68656c6c6f', 'hex');",
            validate_hex_decode,
            "hex should decode correctly",
        ),
        (
            "SELECT crypto_encode('/hello?text=(ಠ_ಠ)', 'url');",
            validate_url_encode,
            "url should encode correctly",
        ),
        (
            "SELECT crypto_decode('%2Fhello%3Ftext%3D%28%E0%B2%A0_%E0%B2%A0%29', 'url');",
            validate_url_decode,
            "url should decode correctly",
        ),
    ]
    for query, validator, description in cases:
        run_test(pipe, query, validator, description)
def main():
pipe = init_limbo()
@@ -262,6 +425,7 @@ def main():
test_regexp(pipe)
test_uuid(pipe)
test_aggregates(pipe)
test_crypto(pipe)
except Exception as e:
print(f"Test FAILED: {e}")
pipe.terminate()