From 643ad147c042d4465ce7375ca8a55be80c02718d Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 30 Jan 2025 01:26:47 -0300 Subject: [PATCH 1/7] checkpoint: implemented time_now, time_fmt_iso, time_date --- Cargo.lock | 31 ++++++ Cargo.toml | 3 +- core/Cargo.toml | 4 +- core/ext/mod.rs | 4 + extensions/time/Cargo.toml | 20 ++++ extensions/time/src/lib.rs | 127 ++++++++++++++++++++++ extensions/time/src/time.rs | 208 ++++++++++++++++++++++++++++++++++++ 7 files changed, 395 insertions(+), 2 deletions(-) create mode 100644 extensions/time/Cargo.toml create mode 100644 extensions/time/src/lib.rs create mode 100644 extensions/time/src/time.rs diff --git a/Cargo.lock b/Cargo.lock index 12e00f676..05837fc26 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1569,6 +1569,7 @@ dependencies = [ "limbo_macros", "limbo_percentile", "limbo_regexp", + "limbo_time", "limbo_uuid", "limbo_vector", "log", @@ -1661,6 +1662,17 @@ dependencies = [ "log", ] +[[package]] +name = "limbo_time" +version = "0.0.13" +dependencies = [ + "chrono", + "limbo_ext", + "strum", + "strum_macros", + "thiserror 2.0.11", +] + [[package]] name = "limbo_uuid" version = "0.0.13" @@ -2729,6 +2741,25 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.96", +] + [[package]] name = "supports-color" version = "3.0.2" diff --git a/Cargo.toml b/Cargo.toml index 3ff1ad34a..67812fa41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,8 @@ members = [ "sqlite3", "tests", "extensions/percentile", - "extensions/vector", + "extensions/vector", + "extensions/time", ] exclude = ["perf/latency/limbo"] diff --git a/core/Cargo.toml b/core/Cargo.toml index 347602f57..a5700c175 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -14,7 +14,7 @@ name = "limbo_core" path = "lib.rs" [features] -default = ["fs", "json", "uuid", "vector", "io_uring"] +default = ["fs", "json", "uuid", "vector", "io_uring", "time"] fs = [] json = [ "dep:jsonb", @@ -26,6 +26,7 @@ vector = ["limbo_vector/static"] io_uring = ["dep:io-uring", "rustix/io_uring"] percentile = ["limbo_percentile/static"] regexp = ["limbo_regexp/static"] +time = ["limbo_time/static"] [target.'cfg(target_os = "linux")'.dependencies] io-uring = { version = "0.6.1", optional = true } @@ -65,6 +66,7 @@ limbo_uuid = { path = "../extensions/uuid", optional = true, features = ["static limbo_vector = { path = "../extensions/vector", optional = true, features = ["static"] } limbo_regexp = { path = "../extensions/regexp", optional = true, features = ["static"] } limbo_percentile = { path = "../extensions/percentile", optional = true, features = ["static"] } +limbo_time = { path = "../extensions/time", optional = true, features = ["static"] } miette = "7.4.0" [build-dependencies] diff --git a/core/ext/mod.rs b/core/ext/mod.rs index c38a99f9c..8a9212556 100644 --- a/core/ext/mod.rs +++ b/core/ext/mod.rs @@ -92,6 +92,10 @@ impl Database { if unsafe { !limbo_regexp::register_extension_static(&ext_api).is_ok() } { return Err("Failed to register regexp extension".to_string()); } + #[cfg(feature = "time")] + if unsafe { !limbo_time::register_extension_static(&ext_api).is_ok() } { + return Err("Failed to register time extension".to_string()); + } Ok(()) } } diff --git a/extensions/time/Cargo.toml b/extensions/time/Cargo.toml new file mode 100644 index 000000000..0f47bd9d7 --- /dev/null +++ b/extensions/time/Cargo.toml @@ -0,0 +1,20 @@ +[package] +authors.workspace = true +edition.workspace = true +license.workspace = true +name = "limbo_time" +repository.workspace = true +version.workspace = true + +[lib] +crate-type = ["cdylib", "lib"] + +[features] +static = ["limbo_ext/static"] + +[dependencies] +chrono = "0.4.39" +limbo_ext = { path = "../core", features = ["static"] } +strum = "0.26.3" +strum_macros = "0.26.3" +thiserror = "2.0.11" diff --git a/extensions/time/src/lib.rs b/extensions/time/src/lib.rs new file mode 100644 index 000000000..756ff6fb3 --- /dev/null +++ b/extensions/time/src/lib.rs @@ -0,0 +1,127 @@ +use chrono::prelude::*; +use chrono::NaiveDateTime; +use thiserror::Error; + +use limbo_ext::{register_extension, scalar, ResultCode, Value}; + +mod time; + +use time::*; + +register_extension! { + scalars: {time_now, time_fmt_iso, time_date}, +} + +macro_rules! ok_tri { + ($e:expr $(,)?) => { + match $e { + Some(val) => val, + None => return Value::error(ResultCode::Error), + } + }; +} + +macro_rules! tri { + ($e:expr $(,)?) => { + match $e { + Ok(val) => val, + Err(_) => return Value::error(ResultCode::Error), + } + }; +} + +#[derive(Error, Debug)] +pub enum TimeError { + /// Timezone offset is invalid + #[error("invalid timezone offset")] + InvalidOffset, + #[error("invalid datetime format")] + InvalidFormat, + /// Blob is not size of `TIME_BLOB_SIZE` + #[error("blob is not correct size")] + InvalidSize, + /// Blob time version not matching + #[error("time blob version mismatch")] + MismatchVersion, + #[error("time blob version mismatch")] + UnknownField(#[from] ::Err), +} + +type Result = core::result::Result; + +#[scalar(name = "time_now", alias = "now")] +fn time_now(args: &[Value]) -> Value { + if args.len() != 0 { + return Value::error(ResultCode::Error); + } + let t = Time::new(); + + t.into_blob() +} + +#[scalar(name = "time_fmt_iso")] +fn time_fmt_iso(args: &[Value]) -> Value { + if args.len() != 1 && args.len() != 2 { + return Value::error(ResultCode::Error); + } + let blob = ok_tri!(args[0].to_blob()); + + let t = tri!(Time::try_from(blob)); + + let offset_sec = { + if args.len() == 2 { + ok_tri!(args[1].to_integer()) as i32 + } else { + 0 + } + }; + + let fmt_str = tri!(t.fmt_iso(offset_sec)); + + Value::from_text(fmt_str) +} + +/// \ +/// Caveat: this function differs from sqlean's as it does not support normalizing the inputs +/// For example, October 32 will error. It will not normalize to November 1.\ +/// Also due to a current limitation in the extension system, the function can only have one alias +/// and the alias cannot have different number of arguments. So no aliasing for now for this one. +#[scalar(name = "time_date")] +fn time_date(args: &[Value]) -> Value { + if args.len() != 3 && args.len() != 6 && args.len() != 7 && args.len() != 8 { + return Value::error(ResultCode::Error); + } + + let year = ok_tri!(&args[0].to_integer()).to_owned() as i32; + let month = ok_tri!(&args[1].to_integer()).to_owned() as u32; + let day = ok_tri!(&args[2].to_integer()).to_owned() as u32; + + let mut datetime: NaiveDateTime = ok_tri!(NaiveDate::from_ymd_opt(year, month, day)) + .and_hms_opt(0, 0, 0) + .unwrap(); + + if args.len() >= 6 { + let hour = ok_tri!(&args[3].to_integer()).to_owned() as u32; + let minute = ok_tri!(&args[4].to_integer()).to_owned() as u32; + let seconds = ok_tri!(&args[5].to_integer()).to_owned() as u32; + datetime = ok_tri!(datetime.with_hour(hour)); + datetime = ok_tri!(datetime.with_minute(minute)); + datetime = ok_tri!(datetime.with_second(seconds)); + } + + if args.len() >= 7 { + let nano_sec = ok_tri!(&args[6].to_integer()).to_owned() as u32; + datetime = ok_tri!(datetime.with_nanosecond(nano_sec)); + } + + if args.len() == 8 { + let offset_sec = ok_tri!(&args[7].to_integer()).to_owned() as i32; + let offset = ok_tri!(FixedOffset::east_opt(offset_sec)); + // I believe this is not a double conversion here + datetime = ok_tri!(datetime.and_local_timezone(offset).single()).naive_utc(); + } + + let t = Time::from_datetime(datetime.and_utc()); + + t.into_blob() +} diff --git a/extensions/time/src/time.rs b/extensions/time/src/time.rs new file mode 100644 index 000000000..15c589826 --- /dev/null +++ b/extensions/time/src/time.rs @@ -0,0 +1,208 @@ +use std::str::FromStr; + +use chrono::prelude::*; +use chrono::{self, DateTime, NaiveDate, Timelike, Utc}; + +use limbo_ext::Value; + +use crate::{Result, TimeError}; + +const DAYS_BEFORE_EPOCH: i64 = 719162; +const TIME_BLOB_SIZE: usize = 13; +const VERSION: u8 = 1; + +#[derive(Debug)] +enum TimePrecision { + Seconds, + Millis, + Micro, + Nano, +} + +#[derive(Debug)] +pub struct Time { + // seconds: i64, + // nanoseconds: u32, + inner: DateTime, +} + +#[derive(Debug)] +pub struct Duration { + inner: i64, +} + +#[derive(strum_macros::Display, strum_macros::EnumString)] +pub enum TimeField { + #[strum(to_string = "millennium")] + Millennium, + #[strum(to_string = "century")] + Century, + #[strum(to_string = "decade")] + Decade, + #[strum(to_string = "year")] + Year, + #[strum(to_string = "quarter")] + Quarter, + #[strum(to_string = "month")] + Month, + #[strum(to_string = "day")] + Day, + #[strum(to_string = "hour")] + Hour, + #[strum(to_string = "minute")] + Minute, + #[strum(to_string = "second")] + Second, + #[strum(to_string = "millisecond")] + MilliSecond, + #[strum(to_string = "milli")] + Milli, + #[strum(to_string = "microsecond")] + MicroSecond, + #[strum(to_string = "micro")] + Micro, + #[strum(to_string = "nanosecond")] + NanoSecond, + #[strum(to_string = "nano")] + Nano, + #[strum(to_string = "isoyear")] + IsoYear, + #[strum(to_string = "isoweek")] + IsoWeek, + #[strum(to_string = "isodow")] + IsoDow, + #[strum(to_string = "yearday")] + YearDay, + #[strum(to_string = "weekday")] + WeekDay, + #[strum(to_string = "epoch")] + Epoch, +} + +impl Time { + pub fn new() -> Self { + Self { inner: Utc::now() } + } + + pub fn into_blob(self) -> Value { + let blob: [u8; 13] = self.into(); + Value::from_blob(blob.to_vec()) + } + + pub fn fmt_iso(&self, offset_sec: i32) -> Result { + if offset_sec == 0 { + if self.inner.nanosecond() == 0 { + return Ok(self.inner.format("%FT%TZ").to_string()); + } else { + return Ok(self.inner.format("%FT%T%.9fZ").to_string()); + } + } + // I do not see how this can error + let offset = &FixedOffset::east_opt(offset_sec).ok_or(TimeError::InvalidFormat)?; + + let timezone_date = self.inner.with_timezone(offset); + + if timezone_date.nanosecond() == 0 { + return Ok(timezone_date.format("%FT%T%:z").to_string()); + } else { + return Ok(timezone_date.format("%FT%T%.9f%:z").to_string()); + } + } + + /// Adjust the datetime to the offset + pub fn from_datetime(dt: DateTime) -> Self { + Self { inner: dt } + } + + pub fn time_get(&self, field: &str) -> Result { + use TimeField::*; + + chrono::format::strftime + self.inner.format_with_items(items) + + let val = match TimeField::from_str(field)? { + Millennium => Value::from_integer((self.inner.year() / 1000) as i64), + Century => Value::from_integer((self.inner.year() / 100) as i64), + Decade => Value::from_integer((self.inner.year() / 10) as i64), + Year => Value::from_integer(self.inner.year() as i64), + Quarter => Value::from_integer((self.inner.month().div_ceil(4) + 1) as i64), + Month => Value::from_integer(self.inner.month() as i64), + Day => Value::from_integer(self.inner.day() as i64), + Hour => Value::from_integer(self.inner.hour() as i64), + Minute => Value::from_integer(self.inner.minute() as i64), + Second => Value::from_float(f64::from_str(s)) + MilliSecond | Milli => Value:: from_integer(self.inner.) + + }; + + Ok(val) + } +} + +impl From