From 871fae3286060daef18a75f5ce0d40bb3c90e985 Mon Sep 17 00:00:00 2001 From: gandeevanr Date: Thu, 18 Jul 2024 16:42:44 -0700 Subject: [PATCH] Implement the Date() method --- Cargo.lock | 78 ++++++++++ core/Cargo.toml | 2 + core/datetime.rs | 315 +++++++++++++++++++++++++++++++++++++++++ core/function.rs | 9 ++ core/lib.rs | 1 + core/translate/expr.rs | 18 +++ core/vdbe/mod.rs | 17 +++ 7 files changed, 440 insertions(+) create mode 100644 core/datetime.rs diff --git a/Cargo.lock b/Cargo.lock index f3737cf57..475eb2940 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -45,6 +45,21 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anes" version = "0.1.6" @@ -217,6 +232,20 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18758054972164c3264f7c8386f5fc6da6114cb46b619fd365d4e3b2dc3ae487" +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets 0.52.6", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -357,6 +386,12 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + [[package]] name = "cpp_demangle" version = "0.4.3" @@ -809,6 +844,29 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -898,6 +956,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "julian_day_converter" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b8b1d2decaeec65c1d9729098450800ffb40767dcab1b9be3e6e7eb21c6e7a5" +dependencies = [ + "chrono", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -968,10 +1035,12 @@ version = "0.0.1" dependencies = [ "anyhow", "cfg_block", + "chrono", "criterion", "fallible-iterator 0.3.0", "getrandom", "io-uring", + "julian_day_converter", "libc", "log", "mimalloc", @@ -1966,6 +2035,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.48.0" 
diff --git a/core/Cargo.toml b/core/Cargo.toml
index b94931e30..ca7b7c30c 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -39,6 +39,8 @@ sqlite3-parser = "0.11.0"
 thiserror = "1.0.61"
 getrandom = { version = "0.2.15", features = ["js"] }
 regex = "1.10.5"
+chrono = "0.4.38"
+julian_day_converter = "0.3.2"
 
 [target.'cfg(not(target_family = "windows"))'.dev-dependencies]
 pprof = { version = "0.12.1", features = ["criterion", "flamegraph"] }
diff --git a/core/datetime.rs b/core/datetime.rs
new file mode 100644
index 000000000..183265654
--- /dev/null
+++ b/core/datetime.rs
@@ -0,0 +1,324 @@
+use crate::types::OwnedValue;
+use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Timelike};
+use anyhow;
+use std::{error::Error, fmt::Display};
+
+/// Failure modes of time-value parsing.
+#[derive(Debug)]
+enum DateTimeError {
+    /// The input is not a valid time value; `get_date_from_time_value` maps it to "".
+    InvalidArgument(String),
+    /// An unexpected conversion failure; `get_date_from_time_value` returns it as `Err`.
+    Other(String),
+}
+
+impl Display for DateTimeError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            DateTimeError::InvalidArgument(s) => write!(f, "Invalid argument: {}", s),
+            DateTimeError::Other(s) => write!(f, "Other error: {}", s),
+        }
+    }
+}
+
+impl Error for DateTimeError {}
+
+#[derive(Debug, Clone, Copy)]
+pub enum TimeUnit {
+    Second,
+    Minute,
+    Hour,
+    Day,
+    Month,
+    Year,
+}
+
+/*
+** The following table defines various date transformations of the form
+**
+**            'NNN days'
+**
+** Where NNN is an arbitrary floating-point number and "days" can be one
+** of several units of time.
+*/
+impl TimeUnit {
+    pub fn name(&self) -> &'static str {
+        match self {
+            TimeUnit::Second => "second",
+            TimeUnit::Minute => "minute",
+            TimeUnit::Hour => "hour",
+            TimeUnit::Day => "day",
+            TimeUnit::Month => "month",
+            TimeUnit::Year => "year",
+        }
+    }
+
+    // Maximum value for each unit in Julian calendar
+    // Each corresponds to ~14713 years in the Julian calendar, which equals
+    // 10000 years in the Gregorian calendar
+    pub fn max_value_julian(&self) -> f64 {
+        match self {
+            TimeUnit::Second => 4.6427e14,
+            TimeUnit::Minute => 7.7379e12,
+            TimeUnit::Hour => 1.2897e11,
+            TimeUnit::Day => 5373485.0,
+            TimeUnit::Month => 176546.0,
+            TimeUnit::Year => 14713.0,
+        }
+    }
+
+    // Conversion factor from the unit to seconds
+    pub fn seconds_conversion(&self) -> f64 {
+        match self {
+            TimeUnit::Second => 1.0,
+            TimeUnit::Minute => 60.0,
+            TimeUnit::Hour => 3600.0,
+            TimeUnit::Day => 86400.0,
+            TimeUnit::Month => 2592000.0,
+            TimeUnit::Year => 31536000.0,
+        }
+    }
+}
+
+/// Exclusive upper bound for datetimes this module will format: 10000-01-01T00:00:00.
+fn get_max_datetime_exclusive() -> NaiveDateTime {
+    // The maximum date in SQLite is 9999-12-31
+    NaiveDateTime::new(
+        NaiveDate::from_ymd_opt(10000, 01, 01).unwrap(),
+        NaiveTime::from_hms_milli_opt(00, 00, 00, 000).unwrap(),
+    )
+}
+
+/// Renders a time value as a `YYYY-MM-DD` string.
+///
+/// Text values are parsed as time-value strings; integer and float values are
+/// treated as Julian day numbers. Any other value type, or a value that is not
+/// a valid time value, yields an empty string instead of an error. `Err` is
+/// returned only when the Julian-day conversion itself fails unexpectedly.
+pub fn get_date_from_time_value(time_value: &OwnedValue) -> anyhow::Result<String> {
+    let dt = match time_value {
+        OwnedValue::Text(s) => get_date_time_from_time_value_string(s),
+        OwnedValue::Integer(i) => get_date_time_from_time_value_integer(*i),
+        OwnedValue::Float(f) => get_date_time_from_time_value_float(*f),
+        _ => Err(DateTimeError::InvalidArgument(format!(
+            "Invalid time value: {}",
+            time_value
+        ))),
+    };
+    match dt {
+        Ok(dt) => Ok(get_date_from_naive_datetime(dt)),
+        Err(DateTimeError::InvalidArgument(_)) => Ok(String::new()),
+        Err(DateTimeError::Other(s)) => anyhow::bail!(s),
+    }
+}
+
+fn get_date_time_from_time_value_string(value: &str) -> Result<NaiveDateTime, DateTimeError> {
+    // Time-value formats:
+    // 1-7. YYYY-MM-DD[THH:MM[:SS[.SSS]]]
+    // 8-10. HH:MM[:SS[.SSS]]
+    // 11. 'now'
+    // 12. DDDDDDDDDD (Julian day number as integer or float)
+    //
+    // Ref: https://sqlite.org/lang_datefunc.html#tmval
+
+    // Check for 'now'
+    if value.trim().eq_ignore_ascii_case("now") {
+        return Ok(chrono::Local::now().to_utc().naive_utc());
+    }
+
+    // Check for Julian day number (integer or float)
+    if let Ok(julian_day) = value.parse::<f64>() {
+        return get_date_time_from_time_value_float(julian_day);
+    }
+
+    // Attempt to parse with various formats
+    let date_only_format = "%Y-%m-%d";
+    let datetime_formats: [&str; 9] = [
+        "%Y-%m-%d %H:%M",
+        "%Y-%m-%d %H:%M:%S",
+        "%Y-%m-%d %H:%M:%S%.f",
+        "%Y-%m-%dT%H:%M",
+        "%Y-%m-%dT%H:%M:%S",
+        "%Y-%m-%dT%H:%M:%S%.f",
+        "%H:%M",
+        "%H:%M:%S",
+        "%H:%M:%S%.f",
+    ];
+
+    // First, try to parse as date-only format
+    if let Ok(date) = NaiveDate::parse_from_str(value, date_only_format) {
+        return Ok(date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap()));
+    }
+
+    for format in &datetime_formats {
+        if let Ok(dt) = if format.starts_with("%H") {
+            // For time-only formats, assume date 2000-01-01
+            // Ref: https://sqlite.org/lang_datefunc.html#tmval
+            NaiveDateTime::parse_from_str(
+                &format!("2000-01-01 {}", value),
+                &format!("%Y-%m-%d {}", format),
+            )
+        } else {
+            NaiveDateTime::parse_from_str(value, format)
+        } {
+            return Ok(dt);
+        }
+    }
+
+    Err(DateTimeError::InvalidArgument(format!(
+        "Invalid time value: {}",
+        value
+    )))
+}
+
+fn get_date_time_from_time_value_integer(value: i64) -> Result<NaiveDateTime, DateTimeError> {
+    i32::try_from(value).map_or_else(
+        |_| {
+            Err(DateTimeError::InvalidArgument(format!(
+                "Invalid julian day: {}",
+                value
+            )))
+        },
+        |value| get_date_time_from_time_value_float(value as f64),
+    )
+}
+
+fn get_date_time_from_time_value_float(value: f64) -> Result<NaiveDateTime, DateTimeError> {
+    if value.is_infinite()
+        || value.is_nan()
+        || value < 0.0
+        || value >= TimeUnit::Day.max_value_julian()
+    {
+        return Err(DateTimeError::InvalidArgument(format!(
+            "Invalid julian day: {}",
+            value
+        )));
+    }
+    let dt = julian_day_converter::julian_day_to_datetime(value)
+        .map_err(|_| DateTimeError::Other("Failed parsing the julian date".to_string()))?;
+    Ok(dt)
+}
+
+fn is_leap_second(dt: &NaiveDateTime) -> bool {
+    // The range from 1,000,000,000 to 1,999,999,999 represents the leap second.
+    dt.nanosecond() >= 1_000_000_000 && dt.nanosecond() <= 1_999_999_999
+}
+
+fn get_date_from_naive_datetime(value: NaiveDateTime) -> String {
+    // NaiveDateTime supports leap seconds, but SQLite does not.
+    // So we ignore them.
+    // The upper bound is exclusive, so a value equal to it is rejected as well.
+    if is_leap_second(&value) || value >= get_max_datetime_exclusive() {
+        return String::new();
+    }
+    value.format("%Y-%m-%d").to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::rc::Rc;
+
+    #[test]
+    fn test_valid_get_date_from_time_value() {
+        let test_date_str = "2024-07-21";
+
+        // Test all specified formats
+        let test_cases = [
+            (
+                OwnedValue::Text(Rc::new("2024-07-21".to_string())),
+                test_date_str,
+            ), // Format 1: YYYY-MM-DD
+            (
+                OwnedValue::Text(Rc::new("2024-07-21 14:30".to_string())),
+                test_date_str,
+            ), // Format 2: YYYY-MM-DD HH:MM
+            (
+                OwnedValue::Text(Rc::new("2024-07-21 14:30:45".to_string())),
+                test_date_str,
+            ), // Format 3: YYYY-MM-DD HH:MM:SS
+            (
+                OwnedValue::Text(Rc::new("2024-07-21 14:30:45.123".to_string())),
+                test_date_str,
+            ), // Format 4: YYYY-MM-DD HH:MM:SS.SSS
+            (
+                OwnedValue::Text(Rc::new("2024-07-21T14:30".to_string())),
+                test_date_str,
+            ), // Format 5: YYYY-MM-DDTHH:MM
+            (
+                OwnedValue::Text(Rc::new("2024-07-21T14:30:45".to_string())),
+                test_date_str,
+            ), // Format 6: YYYY-MM-DDTHH:MM:SS
+            (
+                OwnedValue::Text(Rc::new("2024-07-21T14:30:45.123".to_string())),
+                test_date_str,
+            ), // Format 7: YYYY-MM-DDTHH:MM:SS.SSS
+            (OwnedValue::Text(Rc::new("14:30".to_string())), "2000-01-01"), // Format 8: HH:MM
+            (
+                OwnedValue::Text(Rc::new("14:30:45".to_string())),
+                "2000-01-01",
+            ), // Format 9: HH:MM:SS
+            (
+                OwnedValue::Text(Rc::new("14:30:45.123".to_string())),
+                "2000-01-01",
+            ), // Format 10: HH:MM:SS.SSS
+            (OwnedValue::Float(2460512.5), test_date_str), // Format 12: DDDDDDDDDD (Julian date as float)
+            (OwnedValue::Integer(2460513), test_date_str), // Format 12: DDDDDDDDDD (Julian date as integer)
+        ];
+
+        for (input, expected) in test_cases {
+            assert_eq!(
+                get_date_from_time_value(&input).unwrap(),
+                expected,
+                "Failed for input: {:?}",
+                input
+            );
+        }
+
+        // Test Format 11: 'now'
+        let now = chrono::Local::now().to_utc().format("%Y-%m-%d").to_string();
+        assert_eq!(
+            get_date_from_time_value(&OwnedValue::Text(Rc::new("now".to_string()))).unwrap(),
+            now
+        );
+    }
+
+    #[test]
+    fn test_invalid_get_date_from_time_value() {
+        let invalid_cases = [
+            OwnedValue::Text(Rc::new("2024-07-21 25:00".to_string())), // Invalid hour
+            OwnedValue::Text(Rc::new("2024-07-21 24:00:00".to_string())), // Invalid hour
+            OwnedValue::Text(Rc::new("2024-07-21 23:60:00".to_string())), // Invalid minute
+            OwnedValue::Text(Rc::new("2024-07-21 22:58:60".to_string())), // Invalid second
+            OwnedValue::Text(Rc::new("2024-07-32".to_string())),       // Invalid day
+            OwnedValue::Text(Rc::new("2024-13-01".to_string())),       // Invalid month
+            OwnedValue::Text(Rc::new("invalid_date".to_string())), // Completely invalid string
+            OwnedValue::Text(Rc::new("".to_string())),             // Empty string
+            OwnedValue::Integer(i64::MAX),                         // Large Julian day
+            OwnedValue::Integer(-1),                               // Negative Julian day
+            OwnedValue::Float(f64::MAX),                           // Large float
+            OwnedValue::Float(-1.0),                               // Negative Julian day as float
+            OwnedValue::Float(f64::NAN),                           // NaN
+            OwnedValue::Float(f64::INFINITY),                      // Infinity
+            OwnedValue::Null,                                      // Null value
+            OwnedValue::Blob(vec![1, 2, 3].into()),                // Blob (unsupported type)
+        ];
+
+        for case in invalid_cases.iter() {
+            let result = get_date_from_time_value(case);
+            assert!(
+                result.is_ok(),
+                "Error encountered while parsing time value {}: {}",
+                case,
+                result.unwrap_err()
+            );
+            let result_str = result.unwrap();
+            assert!(
+                result_str.is_empty(),
+                "Expected empty string for input: {:?}, but got: {:?}",
+                case,
+                result_str
+            );
+        }
+    }
+}
diff --git a/core/function.rs b/core/function.rs
index f3d7650b8..e6951e4e0 100644
--- a/core/function.rs
+++ b/core/function.rs
@@ -37,8 +37,9 @@ pub enum SingleRowFunc {
     Trim,
     Round,
     Length,
     Min,
     Max,
+    Date,
 }
 
 impl ToString for SingleRowFunc {
@@ -53,8 +54,9 @@ impl ToString for SingleRowFunc {
             SingleRowFunc::Trim => "trim".to_string(),
             SingleRowFunc::Round => "round".to_string(),
             SingleRowFunc::Length => "length".to_string(),
             SingleRowFunc::Min => "min".to_string(),
             SingleRowFunc::Max => "max".to_string(),
+            SingleRowFunc::Date => "date".to_string(),
         }
     }
 }
@@ -88,6 +90,7 @@ impl Func{
             "trim" => Ok(Func::SingleRow(SingleRowFunc::Trim)),
             "round" => Ok(Func::SingleRow(SingleRowFunc::Round)),
             "length" => Ok(Func::SingleRow(SingleRowFunc::Length)),
+            "date" => Ok(Func::SingleRow(SingleRowFunc::Date)),
             _ => Err(()),
         }
     }
diff --git a/core/lib.rs b/core/lib.rs
index acb59adc0..315e149fd 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -11,6 +11,7 @@ mod translate;
 mod types;
 mod util;
 mod vdbe;
+mod datetime;
 
 #[cfg(not(target_family = "wasm"))]
 #[global_allocator]
diff --git a/core/translate/expr.rs b/core/translate/expr.rs
index 9ea943a8f..18b8b7880 100644
--- a/core/translate/expr.rs
+++ b/core/translate/expr.rs
@@ -348,6 +348,24 @@ pub fn translate_expr(
                             });
                             Ok(target_register)
                         }
+                        SingleRowFunc::Date => {
+                            let mut start_reg = 0;
+                            if let Some(args) = args {
+                                if args.len() > 1 {
+                                    anyhow::bail!("Parse error: date function with > 1 arguments. Modifiers are not yet supported.");
+                                } else if args.len() == 1 {
+                                    let arg_reg = program.alloc_register();
+                                    let _ = translate_expr(program, select, &args[0], arg_reg, cursor_hint)?;
+                                    start_reg = arg_reg;
+                                }
+                            }
+                            program.emit_insn(Insn::Function {
+                                start_reg,
+                                dest: target_register,
+                                func: SingleRowFunc::Date,
+                            });
+                            Ok(target_register)
+                        }
                         SingleRowFunc::Trim | SingleRowFunc::Round => {
                             let args = if let Some(args) = args {
                                 if args.len() > 2 {
diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs
index 575dbb6bc..c103b770b 100644
--- a/core/vdbe/mod.rs
+++ b/core/vdbe/mod.rs
@@ -3,6 +3,7 @@ pub mod explain;
 pub mod sorter;
 
 use crate::btree::BTreeCursor;
+use crate::datetime::get_date_from_time_value;
 use crate::function::{AggFunc, SingleRowFunc};
 use crate::pager::Pager;
 use crate::pseudo::PseudoCursor;
@@ -1098,6 +1099,22 @@ impl Program {
                         }
                         state.pc += 1;
                     }
+                    SingleRowFunc::Date => {
+                        if *start_reg == 0 {
+                            let date_str = get_date_from_time_value(&OwnedValue::Text(Rc::new("now".to_string())))?;
+                            state.registers[*dest] = OwnedValue::Text(Rc::new(date_str));
+                        } else {
+                            let time_value = &state.registers[*start_reg];
+                            let date_str = get_date_from_time_value(time_value);
+                            match date_str {
+                                Ok(date) => state.registers[*dest] = OwnedValue::Text(Rc::new(date)),
+                                Err(e) => {
+                                    anyhow::bail!("Error encountered while parsing time value: {}", e)
+                                }
+                            }
+                        }
+                        state.pc += 1;
+                    }
                 },
             }
         }