diff --git a/Cargo.lock b/Cargo.lock index 7877c782a..6a4ef9686 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -171,6 +171,15 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -183,6 +192,12 @@ version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cast" version = "0.3.0" @@ -417,6 +432,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + [[package]] name = "criterion" version = "0.5.1" @@ -485,6 +509,16 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "csv" version = "1.3.0" @@ -525,6 +559,16 @@ dependencies = [ "uuid", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dirs" version = "5.0.1" @@ -638,6 +682,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fast-float" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" + [[package]] name = "fastrand" version = "2.1.0" @@ -762,6 +812,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -911,6 +971,7 @@ checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown 0.14.5", + "serde", ] [[package]] @@ -982,6 +1043,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonb" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d1dd2023aa6a1b74f23432e2db5a9122bf53ae98053efc650951d31f59ffb" +dependencies = [ + "byteorder", + "fast-float", + "itoa", + "nom", + "ordered-float", + "rand", + "ryu", + "serde_json", +] + [[package]] name = "julian_day_converter" version = "0.3.2" @@ -1065,18 +1142,23 @@ dependencies = [ "criterion", "fallible-iterator 0.3.0", "getrandom", + "indexmap 2.2.6", "io-uring", + "jsonb", "julian_day_converter", "libc", "log", "mimalloc", "nix 0.29.0", + "pest", + "pest_derive", "polling", "pprof", "regex", "rstest", "rusqlite", "rustix", + "serde", "sieve-cache", "sqlite3-parser", "tempfile", @@ -1148,6 +1230,12 @@ dependencies = [ "libmimalloc-sys", ] +[[package]] +name = 
"minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.4" @@ -1201,6 +1289,16 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-format" version = "0.4.4" @@ -1247,6 +1345,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "4.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ff2cf528c6c03d9ed653d6c4ce1dc0582dc4af309790ad92f07c1cd551b0be" +dependencies = [ + "num-traits", +] + [[package]] name = "os_str_bytes" version = "6.6.1" @@ -1276,6 +1383,51 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "pest" +version = "2.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd53dff83f26735fdc1ca837098ccf133605d794cdae66acfc2bfac3ec809d95" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a548d2beca6773b1c244554d36fcf8548a8a58e74156968211567250e48e49a" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c93a82e8d145725dcbaf44e5ea887c8a869efdcc28706df2d08c69e17077183" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.69", +] + +[[package]] +name = "pest_meta" +version = "2.7.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a941429fea7e08bedec25e4f6785b6ffaacc6b755da98df5ef3e7dcf4a124c4f" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + [[package]] name = "phf" version = "0.11.2" @@ -1702,11 +1854,23 @@ version = "1.0.120" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" dependencies = [ + "indexmap 2.2.6", "itoa", "ryu", "serde", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sieve-cache" version = "0.1.4" @@ -1909,6 +2073,18 @@ version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + [[package]] name = "uncased" version = "0.9.10" diff --git a/core/Cargo.toml b/core/Cargo.toml index 10813c173..331fe824f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -40,6 +40,11 @@ getrandom = { version = "0.2.15", features = ["js"] } regex = "1.10.5" chrono = "0.4.38" julian_day_converter = "0.3.2" +jsonb = "0.4.1" +indexmap = { version="2.2.6", features = ["serde"] } +serde = { version = "1.0", features = ["derive"] } +pest = "2.0" +pest_derive = "2.0" [target.'cfg(not(target_family = "windows"))'.dev-dependencies] pprof = { version = "0.12.1", features = ["criterion", "flamegraph"] } diff --git 
a/core/function.rs b/core/function.rs index ded9907bf..206d6d9c0 100644 --- a/core/function.rs +++ b/core/function.rs @@ -1,3 +1,16 @@ +#[derive(Debug, Clone, PartialEq)] +pub enum JsonFunc { + JSON, +} + +impl ToString for JsonFunc { + fn to_string(&self) -> String { + match self { + JsonFunc::JSON => "json".to_string(), + } + } +} + #[derive(Debug, Clone, PartialEq)] pub enum AggFunc { Avg, @@ -72,6 +85,7 @@ impl ToString for ScalarFunc { pub enum Func { Agg(AggFunc), Scalar(ScalarFunc), + Json(JsonFunc), } impl Func { @@ -101,6 +115,7 @@ impl Func { "date" => Ok(Func::Scalar(ScalarFunc::Date)), "time" => Ok(Func::Scalar(ScalarFunc::Time)), "unicode" => Ok(Func::Scalar(ScalarFunc::Unicode)), + "json" => Ok(Func::Json((JsonFunc::JSON))), _ => Err(()), } } diff --git a/core/json/de.rs b/core/json/de.rs new file mode 100644 index 000000000..d11647dbc --- /dev/null +++ b/core/json/de.rs @@ -0,0 +1,638 @@ +use pest::iterators::Pair; +use pest::Parser as P; +use pest_derive::Parser; +use serde::de; +use serde::forward_to_deserialize_any; +use std::collections::VecDeque; +use std::f64; + +use crate::json::error::{self, Error, Result}; + +#[derive(Parser)] +#[grammar_inline = r#" +// see https://spec.json5.org/#syntactic-grammar and +// https://spec.json5.org/#lexical-grammar + +COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" | "//" ~ (!line_terminator ~ ANY)* } + +WHITESPACE = _{ + "\u{0009}" | + "\u{000B}" | + "\u{000C}" | + "\u{0020}" | + "\u{00A0}" | + "\u{FEFF}" | + SPACE_SEPARATOR | + line_terminator +} + +array = { "[" ~ "]" | "[" ~ value ~ ("," ~ value)* ~ ","? ~ "]" } + +boolean = @{ "true" | "false" } + +char_escape_sequence = @{ single_escape_char | non_escape_char } + +char_literal = @{ !("\\" | line_terminator) ~ ANY } + +decimal_integer_literal = _{ "0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* } + +decimal_literal = _{ + decimal_integer_literal ~ "." ~ ASCII_DIGIT* ~ exponent_part? | + "." ~ ASCII_DIGIT+~ exponent_part? | + decimal_integer_literal ~ exponent_part? 
+} + +double_quote_char = _{ + "\\" ~ escape_sequence | + line_continuation | + !"\"" ~ char_literal +} + +escape_char = _{ single_escape_char | ASCII_DIGIT | "x" | "u" } + +escape_sequence = _{ + char_escape_sequence | + nul_escape_sequence | + "x" ~ hex_escape_sequence | + "u" ~ unicode_escape_sequence +} + +exponent_part = _{ ^"e" ~ ("+" | "-")? ~ ASCII_DIGIT+ } + +hex_escape_sequence = @{ ASCII_HEX_DIGIT{2} } + +hex_integer_literal = _{ ("+" | "-")? ~ ^"0x" ~ ASCII_HEX_DIGIT+ } + +identifier = ${ identifier_start ~ identifier_part* } + +identifier_part = _{ + identifier_start | + &( + NONSPACING_MARK | + DIACRITIC | // not sure about this, spec says "Combining spacing mark (Mc)" + DECIMAL_NUMBER | + CONNECTOR_PUNCTUATION | + "\u{200C}" | + "\u{200D}" + ) ~ char_literal +} + +identifier_start = _{ + &(unicode_letter | "$" | "_") ~ char_literal | + "\\u" ~ unicode_escape_sequence +} + +key = _{ identifier | string } + +line_continuation = _{ "\\" ~ line_terminator_sequence } + +line_terminator = _{ "\u{000A}" | "\u{000D}" | "\u{2028}" | "\u{2029}" } + +line_terminator_sequence = _{ "\u{000D}" ~ "\u{000A}" | line_terminator } + +non_escape_char = _{ !(escape_char | line_terminator) ~ ANY } + +nul_escape_sequence = @{ "0" } + +null = @{ "null" } + +number = @{ ("+" | "-")? ~ numeric_literal } + +numeric_literal = _{ + hex_integer_literal | + decimal_literal | + "Infinity" | + "NaN" +} + +object = { "{" ~ "}" | "{" ~ pair ~ ("," ~ pair)* ~ ","? 
~ "}" } + +pair = _{ key ~ ":" ~ value } + +single_escape_char = _{ "'" | "\"" | "\\" | "b" | "f" | "n" | "r" | "t" | "v" } + +single_quote_char = _{ + "\\" ~ escape_sequence | + line_continuation | + !"'" ~ char_literal +} + +double_single_quote_char = _{ + "\\" ~ escape_sequence | + line_continuation | + !("''") ~ char_literal +} + +string = ${ "\"" ~ double_quote_char* ~ "\"" | "''" ~ double_single_quote_char* ~ "''" | "'" ~ single_quote_char* ~ "'" } + +text = _{ SOI ~ value ~ EOI } + +unicode_escape_sequence = @{ ASCII_HEX_DIGIT{4} } + +unicode_letter = _{ + UPPERCASE_LETTER | + LOWERCASE_LETTER | + TITLECASE_LETTER | + MODIFIER_LETTER | + OTHER_LETTER | + LETTER_NUMBER +} + +value = _{ null | boolean | string | number | object | array } +"#] + +struct Parser; + +/// Deserialize an instance of type `T` from a string of JSON5 text. Can fail if the input is +/// invalid JSON5, or doesn’t match the structure of the target type. +pub fn from_str<'a, T>(s: &'a str) -> Result +where + T: de::Deserialize<'a>, +{ + let mut deserializer = Deserializer::from_str(s)?; + T::deserialize(&mut deserializer) +} + +/// Deserializes JSON data into a Rust value. +pub struct Deserializer<'de> { + pair: Option>, +} + +impl<'de> Deserializer<'de> { + /// Creates a JSON5 deserializer from a `&str`. This parses the input at construction time, so + /// can fail if the input is not valid JSON5. 
+ #[allow(clippy::should_implement_trait)] + pub fn from_str(input: &'de str) -> Result { + let pair = Parser::parse(Rule::text, input)?.next().unwrap(); + Ok(Deserializer::from_pair(pair)) + } + + fn from_pair(pair: Pair<'de, Rule>) -> Self { + Deserializer { pair: Some(pair) } + } +} + +impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || match pair.as_rule() { + Rule::null => visitor.visit_unit(), + Rule::boolean => visitor.visit_bool(parse_bool(&pair)), + Rule::string | Rule::identifier => visitor.visit_string(parse_string(pair)?), + Rule::number => match pair.as_str() { + "Infinity" | "+Infinity" => visitor.visit_f64(f64::INFINITY), + "-Infinity" => visitor.visit_f64(f64::NEG_INFINITY), + "NaN" | "-NaN" => visitor.visit_f64(f64::NAN), + _ => { + if is_int(pair.as_str()) { + visitor.visit_i64(parse_integer(&pair)?) + } else { + visitor.visit_f64(parse_number(&pair)?) + } + } + }, + Rule::array => visitor.visit_seq(Seq::new(pair)), + Rule::object => visitor.visit_map(Map::new(pair)), + _ => unreachable!(), + })(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = visitor.visit_enum(Enum { pair }); + error::set_location(&mut res, &span); + res + } + + // The below will get us the right types, but won't necessarily give + // meaningful results if the source is out of the range of the target type. + fn deserialize_i8(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_i8(parse_number(&pair)? 
as i8))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_i16(parse_number(&pair)? as i16))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_i32(parse_number(&pair)? as i32))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_i64(parse_number(&pair)? as i64))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_i128(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_i128(parse_number(&pair)? as i128))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_u8(parse_number(&pair)? as u8))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_u16(parse_number(&pair)? as u16))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_u32(parse_number(&pair)? 
as u32))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_u64(parse_number(&pair)? as u64))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_u128(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_u128(parse_number(&pair)? as u128))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_f32(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_f32(parse_number(&pair)? as f32))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_f64(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = (move || visitor.visit_f64(parse_number(&pair)?))(); + error::set_location(&mut res, &span); + res + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let pair = self.pair.take().unwrap(); + let span = pair.as_span(); + let mut res = match pair.as_rule() { + Rule::null => visitor.visit_none(), + _ => visitor.visit_some(&mut Deserializer::from_pair(pair)), + }; + error::set_location(&mut res, &span); + res + } + + fn deserialize_newtype_struct(self, _name: &str, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let span = self.pair.as_ref().unwrap().as_span(); + let mut res = visitor.visit_newtype_struct(self); + error::set_location(&mut res, &span); + res + } + + forward_to_deserialize_any! 
{ + bool char str string bytes byte_buf unit unit_struct seq + tuple tuple_struct map struct identifier ignored_any + } +} + +fn parse_bool(pair: &Pair<'_, Rule>) -> bool { + match pair.as_str() { + "true" => true, + "false" => false, + _ => unreachable!(), + } +} + +fn parse_string(pair: Pair<'_, Rule>) -> Result { + let span = pair.as_span(); + let mut res = pair + .into_inner() + .map(|component| match component.as_rule() { + Rule::char_literal => Ok(String::from(component.as_str())), + Rule::char_escape_sequence => Ok(parse_char_escape_sequence(&component)), + Rule::nul_escape_sequence => Ok(String::from("\u{0000}")), + Rule::hex_escape_sequence => u8::from_str_radix(component.as_str(), 16) + .map(|value| format!("\\u{:04X}", value)) + .map_err(|_| de::Error::custom("error hex sequence")), // TODO: FIX HEX SEQUENCE TO MATCH SQLITE + Rule::unicode_escape_sequence => { + let hex_escape = parse_hex(component.as_str())?; + Ok(hex_escape.to_string()) + } + _ => unreachable!(), + }) + .collect(); + error::set_location(&mut res, &span); + res +} + +fn parse_char_escape_sequence(pair: &Pair<'_, Rule>) -> String { + String::from(match pair.as_str() { + "b" => "\u{0008}", + "f" => "\u{000C}", + "n" => "\n", + "r" => "\r", + "t" => "\t", + "v" => "\u{000B}", + "0" => "\u{0000}", + c => c, + }) +} + +fn parse_number(pair: &Pair<'_, Rule>) -> Result { + match pair.as_str() { + "Infinity" | "+Infinity" => Ok(f64::INFINITY), + "-Infinity" => Ok(f64::NEG_INFINITY), + "NaN" | "-NaN" => Ok(f64::NAN), + s if is_hex_literal(s) => parse_hex(s).map(f64::from), + s => { + if let Ok(r) = s.parse::() { + if r.is_finite() { + Ok(r) + } else { + Err(de::Error::custom("error parsing number: too large")) + } + } else { + Err(de::Error::custom("error parsing number")) + } + } + } +} + +fn parse_integer(pair: &Pair<'_, Rule>) -> Result { + match pair.as_str() { + s if is_hex_literal(s) => { + let parsed = parse_hex(s)? 
as i64; + Ok(parsed) + } + s => s + .parse() + .map_err(|_| de::Error::custom("error parsing integer")), + } +} + +fn is_int(s: &str) -> bool { + !s.contains('.') && (is_hex_literal(s) || (!s.contains('e') && !s.contains('E'))) +} + +fn parse_hex(s: &str) -> Result { + let (sign, trimmed) = match s.chars().next() { + Some('-') => (-1, &s[3..]), // skip "-0x" or "-0X" + Some('+') => (1, &s[3..]), // skip "+0x" or "+0X" + _ => (1, &s[2..]), // skip "0x" or "0X" + }; + i32::from_str_radix(trimmed, 16) + .map(|v| v * sign) + .map_err(|_| de::Error::custom("error parsing hex")) +} + +fn is_hex_literal(s: &str) -> bool { + let trimmed = s.trim_start_matches(|c| c == '+' || c == '-'); + trimmed.len() > 2 && (&trimmed[..2] == "0x" || &trimmed[..2] == "0X") +} + +struct Seq<'de> { + pairs: VecDeque>, +} + +impl<'de> Seq<'de> { + pub fn new(pair: Pair<'de, Rule>) -> Self { + Self { + pairs: pair.into_inner().collect(), + } + } +} + +impl<'de> de::SeqAccess<'de> for Seq<'de> { + type Error = Error; + + fn size_hint(&self) -> Option { + Some(self.pairs.len()) + } + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: de::DeserializeSeed<'de>, + { + if let Some(pair) = self.pairs.pop_front() { + seed.deserialize(&mut Deserializer::from_pair(pair)) + .map(Some) + } else { + Ok(None) + } + } +} + +struct Map<'de> { + pairs: VecDeque>, +} + +impl<'de> Map<'de> { + pub fn new(pair: Pair<'de, Rule>) -> Self { + Self { + pairs: pair.into_inner().collect(), + } + } +} + +impl<'de> de::MapAccess<'de> for Map<'de> { + type Error = Error; + + fn size_hint(&self) -> Option { + Some(self.pairs.len() / 2) + } + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: de::DeserializeSeed<'de>, + { + if let Some(pair) = self.pairs.pop_front() { + seed.deserialize(&mut Deserializer::from_pair(pair)) + .map(Some) + } else { + Ok(None) + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + seed.deserialize(&mut 
Deserializer::from_pair( + self.pairs.pop_front().unwrap(), + )) + } +} + +struct Enum<'de> { + pair: Pair<'de, Rule>, +} + +impl<'de> de::EnumAccess<'de> for Enum<'de> { + type Error = Error; + type Variant = Variant<'de>; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> + where + V: de::DeserializeSeed<'de>, + { + let span = self.pair.as_span(); + let mut res = (move || match self.pair.as_rule() { + Rule::string => { + let tag = seed.deserialize(&mut Deserializer::from_pair(self.pair))?; + Ok((tag, Variant { pair: None })) + } + Rule::object => { + let mut pairs = self.pair.into_inner(); + + if let Some(tag_pair) = pairs.next() { + let tag = seed.deserialize(&mut Deserializer::from_pair(tag_pair))?; + Ok((tag, Variant { pair: pairs.next() })) + } else { + Err(de::Error::custom("expected a nonempty object")) + } + } + _ => Err(de::Error::custom("expected a string or an object")), + })(); + error::set_location(&mut res, &span); + res + } +} + +struct Variant<'de> { + pair: Option>, +} + +impl<'de> de::VariantAccess<'de> for Variant<'de> { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + if let Some(pair) = self.pair { + serde::Deserialize::deserialize(&mut Deserializer::from_pair(pair)) + } else { + Ok(()) + } + } + + fn newtype_variant_seed(self, seed: T) -> Result + where + T: de::DeserializeSeed<'de>, + { + seed.deserialize(&mut Deserializer::from_pair(self.pair.unwrap())) + } + + fn tuple_variant(self, _len: usize, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + match self.pair { + Some(pair) => match pair.as_rule() { + Rule::array => visitor.visit_seq(Seq::new(pair)), + _ => Err(de::Error::custom("expected an array")), + }, + None => Err(de::Error::custom("expected an array")), + } + } + + fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result + where + V: de::Visitor<'de>, + { + match self.pair { + Some(pair) => match pair.as_rule() { + Rule::object => 
visitor.visit_map(Map::new(pair)), + _ => Err(de::Error::custom("expected an object")), + }, + None => Err(de::Error::custom("expected an object")), + } + } +} diff --git a/core/json/error.rs b/core/json/error.rs new file mode 100644 index 000000000..aef775968 --- /dev/null +++ b/core/json/error.rs @@ -0,0 +1,108 @@ +use pest::Span; +use serde::{de, ser}; +use std::fmt::{self, Display}; + +use crate::json::de::Rule; + +/// Alias for a `Result` with error type `json5::Error` +pub type Result = std::result::Result; + +/// One-based line and column at which the error was detected. +#[derive(Clone, Debug, PartialEq)] +pub struct Location { + /// The one-based line number of the error. + pub line: usize, + /// The one-based column number of the error. + pub column: usize, +} + +impl From<&Span<'_>> for Location { + fn from(s: &Span<'_>) -> Self { + let (line, column) = s.start_pos().line_col(); + Self { line, column } + } +} + +/// A bare-bones error type which currently just collapses all the underlying errors into a single +/// string... This is fine for displaying to the user, but not very useful otherwise. Work to be +/// done here. +#[derive(Clone, Debug, PartialEq)] +pub enum Error { + /// Just shove everything in a single variant for now. + Message { + /// The error message. + msg: String, + /// The location of the error, if applicable. 
+ location: Option, + }, +} + +impl From> for Error { + fn from(err: pest::error::Error) -> Self { + let (line, column) = match err.line_col { + pest::error::LineColLocation::Pos((l, c)) => (l, c), + pest::error::LineColLocation::Span((l, c), (_, _)) => (l, c), + }; + Error::Message { + msg: err.to_string(), + location: Some(Location { line, column }), + } + } +} + +impl From for Error { + fn from(err: std::io::Error) -> Self { + Error::Message { + msg: err.to_string(), + location: None, + } + } +} + +impl From for Error { + fn from(err: std::str::Utf8Error) -> Self { + Error::Message { + msg: err.to_string(), + location: None, + } + } +} + +impl ser::Error for Error { + fn custom(msg: T) -> Self { + Error::Message { + msg: msg.to_string(), + location: None, + } + } +} + +impl de::Error for Error { + fn custom(msg: T) -> Self { + Error::Message { + msg: msg.to_string(), + location: None, + } + } +} + +impl Display for Error { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::Message { ref msg, .. } => write!(formatter, "{}", msg), + } + } +} + +impl std::error::Error for Error {} + +/// Adds location information from `span`, if `res` is an error. +pub fn set_location(res: &mut Result, span: &Span<'_>) { + if let Err(ref mut e) = res { + let Error::Message { location, .. 
} = e; + if location.is_none() { + let (line, column) = span.start_pos().line_col(); + *location = Some(Location { line, column }); + } + } +} diff --git a/core/json/licenses/serde-license.md b/core/json/licenses/serde-license.md new file mode 100644 index 000000000..468cd79a8 --- /dev/null +++ b/core/json/licenses/serde-license.md @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/core/json/licenses/serde_json5-license.md b/core/json/licenses/serde_json5-license.md new file mode 100644 index 000000000..7a4a3ea24 --- /dev/null +++ b/core/json/licenses/serde_json5-license.md @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/core/json/mod.rs b/core/json/mod.rs new file mode 100644 index 000000000..06169abd8 --- /dev/null +++ b/core/json/mod.rs @@ -0,0 +1,171 @@ +mod de; +mod error; +mod ser; + +use std::rc::Rc; + +pub use crate::json::de::from_str; +pub use crate::json::ser::to_string; +use crate::types::OwnedValue; +use indexmap::IndexMap; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(untagged)] +pub enum Val { + Null, + Bool(bool), + Integer(i64), + Float(f64), + String(String), + Array(Vec), + Object(IndexMap), +} + +pub fn get_json(json_value: &OwnedValue) -> crate::Result { + match json_value { + OwnedValue::Text(ref t) => match crate::json::from_str::(t) { + Ok(json) => { + let json = crate::json::to_string(&json).unwrap(); + Ok(OwnedValue::Text(Rc::new(json))) + } + Err(_) => { + crate::bail_parse_error!("malformed JSON") + } + }, + OwnedValue::Blob(b) => { + if let Ok(json) = jsonb::from_slice(b) { + Ok(OwnedValue::Text(Rc::new(json.to_string()))) + } else { + crate::bail_parse_error!("malformed JSON"); + } + } + _ => Ok(json_value.to_owned()), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::OwnedValue; + + #[test] + fn test_get_json_valid_json5() { + let input = OwnedValue::Text(Rc::new("{ key: 'value' }".to_string())); + let result = get_json(&input).unwrap(); + if let OwnedValue::Text(result_str) = result { + assert!(result_str.contains("\"key\":\"value\"")); + } else { + panic!("Expected OwnedValue::Text"); + } + } + + #[test] + fn test_get_json_valid_json5_double_single_quotes() { + let input = OwnedValue::Text(Rc::new("{ key: ''value'' }".to_string())); + let result = get_json(&input).unwrap(); + if let OwnedValue::Text(result_str) = result { + assert!(result_str.contains("\"key\":\"value\"")); + } else { + panic!("Expected OwnedValue::Text"); + } + } + + #[test] + fn test_get_json_valid_json5_infinity() { + let input = OwnedValue::Text(Rc::new("{ \"key\": 
Infinity }".to_string())); + let result = get_json(&input).unwrap(); + if let OwnedValue::Text(result_str) = result { + assert!(result_str.contains("{\"key\":9e999}")); + } else { + panic!("Expected OwnedValue::Text"); + } + } + + #[test] + fn test_get_json_valid_json5_negative_infinity() { + let input = OwnedValue::Text(Rc::new("{ \"key\": -Infinity }".to_string())); + let result = get_json(&input).unwrap(); + if let OwnedValue::Text(result_str) = result { + assert!(result_str.contains("{\"key\":-9e999}")); + } else { + panic!("Expected OwnedValue::Text"); + } + } + + #[test] + fn test_get_json_valid_json5_nan() { + let input = OwnedValue::Text(Rc::new("{ \"key\": NaN }".to_string())); + let result = get_json(&input).unwrap(); + if let OwnedValue::Text(result_str) = result { + assert!(result_str.contains("{\"key\":null}")); + } else { + panic!("Expected OwnedValue::Text"); + } + } + + #[test] + fn test_get_json_invalid_json5() { + let input = OwnedValue::Text(Rc::new("{ key: value }".to_string())); + let result = get_json(&input); + match result { + Ok(_) => panic!("Expected error for malformed JSON"), + Err(e) => assert!(e.to_string().contains("malformed JSON")), + } + } + + #[test] + fn test_get_json_valid_jsonb() { + let input = OwnedValue::Text(Rc::new("{\"key\":\"value\"}".to_string())); + let result = get_json(&input).unwrap(); + if let OwnedValue::Text(result_str) = result { + assert!(result_str.contains("\"key\":\"value\"")); + } else { + panic!("Expected OwnedValue::Text"); + } + } + + #[test] + fn test_get_json_invalid_jsonb() { + let input = OwnedValue::Text(Rc::new("{key:\"value\"".to_string())); + let result = get_json(&input); + match result { + Ok(_) => panic!("Expected error for malformed JSON"), + Err(e) => assert!(e.to_string().contains("malformed JSON")), + } + } + + #[test] + fn test_get_json_blob_valid_jsonb() { + let binary_json = b"\x40\0\0\x01\x10\0\0\x03\x10\0\0\x03\x61\x73\x64\x61\x64\x66".to_vec(); + let input = 
OwnedValue::Blob(Rc::new(binary_json)); + let result = get_json(&input).unwrap(); + if let OwnedValue::Text(result_str) = result { + assert!(result_str.contains("\"asd\":\"adf\"")); + } else { + panic!("Expected OwnedValue::Text"); + } + } + + #[test] + fn test_get_json_blob_invalid_jsonb() { + let binary_json: Vec = vec![0xA2, 0x62, 0x6B, 0x31, 0x62, 0x76]; // Incomplete binary JSON + let input = OwnedValue::Blob(Rc::new(binary_json)); + let result = get_json(&input); + match result { + Ok(_) => panic!("Expected error for malformed JSON"), + Err(e) => assert!(e.to_string().contains("malformed JSON")), + } + } + + #[test] + fn test_get_json_non_text() { + let input = OwnedValue::Null; + let result = get_json(&input).unwrap(); + if let OwnedValue::Null = result { + // Test passed + } else { + panic!("Expected OwnedValue::Null"); + } + } +} diff --git a/core/json/ser.rs b/core/json/ser.rs new file mode 100644 index 000000000..1afb8b497 --- /dev/null +++ b/core/json/ser.rs @@ -0,0 +1,384 @@ +use serde::ser::{self, Serialize}; +use std::{f32, f64, num::FpCategory}; + +use crate::json::error::{Error, Result}; + +/// Attempts to serialize the input as a JSON5 string (actually a JSON string). 
+pub fn to_string(value: &T) -> Result +where + T: Serialize, +{ + let mut serializer = Serializer { + output: String::new(), + }; + value.serialize(&mut serializer)?; + Ok(serializer.output) +} + +struct Serializer { + output: String, + // TODO settings for formatting (single vs double quotes, whitespace etc) +} + +impl Serializer { + fn call_to_string(&mut self, v: &T) -> Result<()> + where + T: ToString, + { + self.output += &v.to_string(); + Ok(()) + } +} + +impl<'a> ser::Serializer for &'a mut Serializer { + type Ok = (); + type Error = Error; + + type SerializeSeq = Self; + type SerializeTuple = Self; + type SerializeTupleStruct = Self; + type SerializeTupleVariant = Self; + type SerializeMap = Self; + type SerializeStruct = Self; + type SerializeStructVariant = Self; + + fn serialize_bool(self, v: bool) -> Result<()> { + self.call_to_string(&v) + } + + fn serialize_i8(self, v: i8) -> Result<()> { + self.call_to_string(&v) + } + + fn serialize_i16(self, v: i16) -> Result<()> { + self.call_to_string(&v) + } + + fn serialize_i32(self, v: i32) -> Result<()> { + self.call_to_string(&v) + } + + fn serialize_i64(self, v: i64) -> Result<()> { + self.call_to_string(&v) + } + + fn serialize_u8(self, v: u8) -> Result<()> { + self.call_to_string(&v) + } + + fn serialize_u16(self, v: u16) -> Result<()> { + self.call_to_string(&v) + } + + fn serialize_u32(self, v: u32) -> Result<()> { + self.call_to_string(&v) + } + + fn serialize_u64(self, v: u64) -> Result<()> { + self.call_to_string(&v) + } + + fn serialize_f32(self, v: f32) -> Result<()> { + match v.classify() { + FpCategory::Nan => self.output += "null", + FpCategory::Infinite => { + let infinity = if v.is_sign_negative() { + "-9e999" + } else { + "9e999" + }; + self.output += infinity + } + _ => self.output += &v.to_string(), + } + Ok(()) + } + + fn serialize_f64(self, v: f64) -> Result<()> { + match v.classify() { + FpCategory::Nan => self.output += "null", + FpCategory::Infinite => { + let infinity = if 
v.is_sign_negative() { + "-9e999" + } else { + "9e999" + }; + self.output += infinity + } + _ => { + let str = &format!("{:.1}", v); + self.output += str + } + } + Ok(()) + } + + fn serialize_char(self, v: char) -> Result<()> { + // A char encoded as UTF-8 takes 4 bytes at most. + let mut buf = [0; 4]; + self.serialize_str(v.encode_utf8(&mut buf)) + } + + fn serialize_str(self, v: &str) -> Result<()> { + self.output += "\""; + self.output += &escape(v); + self.output += "\""; + Ok(()) + } + + fn serialize_bytes(self, _v: &[u8]) -> Result<()> { + unimplemented!() // TODO + } + + fn serialize_none(self) -> Result<()> { + self.serialize_unit() + } + + fn serialize_some(self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(self) + } + + fn serialize_unit(self) -> Result<()> { + self.output += "null"; + Ok(()) + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result<()> { + self.serialize_unit() + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result<()> { + self.serialize_str(variant) + } + + fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(self) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result<()> + where + T: ?Sized + Serialize, + { + self.output += "{"; + variant.serialize(&mut *self)?; // TODO drop the quotes where possible + self.output += ":"; + value.serialize(&mut *self)?; + self.output += "}"; + Ok(()) + } + + fn serialize_seq(self, _len: Option) -> Result { + self.output += "["; + Ok(self) + } + + fn serialize_tuple(self, len: usize) -> Result { + self.serialize_seq(Some(len)) + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + len: usize, + ) -> Result { + self.serialize_seq(Some(len)) + } + + fn serialize_tuple_variant( + self, + _name: &'static 
str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result { + self.output += "{"; + variant.serialize(&mut *self)?; + self.output += ":["; + Ok(self) + } + + fn serialize_map(self, _len: Option) -> Result { + self.output += "{"; + Ok(self) + } + + fn serialize_struct(self, _name: &'static str, len: usize) -> Result { + self.serialize_map(Some(len)) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result { + self.output += "{"; + variant.serialize(&mut *self)?; + self.output += ":{"; + Ok(self) + } +} + +impl<'a> ser::SerializeSeq for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + if !self.output.ends_with('[') { + self.output += ","; + } + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + self.output += "]"; + Ok(()) + } +} + +impl<'a> ser::SerializeTuple for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + ser::SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result<()> { + ser::SerializeSeq::end(self) + } +} + +impl<'a> ser::SerializeTupleStruct for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + ser::SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result<()> { + ser::SerializeSeq::end(self) + } +} + +impl<'a> ser::SerializeTupleVariant for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + ser::SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result<()> { + self.output += "]}"; + Ok(()) + } +} + +impl<'a> ser::SerializeMap for &'a mut Serializer 
{ + type Ok = (); + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + if !self.output.ends_with('{') { + self.output += ","; + } + key.serialize(&mut **self) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + self.output += ":"; + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + self.output += "}"; + Ok(()) + } +} + +impl<'a> ser::SerializeStruct for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + ser::SerializeMap::serialize_key(self, key)?; + ser::SerializeMap::serialize_value(self, value) + } + + fn end(self) -> Result<()> { + ser::SerializeMap::end(self) + } +} + +impl<'a> ser::SerializeStructVariant for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + ser::SerializeStruct::serialize_field(self, key, value) + } + + fn end(self) -> Result<()> { + self.output += "}}"; + Ok(()) + } +} + +fn escape(v: &str) -> String { + v.chars() + .flat_map(|c| match c { + '"' => vec!['\\', c], + '\n' => vec!['\\', 'n'], + '\r' => vec!['\\', 'r'], + '\t' => vec!['\\', 't'], + '\\' => vec!['\\', '\\'], + '\u{0008}' => vec!['\\', 'b'], + '\u{000c}' => vec!['\\', 'f'], + c => vec![c], + }) + .collect() +} diff --git a/core/lib.rs b/core/lib.rs index fb695a3aa..0ffdcd6e3 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -1,6 +1,7 @@ mod error; mod function; mod io; +mod json; mod pseudo; mod schema; mod storage; diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 10217f9a0..6e7a69128 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,4 +1,4 @@ -use crate::Result; +use crate::{function::JsonFunc, Result}; use sqlite3_parser::ast::{self, Expr, UnaryOperator}; use crate::{ 
@@ -134,6 +134,32 @@ pub fn translate_expr( Some(Func::Agg(_)) => { crate::bail_parse_error!("aggregation function in non-aggregation context") } + Some(Func::Json(j)) => match j { + JsonFunc::JSON => { + let args = if let Some(args) = args { + if args.len() != 1 { + crate::bail_parse_error!( + "{} function with not exactly 1 argument", + j.to_string() + ); + } + args + } else { + crate::bail_parse_error!( + "{} function with no arguments", + j.to_string() + ); + }; + let regs = program.alloc_register(); + translate_expr(program, select, &args[0], regs, cursor_hint)?; + program.emit_insn(Insn::Function { + start_reg: regs, + dest: target_register, + func: crate::vdbe::Func::Json(j), + }); + Ok(target_register) + } + }, Some(Func::Scalar(srf)) => { match srf { ScalarFunc::Coalesce => { @@ -202,7 +228,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { start_reg: target_register + 1, dest: target_register, - func: srf, + func: crate::vdbe::Func::Scalar(srf), }); Ok(target_register) } @@ -231,7 +257,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { start_reg: regs, dest: target_register, - func: srf, + func: crate::vdbe::Func::Scalar(srf), }); Ok(target_register) } @@ -246,7 +272,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { start_reg: regs, dest: target_register, - func: srf, + func: crate::vdbe::Func::Scalar(srf), }); Ok(target_register) } @@ -270,7 +296,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { start_reg, dest: target_register, - func: ScalarFunc::Date, + func: crate::vdbe::Func::Scalar(srf), }); Ok(target_register) } @@ -294,7 +320,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { start_reg, dest: target_register, - func: ScalarFunc::Time, + func: crate::vdbe::Func::Scalar(ScalarFunc::Time), }); Ok(target_register) } @@ -327,7 +353,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { start_reg: target_register + 1, dest: target_register, - func: srf, + func: 
crate::vdbe::Func::Scalar(srf), }); Ok(target_register) } @@ -353,7 +379,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { start_reg: target_register + 1, dest: target_register, - func: ScalarFunc::Min, + func: crate::vdbe::Func::Scalar(srf), }); Ok(target_register) } @@ -379,7 +405,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { start_reg: target_register + 1, dest: target_register, - func: ScalarFunc::Max, + func: crate::vdbe::Func::Scalar(srf), }); Ok(target_register) } diff --git a/core/translate/select.rs b/core/translate/select.rs index 6ad74977b..7321c0584 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -860,7 +860,7 @@ fn translate_aggregation( let empty_args = &Vec::::new(); let args = info.args.as_ref().unwrap_or(empty_args); let dest = match func { - Func::Scalar(_) => { + Func::Scalar(_) | Func::Json(_) => { crate::bail_parse_error!("single row function in aggregation") } Func::Agg(agg_func) => match agg_func { diff --git a/core/translate/where_clause.rs b/core/translate/where_clause.rs index 5de13bc12..261c54787 100644 --- a/core/translate/where_clause.rs +++ b/core/translate/where_clause.rs @@ -2,7 +2,7 @@ use crate::{ function::ScalarFunc, translate::{expr::translate_expr, select::Select}, util::normalize_ident, - vdbe::{builder::ProgramBuilder, BranchOffset, Insn}, + vdbe::{builder::ProgramBuilder, BranchOffset, Func, Insn}, Result, }; @@ -726,7 +726,7 @@ fn translate_condition_expr( program.mark_last_insn_constant(); let _ = translate_expr(program, Some(select), lhs, column_reg, cursor_hint)?; program.emit_insn(Insn::Function { - func: ScalarFunc::Like, + func: Func::Scalar(ScalarFunc::Like), start_reg: pattern_reg, dest: cur_reg, }); diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 38bf513cc..67ce34f72 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -24,7 +24,8 @@ pub mod sorter; mod datetime; use crate::error::LimboError; -use crate::function::{AggFunc, ScalarFunc}; +use 
crate::function::{AggFunc, JsonFunc, ScalarFunc}; +use crate::json::get_json; use crate::pseudo::PseudoCursor; use crate::schema::Table; use crate::storage::sqlite3_ondisk::DatabaseHeader; @@ -46,6 +47,21 @@ pub type CursorID = usize; pub type PageIdx = usize; +#[derive(Debug)] +pub enum Func { + Scalar(ScalarFunc), + Json(JsonFunc), +} + +impl ToString for Func { + fn to_string(&self) -> String { + match self { + Func::Scalar(scalar_func) => scalar_func.to_string(), + Func::Json(json_func) => json_func.to_string(), + } + } +} + #[derive(Debug)] pub enum Insn { // Initialize the program state and jump to the given PC. @@ -287,7 +303,7 @@ pub enum Insn { // constant_mask: i32, // P1, not used for now start_reg: usize, // P2, start of argument registers dest: usize, // P3 - func: ScalarFunc, // P4 + func: Func, // P4 }, InitCoroutine { @@ -1156,8 +1172,17 @@ impl Program { start_reg, dest, } => match func { - ScalarFunc::Coalesce => {} - ScalarFunc::Like => { + Func::Json(JsonFunc::JSON) => { + let json_value = &state.registers[*start_reg]; + let json_str = get_json(json_value); + match json_str { + Ok(json) => state.registers[*dest] = json, + Err(e) => return Err(e), + } + state.pc += 1; + } + Func::Scalar(ScalarFunc::Coalesce) => {} + Func::Scalar(ScalarFunc::Like) => { let start_reg = *start_reg; assert!( start_reg + 2 <= state.registers.len(), @@ -1178,7 +1203,7 @@ impl Program { state.registers[*dest] = result; state.pc += 1; } - ScalarFunc::Abs => { + Func::Scalar(ScalarFunc::Abs) => { let reg_value = state.registers[*start_reg].borrow_mut(); if let Some(value) = exec_abs(reg_value) { state.registers[*dest] = value; @@ -1187,7 +1212,7 @@ impl Program { } state.pc += 1; } - ScalarFunc::Upper => { + Func::Scalar(ScalarFunc::Upper) => { let reg_value = state.registers[*start_reg].borrow_mut(); if let Some(value) = exec_upper(reg_value) { state.registers[*dest] = value; @@ -1196,7 +1221,7 @@ impl Program { } state.pc += 1; } - ScalarFunc::Lower => { + 
Func::Scalar(ScalarFunc::Lower) => { let reg_value = state.registers[*start_reg].borrow_mut(); if let Some(value) = exec_lower(reg_value) { state.registers[*dest] = value; @@ -1205,16 +1230,16 @@ impl Program { } state.pc += 1; } - ScalarFunc::Length => { + Func::Scalar(ScalarFunc::Length) => { let reg_value = state.registers[*start_reg].borrow_mut(); state.registers[*dest] = exec_length(reg_value); state.pc += 1; } - ScalarFunc::Random => { + Func::Scalar(ScalarFunc::Random) => { state.registers[*dest] = exec_random(); state.pc += 1; } - ScalarFunc::Trim => { + Func::Scalar(ScalarFunc::Trim) => { let start_reg = *start_reg; let reg_value = state.registers[start_reg].clone(); let pattern_value = state.registers.get(start_reg + 1).cloned(); @@ -1224,7 +1249,7 @@ impl Program { state.registers[*dest] = result; state.pc += 1; } - ScalarFunc::LTrim => { + Func::Scalar(ScalarFunc::LTrim) => { let start_reg = *start_reg; let reg_value = state.registers[start_reg].clone(); let pattern_value = state.registers.get(start_reg + 1).cloned(); @@ -1234,7 +1259,7 @@ impl Program { state.registers[*dest] = result; state.pc += 1; } - ScalarFunc::RTrim => { + Func::Scalar(ScalarFunc::RTrim) => { let start_reg = *start_reg; let reg_value = state.registers[start_reg].clone(); let pattern_value = state.registers.get(start_reg + 1).cloned(); @@ -1244,7 +1269,7 @@ impl Program { state.registers[*dest] = result; state.pc += 1; } - ScalarFunc::Round => { + Func::Scalar(ScalarFunc::Round) => { let start_reg = *start_reg; let reg_value = state.registers[start_reg].clone(); let precision_value = state.registers.get(start_reg + 1).cloned(); @@ -1252,7 +1277,7 @@ impl Program { state.registers[*dest] = result; state.pc += 1; } - ScalarFunc::Min => { + Func::Scalar(ScalarFunc::Min) => { let start_reg = *start_reg; let reg_values = state.registers[start_reg..state.registers.len()] .iter() @@ -1265,7 +1290,7 @@ impl Program { } state.pc += 1; } - ScalarFunc::Max => { + 
Func::Scalar(ScalarFunc::Max) => { let start_reg = *start_reg; let reg_values = state.registers[start_reg..state.registers.len()] .iter() @@ -1278,7 +1303,7 @@ impl Program { } state.pc += 1; } - ScalarFunc::Date => { + Func::Scalar(ScalarFunc::Date) => { if *start_reg == 0 { let date_str = exec_date(&OwnedValue::Text(Rc::new("now".to_string())))?; @@ -1300,7 +1325,7 @@ impl Program { } state.pc += 1; } - ScalarFunc::Time => { + Func::Scalar(ScalarFunc::Time) => { if *start_reg == 0 { let time_str = exec_time(&OwnedValue::Text(Rc::new("now".to_string())))?; @@ -1322,7 +1347,7 @@ impl Program { } state.pc += 1; } - ScalarFunc::Unicode => { + Func::Scalar(ScalarFunc::Unicode) => { let reg_value = state.registers[*start_reg].borrow_mut(); state.registers[*dest] = exec_unicode(reg_value); state.pc += 1; diff --git a/testing/all.test b/testing/all.test index ad08d418b..26ee60589 100755 --- a/testing/all.test +++ b/testing/all.test @@ -12,3 +12,4 @@ source $testdir/where.test source $testdir/like.test source $testdir/scalar-functions.test source $testdir/orderby.test +source $testdir/json.test \ No newline at end of file diff --git a/testing/json.test b/testing/json.test new file mode 100644 index 000000000..6f566fa05 --- /dev/null +++ b/testing/json.test @@ -0,0 +1,57 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_execsql_test json5-ecma-script-1 { + select json('{a:5,b:6}') ; +} {{{"a":5,"b":6}}} + +do_execsql_test json5-ecma-script-2 { + SELECT json('{ MNO_123$xyz : 789 }'); +} {{{"MNO_123$xyz":789}}} + +do_execsql_test json5-with-single-trailing-comma-valid { + select json('{"a":5, "b":6, }'); +} {{{"a":5,"b":6}}} + +do_execsql_test json5-single-quoted { + SELECT json('{"a": ''abcd''}'); +} {{{"a":"abcd"}}} + +do_execsql_test json5-hexadecimal-1 { + SELECT json('{a: 0x0}') +} {{{"a":0}}} + +do_execsql_test json5-hexadecimal-2 { + SELECT json('{a: 0xabcdef}') +} {{{"a":11259375}}} + +do_execsql_test json5-hexadecimal-2 
{ + SELECT json('{a: -0xabcdef}') +} {{{"a":-11259375}}} + +do_execsql_test json5-number-1 { + SELECT json('{x: 4.}') +} {{{"x":4.0}}} + +do_execsql_test json5-number-2 { + SELECT json('{x: +4.}') +} {{{"x":4.0}}} + +do_execsql_test json5-number-3 { + SELECT json('{x: -4.}') +} {{{"x":-4.0}}} + +do_execsql_test json5-number-5 { + SELECT json('{x: Infinity}') +} {{{"x":9e999}}} + +do_execsql_test json5-number-6 { + SELECT json('{x: -Infinity}') +} {{{"x":-9e999}}} + +do_execsql_test json5-multi-comment { + SELECT json(' /* abc */ { /*def*/ aaa /* xyz */ : // to the end of line + 123 /* xyz */ , /* 123 */ }') +} {{{"aaa":123}}}