diff --git a/core/json/json_operations.rs b/core/json/json_operations.rs index e0e2ff09a..0be177dae 100644 --- a/core/json/json_operations.rs +++ b/core/json/json_operations.rs @@ -197,7 +197,7 @@ mod tests { } fn create_json(s: &str) -> OwnedValue { - OwnedValue::Text(Text::json(s)) + OwnedValue::Text(Text::json(s.to_string())) } #[test] diff --git a/core/json/jsonb.rs b/core/json/jsonb.rs index c928e6d19..97acd6aee 100644 --- a/core/json/jsonb.rs +++ b/core/json/jsonb.rs @@ -192,6 +192,25 @@ pub enum ElementType { RESERVED3 = 15, } +impl Into for ElementType { + fn into(self) -> String { + let result = match self { + ElementType::ARRAY => "array", + ElementType::OBJECT => "object", + ElementType::NULL => "null", + ElementType::TRUE => "true", + ElementType::FALSE => "false", + ElementType::FLOAT | ElementType::FLOAT5 => "real", + ElementType::INT | ElementType::INT5 => "integer", + ElementType::TEXT | ElementType::TEXT5 | ElementType::TEXTJ | ElementType::TEXTRAW => { + "text" + } + _ => unreachable!(), + }; + result.into() + } +} + impl TryFrom for ElementType { type Error = LimboError; @@ -223,7 +242,7 @@ type PayloadSize = usize; #[derive(Debug, Clone, Copy)] pub struct JsonbHeader(ElementType, PayloadSize); -enum HeaderFormat { +pub(crate) enum HeaderFormat { Inline([u8; 1]), // Small payloads embedded directly in the header OneByte([u8; 2]), // Medium payloads with 1-byte size field TwoBytes([u8; 3]), // Large payloads with 2-byte size field @@ -231,7 +250,7 @@ enum HeaderFormat { } impl HeaderFormat { - fn as_bytes(&self) -> &[u8] { + pub fn as_bytes(&self) -> &[u8] { match self { Self::Inline(bytes) => bytes, Self::OneByte(bytes) => bytes, @@ -246,6 +265,10 @@ impl JsonbHeader { Self(element_type, payload_size) } + pub fn make_null() -> Self { + Self(ElementType::NULL, 0) + } + fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> { match slice.get(cursor) { Some(header_byte) => { @@ -296,7 +319,7 @@ impl JsonbHeader { } } - fn into_bytes(self) -> HeaderFormat { + pub fn into_bytes(self) -> HeaderFormat { let (element_type, payload_size) = (self.0, self.1); match payload_size { @@ -361,17 +384,36 @@ impl Jsonb { self.data.len() } + pub fn make_empty_array(size: usize) -> Self { + let mut jsonb = Self { + data: Vec::with_capacity(size), + }; + jsonb + .write_element_header(0, ElementType::ARRAY, 0) + .unwrap(); + jsonb + } + + pub fn append_to_array_unsafe(&mut self, data: &[u8]) { + self.data.extend_from_slice(data); + } + + pub fn finalize_array_unsafe(&mut self) -> Result<()> { + self.write_element_header(0, ElementType::ARRAY, self.len() - 1)?; + Ok(()) + } + fn read_header(&self, cursor: usize) -> Result<(JsonbHeader, usize)> { let (header, offset) = JsonbHeader::from_slice(cursor, &self.data)?; Ok((header, offset)) } - pub fn is_valid(&self) -> Result<()> { + pub fn is_valid(&self) -> Result { match self.read_header(0) { Ok((header, offset)) => { if let Some(_) = self.data.get(offset..offset + header.1) { - Ok(()) + Ok(header.0) } else { bail_parse_error!("malformed JSON") } @@ -1395,6 +1437,33 @@ impl Jsonb { Ok((Jsonb::from_raw_data(&self.data[pos..end]), header.0)) } + pub fn get_by_path_raw(&self, path: &JsonPath) -> Result<&[u8]> { + let mut pos = 0; + let mut string_buffer = String::with_capacity(1024); + for segment in path.elements.iter() { + pos = self.navigate_to_segment(segment, pos, &mut string_buffer)?; + } + let (header, skip_header) = self.read_header(pos)?; + let end = pos + skip_header + header.1; + Ok(&self.data[pos..end]) + } + + pub fn array_len(&self) -> Result { + let (header, header_skip) = self.read_header(0)?; + if header.0 != ElementType::ARRAY { + return Ok(0); + } + + let mut count = 0; + let mut pos = header_skip; + while pos < header_skip + header.1 { + pos = self.skip_element(pos)?; + count += 1; + } + + Ok(count) + } + fn navigate_to_segment( &self, segment: &PathElement, @@ -1448,7 +1517,8 @@ impl Jsonb { }; if let Some(id) = idx { let id = id.to_owned(); - if id > 0 { + + if id >= 0 { for _ in 0..id as usize { if current_pos < pos + parent_size { current_pos = self.skip_element(current_pos)?; @@ -1530,6 +1600,7 @@ fn compare(key: (&str, ElementType), path_key: (&str, bool)) -> bool { pub fn unescape_string(input: &str) -> String { let mut result = String::with_capacity(input.len()); let mut chars = input.chars().peekable(); + let mut code_point = String::with_capacity(5); while let Some(c) = chars.next() { if c == '\\' { @@ -1542,10 +1613,24 @@ pub fn unescape_string(input: &str) -> String { Some('"') => result.push('"'), Some('b') => result.push('\u{0008}'), Some('f') => result.push('\u{000C}'), - + Some('x') => { + code_point.clear(); + for _ in 0..2 { + if let Some(hex_char) = chars.next() { + code_point.push(hex_char); + } else { + break; + } + } + if let Ok(code) = u16::from_str_radix(&code_point, 16) { + if let Some(ch) = char::from_u32(code as u32) { + result.push(ch) + } + } + } // Handle \uXXXX format (JSON style) Some('u') => { - let mut code_point = String::new(); + code_point.clear(); for _ in 0..4 { if let Some(hex_char) = chars.next() { code_point.push(hex_char); @@ -1556,18 +1641,18 @@ pub fn unescape_string(input: &str) -> String { if let Ok(code) = u16::from_str_radix(&code_point, 16) { // Check if this is a high surrogate - if (0xD800..=0xDBFF).contains(&code) { + if matches!(code, 0xD800..=0xDBFF) { if chars.next() == Some('\\') && chars.next() == Some('u') { - let mut low_surrogate = String::new(); + code_point.clear(); for _ in 0..4 { if let Some(hex_char) = chars.next() { - low_surrogate.push(hex_char); + code_point.push(hex_char); } else { break; } } - if let Ok(low_code) = u16::from_str_radix(&low_surrogate, 16) { + if let Ok(low_code) = u16::from_str_radix(&code_point, 16) { if (0xDC00..=0xDFFF).contains(&low_code) { let high_ten_bits = (code - 0xD800) as u32; let low_ten_bits = (low_code - 0xDC00) as u32; diff --git a/core/json/mod.rs b/core/json/mod.rs index 5f63ae2e7..a3b6c5d71 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -5,6 +5,7 @@ mod json_path; mod jsonb; mod ser; +use crate::bail_constraint_error; pub use crate::json::de::from_str; use crate::json::error::Error as JsonError; pub use crate::json::json_operations::{json_patch, json_remove}; @@ -13,7 +14,7 @@ pub use crate::json::ser::to_string; use crate::types::{OwnedValue, Text, TextSubtype}; use crate::{bail_parse_error, json::de::ordered_object}; use indexmap::IndexMap; -use jsonb::Jsonb; +use jsonb::{ElementType, Jsonb, JsonbHeader}; use ser::to_string_pretty; use serde::{Deserialize, Serialize}; use std::borrow::Cow; @@ -49,7 +50,7 @@ pub fn get_json(json_value: &OwnedValue, indent: Option<&str>) -> crate::Result< None => to_string(&json_val)?, }; - Ok(OwnedValue::Text(Text::json(&json))) + Ok(OwnedValue::Text(Text::json(json))) } OwnedValue::Blob(b) => { let jsonbin = Jsonb::new(b.len(), Some(b)); @@ -67,25 +68,13 @@ pub fn get_json(json_value: &OwnedValue, indent: Option<&str>) -> crate::Result< None => to_string(&json_val)?, }; - Ok(OwnedValue::Text(Text::json(&json))) + Ok(OwnedValue::Text(Text::json(json))) } } } pub fn jsonb(json_value: &OwnedValue) -> crate::Result { - let jsonbin = match json_value { - OwnedValue::Null | OwnedValue::Integer(_) | OwnedValue::Float(_) | OwnedValue::Text(_) => { - Jsonb::from_str(&json_value.to_text().unwrap()) - } - OwnedValue::Blob(blob) => { - let blob = Jsonb::new(blob.len(), Some(&blob)); - blob.is_valid()?; - Ok(blob) - } - _ => { - unimplemented!() - } - }; + let jsonbin = convert_dbtype_to_jsonb(json_value); match jsonbin { Ok(jsonbin) => Ok(OwnedValue::Blob(Rc::new(jsonbin.data()))), Err(_) => { @@ -94,6 +83,26 @@ pub fn jsonb(json_value: &OwnedValue) -> crate::Result { } } +fn convert_dbtype_to_jsonb(val: &OwnedValue) -> crate::Result { + match val { + OwnedValue::Text(text) => Jsonb::from_str(text.as_str()), + OwnedValue::Blob(blob) => { + let json = Jsonb::from_raw_data(&blob); + json.is_valid()?; + Ok(json) + } + OwnedValue::Record(_) | OwnedValue::Agg(_) => { + bail_constraint_error!("Wront number of arguments"); + } + OwnedValue::Null => Jsonb::from_str("null"), + OwnedValue::Float(float) => { + let mut buff = ryu::Buffer::new(); + Jsonb::from_str(buff.format(*float)) + } + OwnedValue::Integer(int) => Jsonb::from_str(&int.to_string()), + } +} + fn get_json_value(json_value: &OwnedValue) -> crate::Result { match json_value { OwnedValue::Text(ref t) => match from_str::(t.as_str()) { @@ -149,29 +158,31 @@ pub fn json_array(values: &[OwnedValue]) -> crate::Result { } s.push(']'); - Ok(OwnedValue::Text(Text::json(&s))) + Ok(OwnedValue::Text(Text::json(s))) } pub fn json_array_length( json_value: &OwnedValue, json_path: Option<&OwnedValue>, ) -> crate::Result { - let json = get_json_value(json_value)?; + let json = convert_dbtype_to_jsonb(json_value)?; - let arr_val = if let Some(path) = json_path { - match json_extract_single(&json, path, true)? { - Some(val) => val, - None => return Ok(OwnedValue::Null), - } - } else { - &json - }; - - match arr_val { - Val::Array(val) => Ok(OwnedValue::Integer(val.len() as i64)), - Val::Null => Ok(OwnedValue::Null), - _ => Ok(OwnedValue::Integer(0)), + if json_path.is_none() { + let result = json.array_len()?; + return Ok(OwnedValue::Integer(result as i64)); } + + let path = json_path_from_owned_value(json_path.expect("We already checked none"), true)?; + + if let Some(path) = path { + if let Ok(len) = json + .get_by_path(&path) + .and_then(|(json, _)| json.array_len()) + { + return Ok(OwnedValue::Integer(len as i64)); + } + } + Ok(OwnedValue::Null) } pub fn json_set(json: &OwnedValue, values: &[OwnedValue]) -> crate::Result { @@ -222,13 +233,14 @@ pub fn json_arrow_extract(value: &OwnedValue, path: &OwnedValue) -> crate::Resul return Ok(OwnedValue::Null); } - let json = get_json_value(value)?; - let extracted = json_extract_single(&json, path, false)?; - - if let Some(val) = extracted { - let json = to_string(val)?; - - Ok(OwnedValue::Text(Text::json(&json))) + if let Some(path) = json_path_from_owned_value(path, false)? { + let json = convert_dbtype_to_jsonb(value)?; + let extracted = json.get_by_path(&path); + if let Ok((json, _)) = extracted { + Ok(OwnedValue::Text(Text::json(json.to_string()?))) + } else { + Ok(OwnedValue::Null) + } } else { Ok(OwnedValue::Null) } @@ -243,11 +255,17 @@ pub fn json_arrow_shift_extract( if let OwnedValue::Null = value { return Ok(OwnedValue::Null); } - - let json = get_json_value(value)?; - let extracted = json_extract_single(&json, path, false)?.unwrap_or(&Val::Null); - - convert_json_to_db_type(extracted, true) + if let Some(path) = json_path_from_owned_value(path, false)? { + let json = convert_dbtype_to_jsonb(value)?; + let extracted = json.get_by_path(&path); + if let Ok((json, element_type)) = extracted { + Ok(json_string_to_db_type(json.to_string()?, element_type)) + } else { + Ok(OwnedValue::Null) + } + } else { + Ok(OwnedValue::Null) + } } /// Extracts a JSON value from a JSON object or array. @@ -261,37 +279,66 @@ pub fn json_extract(value: &OwnedValue, paths: &[OwnedValue]) -> crate::Result { - return Ok(OwnedValue::Null); - } - _ => { - let extracted = json_extract_single(&json, path, true)?.unwrap_or(&Val::Null); - - if paths.len() == 1 && extracted == &Val::Null { - return Ok(OwnedValue::Null); - } - - result.push_str(&to_string(&extracted)?); - result.push(','); - } + return Ok(json_string_to_db_type( + expected_value.to_string()?, + value_type, + )); + } else { + return Ok(OwnedValue::Null); } } - result.pop(); // remove the final comma - result.push(']'); + let json = convert_dbtype_to_jsonb(value)?; + let mut result = Jsonb::make_empty_array(json.len()); - Ok(OwnedValue::Text(Text::json(&result))) + let paths = paths + .into_iter() + .map(|p| json_path_from_owned_value(p, true)); + for path in paths { + if let Some(path) = path? { + let fragment = json.get_by_path_raw(&path); + if let Ok(data) = fragment { + result.append_to_array_unsafe(data); + } else { + result.append_to_array_unsafe(JsonbHeader::make_null().into_bytes().as_bytes()); + } + } else { + return Ok(OwnedValue::Null); + } + } + result.finalize_array_unsafe()?; + Ok(json_string_to_db_type( + result.to_string()?, + ElementType::ARRAY, + )) +} + +fn json_string_to_db_type(mut json: String, element_type: ElementType) -> OwnedValue { + match element_type { + ElementType::ARRAY | ElementType::OBJECT => OwnedValue::Text(Text::json(json)), + ElementType::TEXT | ElementType::TEXT5 | ElementType::TEXTJ | ElementType::TEXTRAW => { + json.remove(json.len() - 1); + json.remove(0); + OwnedValue::Text(Text { + value: Rc::new(json.into_bytes()), + subtype: TextSubtype::Text, + }) + } + ElementType::FLOAT5 | ElementType::FLOAT => { + OwnedValue::Float(json.parse().expect("Should be valid f64")) + } + ElementType::INT | ElementType::INT5 => { + OwnedValue::Integer(json.parse().expect("Should be valid i64")) + } + ElementType::TRUE => OwnedValue::Integer(1), + ElementType::FALSE => OwnedValue::Integer(0), + ElementType::NULL => OwnedValue::Null, + _ => unreachable!(), + } } /// Returns a value with type defined by SQLite documentation: @@ -324,7 +371,7 @@ fn convert_json_to_db_type(extracted: &Val, all_as_db: bool) -> crate::Result) -> crate::Result if let OwnedValue::Null = value { return Ok(OwnedValue::Null); } + if path.is_none() { + let json = convert_dbtype_to_jsonb(value)?; + let element_type = json.is_valid()?; - let json = get_json_value(value)?; + return Ok(OwnedValue::Text(Text::json(element_type.into()))); + } + if let Some(path) = json_path_from_owned_value(path.unwrap(), true)? { + let json = convert_dbtype_to_jsonb(value)?; - let json = if let Some(path) = path { - match json_extract_single(&json, path, true)? { - Some(val) => val, - None => return Ok(OwnedValue::Null), + if let Ok((_, element_type)) = json.get_by_path(&path) { + return Ok(OwnedValue::Text(Text::json(element_type.into()))); + } else { + return Ok(OwnedValue::Null); } } else { - &json - }; - - let val = match json { - Val::Null => "null", - Val::Bool(v) => { - if *v { - "true" - } else { - "false" - } - } - Val::Integer(_) => "integer", - Val::Float(_) => "real", - Val::String(_) => "text", - Val::Array(_) => "array", - Val::Object(_) => "object", - Val::Removed => unreachable!(), - }; - - Ok(OwnedValue::Text(Text::json(val))) -} - -/// Returns the value at the given JSON path. If the path does not exist, it returns None. -/// If the path is an invalid path, returns an error. -/// -/// *strict* - if false, we will try to resolve the path even if it does not start with "$" -/// in a way that's compatible with the `->` and `->>` operators. See examples in the docs: -/// https://sqlite.org/json1.html#the_and_operators -fn json_extract_single<'a>( - json: &'a Val, - path: &OwnedValue, - strict: bool, -) -> crate::Result> { - let json_path = match json_path_from_owned_value(path, strict)? { - Some(path) => path, - None => return Ok(None), - }; - - let mut current_element = &Val::Null; - - for element in json_path.elements.iter() { - match element { - PathElement::Root() => { - current_element = json; - } - PathElement::Key(key, _) => match current_element { - Val::Object(map) => { - if let Some((_, value)) = map.iter().find(|(k, _)| k == key) { - current_element = value; - } else { - return Ok(None); - } - } - _ => return Ok(None), - }, - PathElement::ArrayLocator(idx) => match current_element { - Val::Array(array) => { - if let Some(mut idx) = *idx { - if idx < 0 { - idx += array.len() as i32; - } - - if idx < array.len() as i32 { - current_element = &array[idx as usize]; - } else { - return Ok(None); - } - } - } - _ => return Ok(None), - }, - } + return Ok(OwnedValue::Null); } - - Ok(Some(current_element)) } fn json_path_from_owned_value(path: &OwnedValue, strict: bool) -> crate::Result> { @@ -674,7 +653,7 @@ pub fn json_object(values: &[OwnedValue]) -> crate::Result { .collect::, _>>()?; let result = crate::json::to_string(&value_map)?; - Ok(OwnedValue::Text(Text::json(&result))) + Ok(OwnedValue::Text(Text::json(result))) } pub fn is_json_valid(json_value: &OwnedValue) -> crate::Result { @@ -866,7 +845,7 @@ mod tests { #[test] fn test_json_array_simple() { let text = OwnedValue::build_text("value1"); - let json = OwnedValue::Text(Text::json("\"value2\"")); + let json = OwnedValue::Text(Text::json("\"value2\"".to_string())); let input = vec![text, json, OwnedValue::Integer(1), OwnedValue::Float(1.1)]; let result = json_array(&input).unwrap(); @@ -1104,7 +1083,7 @@ mod tests { let text_key = OwnedValue::build_text("text_key"); let text_value = OwnedValue::build_text("text_value"); let json_key = OwnedValue::build_text("json_key"); - let json_value = OwnedValue::Text(Text::json(r#"{"json":"value","number":1}"#)); + let json_value = OwnedValue::Text(Text::json(r#"{"json":"value","number":1}"#.to_string())); let integer_key = OwnedValue::build_text("integer_key"); let integer_value = OwnedValue::Integer(1); let float_key = OwnedValue::build_text("float_key"); @@ -1138,7 +1117,7 @@ mod tests { #[test] fn test_json_object_json_value_is_rendered_as_json() { let key = OwnedValue::build_text("key"); - let value = OwnedValue::Text(Text::json(r#"{"json":"value"}"#)); + let value = OwnedValue::Text(Text::json(r#"{"json":"value"}"#.to_string())); let input = vec![key, value]; let result = json_object(&input).unwrap(); diff --git a/core/types.rs b/core/types.rs index 2b3bd2e38..593c99efd 100644 --- a/core/types.rs +++ b/core/types.rs @@ -45,9 +45,9 @@ impl Text { } } - pub fn json(value: &str) -> Self { + pub fn json(value: String) -> Self { Self { - value: Rc::new(value.as_bytes().to_vec()), + value: Rc::new(value.into_bytes()), subtype: TextSubtype::Json, } } @@ -186,7 +186,7 @@ impl OwnedValue { return Ok(OwnedValue::Null); }; if v.is_json() { - Ok(OwnedValue::Text(Text::json(text))) + Ok(OwnedValue::Text(Text::json(text.to_string()))) } else { Ok(OwnedValue::build_text(text)) }