diff --git a/core/json/jsonb.rs b/core/json/jsonb.rs index 721ae6b91..8b49de85a 100644 --- a/core/json/jsonb.rs +++ b/core/json/jsonb.rs @@ -1,4 +1,5 @@ use crate::json::error::{Error as PError, Result as PResult}; +use crate::json::Conv; use crate::{bail_parse_error, LimboError, Result}; use std::{ borrow::Cow, @@ -742,7 +743,15 @@ impl JsonbHeader { Self(ElementType::OBJECT, 0) } - fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> { + pub(super) fn element_type(&self) -> ElementType { + self.0 + } + + pub(super) fn payload_size(&self) -> PayloadSize { + self.1 + } + + pub(super) fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> { match slice.get(cursor) { Some(header_byte) => { // Extract first 4 bits (values 0-15) @@ -921,6 +930,96 @@ impl Jsonb { } } + pub fn is_valid(&self) -> bool { + self.validate_element(0, self.data.len(), 0).is_ok() + } + + fn validate_element(&self, start: usize, end: usize, depth: usize) -> Result<()> { + if depth > MAX_JSON_DEPTH { + bail_parse_error!("Too deep"); + } + + if start >= end { + bail_parse_error!("Empty element"); + } + + let (header, header_offset) = self.read_header(start)?; + let payload_start = start + header_offset; + let payload_size = header.payload_size(); + let payload_end = payload_start + payload_size; + + if payload_end != end { + bail_parse_error!("Size mismatch"); + } + + match header.element_type() { + ElementType::NULL | ElementType::TRUE | ElementType::FALSE => { + if payload_size == 0 { + Ok(()) + } else { + bail_parse_error!("Invalid payload for primitive") + } + } + ElementType::INT | ElementType::INT5 | ElementType::FLOAT | ElementType::FLOAT5 => { + if payload_size > 0 { + Ok(()) + } else { + bail_parse_error!("Empty number payload") + } + } + ElementType::TEXT | ElementType::TEXTJ | ElementType::TEXT5 | ElementType::TEXTRAW => { + let payload = &self.data[payload_start..payload_end]; + std::str::from_utf8(payload).map_err(|_| { + LimboError::ParseError("Invalid 
UTF-8 in text payload".to_string()) + })?; + Ok(()) + } + ElementType::ARRAY => { + let mut pos = payload_start; + while pos < payload_end { + if pos >= self.data.len() { + bail_parse_error!("Array element out of bounds"); + } + let (elem_header, elem_header_size) = self.read_header(pos)?; + let elem_end = pos + elem_header_size + elem_header.payload_size(); + if elem_end > payload_end { + bail_parse_error!("Array element exceeds bounds"); + } + self.validate_element(pos, elem_end, depth + 1)?; + pos = elem_end; + } + Ok(()) + } + ElementType::OBJECT => { + let mut pos = payload_start; + let mut count = 0; + while pos < payload_end { + if pos >= self.data.len() { + bail_parse_error!("Object element out of bounds"); + } + let (elem_header, elem_header_size) = self.read_header(pos)?; + if count % 2 == 0 && !elem_header.element_type().is_valid_key() { + bail_parse_error!("Object key must be text"); + } + + let elem_end = pos + elem_header_size + elem_header.payload_size(); + if elem_end > payload_end { + bail_parse_error!("Object element exceeds bounds"); + } + self.validate_element(pos, elem_end, depth + 1)?; + pos = elem_end; + count += 1; + } + + if count % 2 != 0 { + bail_parse_error!("Object must have even number of elements"); + } + Ok(()) + } + _ => bail_parse_error!("Invalid element type"), + } + } + #[expect(clippy::inherent_to_string)] pub fn to_string(&self) -> String { let mut result = String::with_capacity(self.data.len() * 2); @@ -2170,6 +2269,18 @@ impl Jsonb { Ok(result) } + pub fn from_str_with_mode(input: &str, mode: Conv) -> PResult { + // In ToString mode, escape and quote the input so it parses as a JSON string; otherwise parse it as JSON text + if matches!(mode, Conv::ToString) { + let mut str = input.replace('"', "\\\""); + str.insert(0, '"'); + str.push('"'); + Jsonb::from_str(&str) + } else { + Jsonb::from_str(input) + } + } + pub fn from_raw_data(data: &[u8]) -> Self { Self::new(data.len(), Some(data)) } diff --git a/core/json/mod.rs b/core/json/mod.rs index 
311c595fa..697879edd 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -118,24 +118,20 @@ pub fn convert_dbtype_to_jsonb(val: &Value, strict: Conv) -> crate::Result crate::Result { + let str = std::str::from_utf8(slice) + .map_err(|_| LimboError::ParseError("malformed JSON".to_string()))?; + Jsonb::from_str_with_mode(str, Conv::Strict).map_err(Into::into) +} + pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Result { match val { RefValue::Text(text) => { let res = if text.subtype == TextSubtype::Json || matches!(strict, Conv::Strict) { - // Parse directly as JSON if it's already JSON subtype or strict mode is on - let json = if matches!(strict, Conv::ToString) { - let mut str = text.as_str().replace('"', "\\\""); - str.insert(0, '"'); - str.push('"'); - Jsonb::from_str(&str) - } else { - Jsonb::from_str(text.as_str()) - }; - json + Jsonb::from_str_with_mode(text.as_str(), strict) } else { // Handle as a string literal otherwise let mut str = text.as_str().replace('"', "\\\""); - // Quote the string to make it a JSON string str.insert(0, '"'); str.push('"'); @@ -144,7 +140,40 @@ pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Resul res.map_err(|_| LimboError::ParseError("malformed JSON".to_string())) } RefValue::Blob(blob) => { - let json = Jsonb::from_raw_data(blob.to_slice()); + let bytes = blob.to_slice(); + // Valid JSON can start with these whitespace characters + let index = bytes + .iter() + .position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r')) + .unwrap_or(bytes.len()); + let slice = &bytes[index..]; + let json = match slice { + // branch with no overlapping initial byte + [b'"', ..] | [b'-', ..] | [b'0'..=b'2', ..] => parse_as_json_text(slice)?, + _ => match JsonbHeader::from_slice(0, slice) { + Ok((header, header_offset)) => { + let payload_size = header.payload_size(); + let total_expected = header_offset + payload_size; + + if total_expected != slice.len() { + parse_as_json_text(slice)? 
+ } else { + let jsonb = Jsonb::from_raw_data(slice); + let is_valid_json = if payload_size <= 7 { + jsonb.is_valid() + } else { + jsonb.element_type().is_ok() + }; + if is_valid_json { + jsonb + } else { + parse_as_json_text(slice)? + } + } + } + Err(_) => parse_as_json_text(slice)?, + }, + }; json.element_type()?; Ok(json) } diff --git a/testing/json.test b/testing/json.test index 781859e35..2e59e0893 100755 --- a/testing/json.test +++ b/testing/json.test @@ -279,9 +279,46 @@ do_execsql_test json_arrow_object { SELECT '{"a": [1,2,3]}' -> '$.a' } {{[1,2,3]}} -do_execsql_test json_arrow_shift_object { - SELECT '{"a": [1,2,3]}' ->> '$.a' -} {{[1,2,3]}} +do_execsql_test json_arrow_blob_object { + SELECT cast('{"age":30,"name":"John"}' as blob) -> '$.age' +} {{30}} + +# Tests against valid jsonb [b'{',.., b'}'] vs json text '{..}' +# b'{' = ElementType::Array, PayloadSize of 7. +# b'}' = last element in array ends in '}' +# x'7B0707070707177D' = jsonb(["", "", "", "", "", "}"]) +do_execsql_test json_arrow_blob_array { + SELECT x'7B0707070707177D' -> '$[5]' +} {\"\}\"} + +# Tests against valid jsonb [b'[',.., b']'] vs json text '[..]' +# b'[' = ElementType::Array, PayloadSize of 5. 
+# b']' = last element in array ends in ']' +# x'5B070707175D' = jsonb(["", "", "", "]"]) +do_execsql_test json_arrow_blob_array_2 { + SELECT x'5B070707175D' -> '$[3]' +} {\"\]\"} + +do_execsql_test json_arrow_blob_number { + SELECT cast('4' as blob) -> '$' +} {{4}} + +do_execsql_test json_arrow_blob_number_2 { + SELECT cast(33 as blob) -> '$' +} {{33}} + +# jsonb(333) +do_execsql_test json_arrow_blob_number_3 { + SELECT x'33333333' -> '$' +} {{333}} + +do_execsql_test json_arrow_blob_negative_number { + SELECT cast('-4' as blob) -> '$' +} {{-4}} + +do_execsql_test json_arrow_shift_blob { + SELECT cast('{"age":30,"name":"John"}' as blob) ->> '$.age' +} {{30}} do_execsql_test json_extract_object_2 { SELECT json_extract('{"a": [1,2,3]}', '$.a', '$.a[0]', '$.a[1]', '$.a[3]') @@ -291,7 +328,6 @@ do_execsql_test json_extract_object_3 { SELECT json_extract('{"a": [1,2,3]}', '$.a', '$.a[0]', '$.a[1]', null, '$.a[3]') } {{}} - # \x61 is the ASCII code for 'a' do_execsql_test json_extract_with_escaping { SELECT json_extract('{"\x61": 1}', '$.a') @@ -510,10 +546,9 @@ do_execsql_test json_extract_overflow_int64 { # SELECT json_extract('[1, 2, 3]', '$[170141183460469231731687303715884105729]'); #} {{2}} -# TODO: fix me - this passes on SQLite and needs to be fixed in Limbo. 
-#do_execsql_test json_extract_blob { -# select json_extract(CAST('[1,2,3]' as BLOB), '$[1]') -#} {{2}} +do_execsql_test json_extract_blob { + select json_extract(CAST('[1,2,3]' as BLOB), '$[1]') +} {{2}} do_execsql_test json_array_length { SELECT json_array_length('[1,2,3,4]'); @@ -680,12 +715,11 @@ do_execsql_test json_valid_1 { do_execsql_test json_valid_2 { SELECT json_valid('["a",55,"b",72]'); } {1} -# -# Unimplemented -#do_execsql_test json_valid_3 { -# SELECT json_valid( CAST('{"a":"1}' AS BLOB) ); -#} {0} -# + +do_execsql_test json_valid_3 { + SELECT json_valid( CAST('{"a":"1}' AS BLOB) ); +} {0} + do_execsql_test json_valid_4 { SELECT json_valid(123); } {1} @@ -830,6 +864,14 @@ do_execsql_test json-remove-7 { SELECT json_remove('{"a": 1, "b": [1,2], "c": {"d": 3}}', '$.a', '$.b[0]', '$.c.d'); } {{{"b":[2],"c":{}}}} +do_execsql_test json-remove-8 { + SELECT json_remove(cast('{"age":30,"name":"John"}' as blob), '$.age'); +} {{{"name":"John"}}} + +do_execsql_test json-remove-9 { + SELECT json_remove(cast('{"user":{"id":123,"profile":{"name":"Alice","age":25}}}' as blob), '$.user.id'); +} {{{"user":{"profile":{"name":"Alice","age":25}}}}} + do_execsql_test json_set_field_empty_object { SELECT json_set('{}', '$.field', 'value'); } {{{"field":"value"}}} @@ -838,6 +880,10 @@ do_execsql_test json_set_replace_field { SELECT json_set('{"field":"old_value"}', '$.field', 'new_value'); } {{{"field":"new_value"}}} +do_execsql_test json_set_replace_field_2 { + SELECT json_set(cast('{"age":30,"name":"John"}' as blob), '$.age', 40); +} {{{"age":40,"name":"John"}}} + do_execsql_test json_set_set_deeply_nested_key { SELECT json_set('{}', '$.object.doesnt.exist', 'value'); } {{{"object":{"doesnt":{"exist":"value"}}}}}