Merge 'Fix jsonb functions check valid json string binary' from

closes: #2820
```
turso> select user->>'age' as age from json_user;
┌─────┐
│ age │
├─────┤
│  30 │
└─────┘
```

Closes #2821
This commit is contained in:
Pekka Enberg
2025-09-10 14:44:54 +03:00
committed by GitHub
3 changed files with 213 additions and 27 deletions

View File

@@ -1,4 +1,5 @@
use crate::json::error::{Error as PError, Result as PResult};
use crate::json::Conv;
use crate::{bail_parse_error, LimboError, Result};
use std::{
borrow::Cow,
@@ -742,7 +743,15 @@ impl JsonbHeader {
Self(ElementType::OBJECT, 0)
}
fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
pub(super) fn element_type(&self) -> ElementType {
self.0
}
pub(super) fn payload_size(&self) -> PayloadSize {
self.1
}
pub(super) fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
match slice.get(cursor) {
Some(header_byte) => {
// Extract first 4 bits (values 0-15)
@@ -921,6 +930,96 @@ impl Jsonb {
}
}
/// Returns `true` when the entire buffer validates as exactly one JSONB
/// element (validation starts at offset 0, depth 0, and must consume every byte).
pub fn is_valid(&self) -> bool {
    let whole_buffer = self.data.len();
    matches!(self.validate_element(0, whole_buffer, 0), Ok(()))
}
fn validate_element(&self, start: usize, end: usize, depth: usize) -> Result<()> {
if depth > MAX_JSON_DEPTH {
bail_parse_error!("Too deep");
}
if start >= end {
bail_parse_error!("Empty element");
}
let (header, header_offset) = self.read_header(start)?;
let payload_start = start + header_offset;
let payload_size = header.payload_size();
let payload_end = payload_start + payload_size;
if payload_end != end {
bail_parse_error!("Size mismatch");
}
match header.element_type() {
ElementType::NULL | ElementType::TRUE | ElementType::FALSE => {
if payload_size == 0 {
Ok(())
} else {
bail_parse_error!("Invalid payload for primitive")
}
}
ElementType::INT | ElementType::INT5 | ElementType::FLOAT | ElementType::FLOAT5 => {
if payload_size > 0 {
Ok(())
} else {
bail_parse_error!("Empty number payload")
}
}
ElementType::TEXT | ElementType::TEXTJ | ElementType::TEXT5 | ElementType::TEXTRAW => {
let payload = &self.data[payload_start..payload_end];
std::str::from_utf8(payload).map_err(|_| {
LimboError::ParseError("Invalid UTF-8 in text payload".to_string())
})?;
Ok(())
}
ElementType::ARRAY => {
let mut pos = payload_start;
while pos < payload_end {
if pos >= self.data.len() {
bail_parse_error!("Array element out of bounds");
}
let (elem_header, elem_header_size) = self.read_header(pos)?;
let elem_end = pos + elem_header_size + elem_header.payload_size();
if elem_end > payload_end {
bail_parse_error!("Array element exceeds bounds");
}
self.validate_element(pos, elem_end, depth + 1)?;
pos = elem_end;
}
Ok(())
}
ElementType::OBJECT => {
let mut pos = payload_start;
let mut count = 0;
while pos < payload_end {
if pos >= self.data.len() {
bail_parse_error!("Object element out of bounds");
}
let (elem_header, elem_header_size) = self.read_header(pos)?;
if count % 2 == 0 && !elem_header.element_type().is_valid_key() {
bail_parse_error!("Object key must be text");
}
let elem_end = pos + elem_header_size + elem_header.payload_size();
if elem_end > payload_end {
bail_parse_error!("Object element exceeds bounds");
}
self.validate_element(pos, elem_end, depth + 1)?;
pos = elem_end;
count += 1;
}
if count % 2 != 0 {
bail_parse_error!("Object must have even number of elements");
}
Ok(())
}
_ => bail_parse_error!("Invalid element type"),
}
}
#[expect(clippy::inherent_to_string)]
pub fn to_string(&self) -> String {
let mut result = String::with_capacity(self.data.len() * 2);
@@ -2170,6 +2269,18 @@ impl Jsonb {
Ok(result)
}
/// Parses `input` into a `Jsonb` according to `mode`.
///
/// With `Conv::ToString` the input is treated as a plain string: every `"` is
/// escaped as `\"`, the result is wrapped in double quotes, and that quoted
/// text is parsed. With any other mode `input` is parsed as JSON as-is.
pub fn from_str_with_mode(input: &str, mode: Conv) -> PResult<Self> {
    match mode {
        Conv::ToString => {
            // NOTE(review): only `"` is escaped here; backslashes and control
            // characters in `input` pass through unescaped - confirm intended.
            let escaped = input.replace('"', "\\\"");
            let mut quoted = String::with_capacity(escaped.len() + 2);
            quoted.push('"');
            quoted.push_str(&escaped);
            quoted.push('"');
            Jsonb::from_str(&quoted)
        }
        _ => Jsonb::from_str(input),
    }
}
/// Builds a `Jsonb` from raw bytes by passing the slice length and the slice
/// itself to `Self::new`. This function performs no validation of its own.
pub fn from_raw_data(data: &[u8]) -> Self {
    let byte_len = data.len();
    Self::new(byte_len, Some(data))
}

View File

@@ -118,24 +118,20 @@ pub fn convert_dbtype_to_jsonb(val: &Value, strict: Conv) -> crate::Result<Jsonb
)
}
/// Interprets `slice` as UTF-8 JSON text and parses it with `Conv::Strict`.
///
/// Bytes that are not valid UTF-8 yield a "malformed JSON" parse error; parser
/// errors are converted into the crate error type.
fn parse_as_json_text(slice: &[u8]) -> crate::Result<Jsonb> {
    let Ok(text) = std::str::from_utf8(slice) else {
        return Err(LimboError::ParseError("malformed JSON".to_string()));
    };
    Jsonb::from_str_with_mode(text, Conv::Strict).map_err(Into::into)
}
pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Result<Jsonb> {
match val {
RefValue::Text(text) => {
let res = if text.subtype == TextSubtype::Json || matches!(strict, Conv::Strict) {
// Parse directly as JSON if it's already JSON subtype or strict mode is on
let json = if matches!(strict, Conv::ToString) {
let mut str = text.as_str().replace('"', "\\\"");
str.insert(0, '"');
str.push('"');
Jsonb::from_str(&str)
} else {
Jsonb::from_str(text.as_str())
};
json
Jsonb::from_str_with_mode(text.as_str(), strict)
} else {
// Handle as a string literal otherwise
let mut str = text.as_str().replace('"', "\\\"");
// Quote the string to make it a JSON string
str.insert(0, '"');
str.push('"');
@@ -144,7 +140,40 @@ pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Resul
res.map_err(|_| LimboError::ParseError("malformed JSON".to_string()))
}
RefValue::Blob(blob) => {
let json = Jsonb::from_raw_data(blob.to_slice());
let bytes = blob.to_slice();
// Valid JSON can start with these whitespace characters
let index = bytes
.iter()
.position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
.unwrap_or(bytes.len());
let slice = &bytes[index..];
let json = match slice {
// branch with no overlapping initial byte
[b'"', ..] | [b'-', ..] | [b'0'..=b'2', ..] => parse_as_json_text(slice)?,
_ => match JsonbHeader::from_slice(0, slice) {
Ok((header, header_offset)) => {
let payload_size = header.payload_size();
let total_expected = header_offset + payload_size;
if total_expected != slice.len() {
parse_as_json_text(slice)?
} else {
let jsonb = Jsonb::from_raw_data(slice);
let is_valid_json = if payload_size <= 7 {
jsonb.is_valid()
} else {
jsonb.element_type().is_ok()
};
if is_valid_json {
jsonb
} else {
parse_as_json_text(slice)?
}
}
}
Err(_) => parse_as_json_text(slice)?,
},
};
json.element_type()?;
Ok(json)
}

View File

@@ -279,9 +279,46 @@ do_execsql_test json_arrow_object {
SELECT '{"a": [1,2,3]}' -> '$.a'
} {{[1,2,3]}}
do_execsql_test json_arrow_shift_object {
SELECT '{"a": [1,2,3]}' ->> '$.a'
} {{[1,2,3]}}
do_execsql_test json_arrow_blob_object {
SELECT cast('{"age":30,"name":"John"}' as blob) -> '$.age'
} {{30}}
# Tests against valid jsonb [b'{',.., b'}'] vs json text '{..}'
# b'{' = ElementType::Array, PayloadSize of 7.
# b'}' = last element in array ends in '}'
# x'7B0707070707177D' = jsonb(["", "", "", "", "", "}"])
do_execsql_test json_arrow_blob_array {
SELECT x'7B0707070707177D' -> '$[5]'
} {\"\}\"}
# Tests against valid jsonb [b'[',.., b']'] vs json text '[..]'
# b'[' = ElementType::Array, PayloadSize of 5.
# b']' = last element in array ends in ']'
# x'5B070707175D' = jsonb(["", "", "", "]"])
do_execsql_test json_arrow_blob_array_2 {
SELECT x'5B070707175D' -> '$[3]'
} {\"\]\"}
# A blob holding the JSON text '4' is expected to evaluate to the number 4.
do_execsql_test json_arrow_blob_number {
SELECT cast('4' as blob) -> '$'
} {{4}}
# An integer cast to a blob is expected to evaluate to the same number.
do_execsql_test json_arrow_blob_number_2 {
SELECT cast(33 as blob) -> '$'
} {{33}}
# jsonb(333)
# Every byte of x'33333333' is ASCII '3'; the expected result is 333 rather
# than the text number 3333, i.e. the blob is read as binary jsonb here.
do_execsql_test json_arrow_blob_number_3 {
SELECT x'33333333' -> '$'
} {{333}}
# A leading '-' in the blob is expected to be handled as JSON text (negative number).
do_execsql_test json_arrow_blob_negative_number {
SELECT cast('-4' as blob) -> '$'
} {{-4}}
do_execsql_test json_arrow_shift_blob {
SELECT cast('{"age":30,"name":"John"}' as blob) ->> '$.age'
} {{30}}
do_execsql_test json_extract_object_2 {
SELECT json_extract('{"a": [1,2,3]}', '$.a', '$.a[0]', '$.a[1]', '$.a[3]')
@@ -291,7 +328,6 @@ do_execsql_test json_extract_object_3 {
SELECT json_extract('{"a": [1,2,3]}', '$.a', '$.a[0]', '$.a[1]', null, '$.a[3]')
} {{}}
# \x61 is the ASCII code for 'a'
do_execsql_test json_extract_with_escaping {
SELECT json_extract('{"\x61": 1}', '$.a')
@@ -510,10 +546,9 @@ do_execsql_test json_extract_overflow_int64 {
# SELECT json_extract('[1, 2, 3]', '$[170141183460469231731687303715884105729]');
#} {{2}}
# TODO: fix me - this passes on SQLite and needs to be fixed in Limbo.
#do_execsql_test json_extract_blob {
# select json_extract(CAST('[1,2,3]' as BLOB), '$[1]')
#} {{2}}
do_execsql_test json_extract_blob {
select json_extract(CAST('[1,2,3]' as BLOB), '$[1]')
} {{2}}
do_execsql_test json_array_length {
SELECT json_array_length('[1,2,3,4]');
@@ -680,12 +715,11 @@ do_execsql_test json_valid_1 {
do_execsql_test json_valid_2 {
SELECT json_valid('["a",55,"b",72]');
} {1}
#
# Unimplemented
#do_execsql_test json_valid_3 {
# SELECT json_valid( CAST('{"a":"1}' AS BLOB) );
#} {0}
#
do_execsql_test json_valid_3 {
SELECT json_valid( CAST('{"a":"1}' AS BLOB) );
} {0}
do_execsql_test json_valid_4 {
SELECT json_valid(123);
} {1}
@@ -830,6 +864,14 @@ do_execsql_test json-remove-7 {
SELECT json_remove('{"a": 1, "b": [1,2], "c": {"d": 3}}', '$.a', '$.b[0]', '$.c.d');
} {{{"b":[2],"c":{}}}}
# json_remove on JSON text supplied as a blob: removes a top-level key.
do_execsql_test json-remove-8 {
SELECT json_remove(cast('{"age":30,"name":"John"}' as blob), '$.age');
} {{{"name":"John"}}}
# json_remove on JSON text supplied as a blob: removes a nested key and keeps
# the sibling object intact.
do_execsql_test json-remove-9 {
SELECT json_remove(cast('{"user":{"id":123,"profile":{"name":"Alice","age":25}}}' as blob), '$.user.id');
} {{{"user":{"profile":{"name":"Alice","age":25}}}}}
do_execsql_test json_set_field_empty_object {
SELECT json_set('{}', '$.field', 'value');
} {{{"field":"value"}}}
@@ -838,6 +880,10 @@ do_execsql_test json_set_replace_field {
SELECT json_set('{"field":"old_value"}', '$.field', 'new_value');
} {{{"field":"new_value"}}}
# json_set on JSON text supplied as a blob: replaces the value of an existing key.
do_execsql_test json_set_replace_field_2 {
SELECT json_set(cast('{"age":30,"name":"John"}' as blob), '$.age', 40);
} {{{"age":40,"name":"John"}}}
do_execsql_test json_set_set_deeply_nested_key {
SELECT json_set('{}', '$.object.doesnt.exist', 'value');
} {{{"object":{"doesnt":{"exist":"value"}}}}}