mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-18 06:24:56 +01:00
Merge 'Fix jsonb functions check valid json string binary' from
closes: #2820 ``` turso> select user->>'age' as age from json_user; ┌─────┐ │ age │ ├─────┤ │ 30 │ └─────┘ ``` Closes #2821
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
use crate::json::error::{Error as PError, Result as PResult};
|
||||
use crate::json::Conv;
|
||||
use crate::{bail_parse_error, LimboError, Result};
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
@@ -742,7 +743,15 @@ impl JsonbHeader {
|
||||
Self(ElementType::OBJECT, 0)
|
||||
}
|
||||
|
||||
fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
|
||||
pub(super) fn element_type(&self) -> ElementType {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub(super) fn payload_size(&self) -> PayloadSize {
|
||||
self.1
|
||||
}
|
||||
|
||||
pub(super) fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
|
||||
match slice.get(cursor) {
|
||||
Some(header_byte) => {
|
||||
// Extract first 4 bits (values 0-15)
|
||||
@@ -921,6 +930,96 @@ impl Jsonb {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_valid(&self) -> bool {
|
||||
self.validate_element(0, self.data.len(), 0).is_ok()
|
||||
}
|
||||
|
||||
fn validate_element(&self, start: usize, end: usize, depth: usize) -> Result<()> {
|
||||
if depth > MAX_JSON_DEPTH {
|
||||
bail_parse_error!("Too deep");
|
||||
}
|
||||
|
||||
if start >= end {
|
||||
bail_parse_error!("Empty element");
|
||||
}
|
||||
|
||||
let (header, header_offset) = self.read_header(start)?;
|
||||
let payload_start = start + header_offset;
|
||||
let payload_size = header.payload_size();
|
||||
let payload_end = payload_start + payload_size;
|
||||
|
||||
if payload_end != end {
|
||||
bail_parse_error!("Size mismatch");
|
||||
}
|
||||
|
||||
match header.element_type() {
|
||||
ElementType::NULL | ElementType::TRUE | ElementType::FALSE => {
|
||||
if payload_size == 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
bail_parse_error!("Invalid payload for primitive")
|
||||
}
|
||||
}
|
||||
ElementType::INT | ElementType::INT5 | ElementType::FLOAT | ElementType::FLOAT5 => {
|
||||
if payload_size > 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
bail_parse_error!("Empty number payload")
|
||||
}
|
||||
}
|
||||
ElementType::TEXT | ElementType::TEXTJ | ElementType::TEXT5 | ElementType::TEXTRAW => {
|
||||
let payload = &self.data[payload_start..payload_end];
|
||||
std::str::from_utf8(payload).map_err(|_| {
|
||||
LimboError::ParseError("Invalid UTF-8 in text payload".to_string())
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
ElementType::ARRAY => {
|
||||
let mut pos = payload_start;
|
||||
while pos < payload_end {
|
||||
if pos >= self.data.len() {
|
||||
bail_parse_error!("Array element out of bounds");
|
||||
}
|
||||
let (elem_header, elem_header_size) = self.read_header(pos)?;
|
||||
let elem_end = pos + elem_header_size + elem_header.payload_size();
|
||||
if elem_end > payload_end {
|
||||
bail_parse_error!("Array element exceeds bounds");
|
||||
}
|
||||
self.validate_element(pos, elem_end, depth + 1)?;
|
||||
pos = elem_end;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
ElementType::OBJECT => {
|
||||
let mut pos = payload_start;
|
||||
let mut count = 0;
|
||||
while pos < payload_end {
|
||||
if pos >= self.data.len() {
|
||||
bail_parse_error!("Object element out of bounds");
|
||||
}
|
||||
let (elem_header, elem_header_size) = self.read_header(pos)?;
|
||||
if count % 2 == 0 && !elem_header.element_type().is_valid_key() {
|
||||
bail_parse_error!("Object key must be text");
|
||||
}
|
||||
|
||||
let elem_end = pos + elem_header_size + elem_header.payload_size();
|
||||
if elem_end > payload_end {
|
||||
bail_parse_error!("Object element exceeds bounds");
|
||||
}
|
||||
self.validate_element(pos, elem_end, depth + 1)?;
|
||||
pos = elem_end;
|
||||
count += 1;
|
||||
}
|
||||
|
||||
if count % 2 != 0 {
|
||||
bail_parse_error!("Object must have even number of elements");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
_ => bail_parse_error!("Invalid element type"),
|
||||
}
|
||||
}
|
||||
|
||||
#[expect(clippy::inherent_to_string)]
|
||||
pub fn to_string(&self) -> String {
|
||||
let mut result = String::with_capacity(self.data.len() * 2);
|
||||
@@ -2170,6 +2269,18 @@ impl Jsonb {
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn from_str_with_mode(input: &str, mode: Conv) -> PResult<Self> {
|
||||
// Parse directly as JSON if it's already JSON subtype or strict mode is on
|
||||
if matches!(mode, Conv::ToString) {
|
||||
let mut str = input.replace('"', "\\\"");
|
||||
str.insert(0, '"');
|
||||
str.push('"');
|
||||
Jsonb::from_str(&str)
|
||||
} else {
|
||||
Jsonb::from_str(input)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_raw_data(data: &[u8]) -> Self {
|
||||
Self::new(data.len(), Some(data))
|
||||
}
|
||||
|
||||
@@ -118,24 +118,20 @@ pub fn convert_dbtype_to_jsonb(val: &Value, strict: Conv) -> crate::Result<Jsonb
|
||||
)
|
||||
}
|
||||
|
||||
fn parse_as_json_text(slice: &[u8]) -> crate::Result<Jsonb> {
|
||||
let str = std::str::from_utf8(slice)
|
||||
.map_err(|_| LimboError::ParseError("malformed JSON".to_string()))?;
|
||||
Jsonb::from_str_with_mode(str, Conv::Strict).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Result<Jsonb> {
|
||||
match val {
|
||||
RefValue::Text(text) => {
|
||||
let res = if text.subtype == TextSubtype::Json || matches!(strict, Conv::Strict) {
|
||||
// Parse directly as JSON if it's already JSON subtype or strict mode is on
|
||||
let json = if matches!(strict, Conv::ToString) {
|
||||
let mut str = text.as_str().replace('"', "\\\"");
|
||||
str.insert(0, '"');
|
||||
str.push('"');
|
||||
Jsonb::from_str(&str)
|
||||
} else {
|
||||
Jsonb::from_str(text.as_str())
|
||||
};
|
||||
json
|
||||
Jsonb::from_str_with_mode(text.as_str(), strict)
|
||||
} else {
|
||||
// Handle as a string literal otherwise
|
||||
let mut str = text.as_str().replace('"', "\\\"");
|
||||
|
||||
// Quote the string to make it a JSON string
|
||||
str.insert(0, '"');
|
||||
str.push('"');
|
||||
@@ -144,7 +140,40 @@ pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Resul
|
||||
res.map_err(|_| LimboError::ParseError("malformed JSON".to_string()))
|
||||
}
|
||||
RefValue::Blob(blob) => {
|
||||
let json = Jsonb::from_raw_data(blob.to_slice());
|
||||
let bytes = blob.to_slice();
|
||||
// Valid JSON can start with these whitespace characters
|
||||
let index = bytes
|
||||
.iter()
|
||||
.position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
|
||||
.unwrap_or(bytes.len());
|
||||
let slice = &bytes[index..];
|
||||
let json = match slice {
|
||||
// branch with no overlapping initial byte
|
||||
[b'"', ..] | [b'-', ..] | [b'0'..=b'2', ..] => parse_as_json_text(slice)?,
|
||||
_ => match JsonbHeader::from_slice(0, slice) {
|
||||
Ok((header, header_offset)) => {
|
||||
let payload_size = header.payload_size();
|
||||
let total_expected = header_offset + payload_size;
|
||||
|
||||
if total_expected != slice.len() {
|
||||
parse_as_json_text(slice)?
|
||||
} else {
|
||||
let jsonb = Jsonb::from_raw_data(slice);
|
||||
let is_valid_json = if payload_size <= 7 {
|
||||
jsonb.is_valid()
|
||||
} else {
|
||||
jsonb.element_type().is_ok()
|
||||
};
|
||||
if is_valid_json {
|
||||
jsonb
|
||||
} else {
|
||||
parse_as_json_text(slice)?
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => parse_as_json_text(slice)?,
|
||||
},
|
||||
};
|
||||
json.element_type()?;
|
||||
Ok(json)
|
||||
}
|
||||
|
||||
@@ -279,9 +279,46 @@ do_execsql_test json_arrow_object {
|
||||
SELECT '{"a": [1,2,3]}' -> '$.a'
|
||||
} {{[1,2,3]}}
|
||||
|
||||
do_execsql_test json_arrow_shift_object {
|
||||
SELECT '{"a": [1,2,3]}' ->> '$.a'
|
||||
} {{[1,2,3]}}
|
||||
do_execsql_test json_arrow_blob_object {
|
||||
SELECT cast('{"age":30,"name":"John"}' as blob) -> '$.age'
|
||||
} {{30}}
|
||||
|
||||
# Tests against valid jsonb [b'{',.., b'}'] vs json text '{..}'
|
||||
# b'{' = ElementType::Array, PayloadSize of 7.
|
||||
# b'}' = last element in array ends in '}'
|
||||
# x'7B0707070707177D' = jsonb(["", "", "", "", "", "}"])
|
||||
do_execsql_test json_arrow_blob_array {
|
||||
SELECT x'7B0707070707177D' -> '$[5]'
|
||||
} {\"\}\"}
|
||||
|
||||
# Tests against valid jsonb [b'[',.., b']'] vs json text '[..]'
|
||||
# b'[' = ElementType::Array, PayloadSize of 5.
|
||||
# b']' = last element in array ends in ']'
|
||||
# x'5B070707175D' = jsonb(["", "", "", "]"])
|
||||
do_execsql_test json_arrow_blob_array_2 {
|
||||
SELECT x'5B070707175D' -> '$[3]'
|
||||
} {\"\]\"}
|
||||
|
||||
do_execsql_test json_arrow_blob_number {
|
||||
SELECT cast('4' as blob) -> '$'
|
||||
} {{4}}
|
||||
|
||||
do_execsql_test json_arrow_blob_number_2 {
|
||||
SELECT cast(33 as blob) -> '$'
|
||||
} {{33}}
|
||||
|
||||
# jsonb(333)
|
||||
do_execsql_test json_arrow_blob_number_3 {
|
||||
SELECT x'33333333' -> '$'
|
||||
} {{333}}
|
||||
|
||||
do_execsql_test json_arrow_blob_negative_number {
|
||||
SELECT cast('-4' as blob) -> '$'
|
||||
} {{-4}}
|
||||
|
||||
do_execsql_test json_arrow_shift_blob {
|
||||
SELECT cast('{"age":30,"name":"John"}' as blob) ->> '$.age'
|
||||
} {{30}}
|
||||
|
||||
do_execsql_test json_extract_object_2 {
|
||||
SELECT json_extract('{"a": [1,2,3]}', '$.a', '$.a[0]', '$.a[1]', '$.a[3]')
|
||||
@@ -291,7 +328,6 @@ do_execsql_test json_extract_object_3 {
|
||||
SELECT json_extract('{"a": [1,2,3]}', '$.a', '$.a[0]', '$.a[1]', null, '$.a[3]')
|
||||
} {{}}
|
||||
|
||||
|
||||
# \x61 is the ASCII code for 'a'
|
||||
do_execsql_test json_extract_with_escaping {
|
||||
SELECT json_extract('{"\x61": 1}', '$.a')
|
||||
@@ -510,10 +546,9 @@ do_execsql_test json_extract_overflow_int64 {
|
||||
# SELECT json_extract('[1, 2, 3]', '$[170141183460469231731687303715884105729]');
|
||||
#} {{2}}
|
||||
|
||||
# TODO: fix me - this passes on SQLite and needs to be fixed in Limbo.
|
||||
#do_execsql_test json_extract_blob {
|
||||
# select json_extract(CAST('[1,2,3]' as BLOB), '$[1]')
|
||||
#} {{2}}
|
||||
do_execsql_test json_extract_blob {
|
||||
select json_extract(CAST('[1,2,3]' as BLOB), '$[1]')
|
||||
} {{2}}
|
||||
|
||||
do_execsql_test json_array_length {
|
||||
SELECT json_array_length('[1,2,3,4]');
|
||||
@@ -680,12 +715,11 @@ do_execsql_test json_valid_1 {
|
||||
do_execsql_test json_valid_2 {
|
||||
SELECT json_valid('["a",55,"b",72]');
|
||||
} {1}
|
||||
#
|
||||
# Unimplemented
|
||||
#do_execsql_test json_valid_3 {
|
||||
# SELECT json_valid( CAST('{"a":"1}' AS BLOB) );
|
||||
#} {0}
|
||||
#
|
||||
|
||||
do_execsql_test json_valid_3 {
|
||||
SELECT json_valid( CAST('{"a":"1}' AS BLOB) );
|
||||
} {0}
|
||||
|
||||
do_execsql_test json_valid_4 {
|
||||
SELECT json_valid(123);
|
||||
} {1}
|
||||
@@ -830,6 +864,14 @@ do_execsql_test json-remove-7 {
|
||||
SELECT json_remove('{"a": 1, "b": [1,2], "c": {"d": 3}}', '$.a', '$.b[0]', '$.c.d');
|
||||
} {{{"b":[2],"c":{}}}}
|
||||
|
||||
do_execsql_test json-remove-8 {
|
||||
SELECT json_remove(cast('{"age":30,"name":"John"}' as blob), '$.age');
|
||||
} {{{"name":"John"}}}
|
||||
|
||||
do_execsql_test json-remove-9 {
|
||||
SELECT json_remove(cast('{"user":{"id":123,"profile":{"name":"Alice","age":25}}}' as blob), '$.user.id');
|
||||
} {{{"user":{"profile":{"name":"Alice","age":25}}}}}
|
||||
|
||||
do_execsql_test json_set_field_empty_object {
|
||||
SELECT json_set('{}', '$.field', 'value');
|
||||
} {{{"field":"value"}}}
|
||||
@@ -838,6 +880,10 @@ do_execsql_test json_set_replace_field {
|
||||
SELECT json_set('{"field":"old_value"}', '$.field', 'new_value');
|
||||
} {{{"field":"new_value"}}}
|
||||
|
||||
do_execsql_test json_set_replace_field_2 {
|
||||
SELECT json_set(cast('{"age":30,"name":"John"}' as blob), '$.age', 40);
|
||||
} {{{"age":40,"name":"John"}}}
|
||||
|
||||
do_execsql_test json_set_set_deeply_nested_key {
|
||||
SELECT json_set('{}', '$.object.doesnt.exist', 'value');
|
||||
} {{{"object":{"doesnt":{"exist":"value"}}}}}
|
||||
|
||||
Reference in New Issue
Block a user