mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-21 07:55:18 +01:00
fix: jsonb functions to check if binary is json string
chore: match sqlite error chore: use existing slice variable add better parsing logic, and validation see sqlite source code @ sqlite/src/json.c -> static int jsonArgIsJsonb chore: clippy
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
use crate::json::error::{Error as PError, Result as PResult};
|
||||
use crate::json::Conv;
|
||||
use crate::{bail_parse_error, LimboError, Result};
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
@@ -742,7 +743,15 @@ impl JsonbHeader {
|
||||
Self(ElementType::OBJECT, 0)
|
||||
}
|
||||
|
||||
fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
|
||||
pub(super) fn element_type(&self) -> ElementType {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub(super) fn payload_size(&self) -> PayloadSize {
|
||||
self.1
|
||||
}
|
||||
|
||||
pub(super) fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
|
||||
match slice.get(cursor) {
|
||||
Some(header_byte) => {
|
||||
// Extract first 4 bits (values 0-15)
|
||||
@@ -909,6 +918,96 @@ impl Jsonb {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_valid(&self) -> bool {
|
||||
self.validate_element(0, self.data.len(), 0).is_ok()
|
||||
}
|
||||
|
||||
fn validate_element(&self, start: usize, end: usize, depth: usize) -> Result<()> {
|
||||
if depth > MAX_JSON_DEPTH {
|
||||
bail_parse_error!("Too deep");
|
||||
}
|
||||
|
||||
if start >= end {
|
||||
bail_parse_error!("Empty element");
|
||||
}
|
||||
|
||||
let (header, header_offset) = self.read_header(start)?;
|
||||
let payload_start = start + header_offset;
|
||||
let payload_size = header.payload_size();
|
||||
let payload_end = payload_start + payload_size;
|
||||
|
||||
if payload_end != end {
|
||||
bail_parse_error!("Size mismatch");
|
||||
}
|
||||
|
||||
match header.element_type() {
|
||||
ElementType::NULL | ElementType::TRUE | ElementType::FALSE => {
|
||||
if payload_size == 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
bail_parse_error!("Invalid payload for primitive")
|
||||
}
|
||||
}
|
||||
ElementType::INT | ElementType::INT5 | ElementType::FLOAT | ElementType::FLOAT5 => {
|
||||
if payload_size > 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
bail_parse_error!("Empty number payload")
|
||||
}
|
||||
}
|
||||
ElementType::TEXT | ElementType::TEXTJ | ElementType::TEXT5 | ElementType::TEXTRAW => {
|
||||
let payload = &self.data[payload_start..payload_end];
|
||||
std::str::from_utf8(payload).map_err(|_| {
|
||||
LimboError::ParseError("Invalid UTF-8 in text payload".to_string())
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
ElementType::ARRAY => {
|
||||
let mut pos = payload_start;
|
||||
while pos < payload_end {
|
||||
if pos >= self.data.len() {
|
||||
bail_parse_error!("Array element out of bounds");
|
||||
}
|
||||
let (elem_header, elem_header_size) = self.read_header(pos)?;
|
||||
let elem_end = pos + elem_header_size + elem_header.payload_size();
|
||||
if elem_end > payload_end {
|
||||
bail_parse_error!("Array element exceeds bounds");
|
||||
}
|
||||
self.validate_element(pos, elem_end, depth + 1)?;
|
||||
pos = elem_end;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
ElementType::OBJECT => {
|
||||
let mut pos = payload_start;
|
||||
let mut count = 0;
|
||||
while pos < payload_end {
|
||||
if pos >= self.data.len() {
|
||||
bail_parse_error!("Object element out of bounds");
|
||||
}
|
||||
let (elem_header, elem_header_size) = self.read_header(pos)?;
|
||||
if count % 2 == 0 && !elem_header.element_type().is_valid_key() {
|
||||
bail_parse_error!("Object key must be text");
|
||||
}
|
||||
|
||||
let elem_end = pos + elem_header_size + elem_header.payload_size();
|
||||
if elem_end > payload_end {
|
||||
bail_parse_error!("Object element exceeds bounds");
|
||||
}
|
||||
self.validate_element(pos, elem_end, depth + 1)?;
|
||||
pos = elem_end;
|
||||
count += 1;
|
||||
}
|
||||
|
||||
if count % 2 != 0 {
|
||||
bail_parse_error!("Object must have even number of elements");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
_ => bail_parse_error!("Invalid element type"),
|
||||
}
|
||||
}
|
||||
|
||||
#[expect(clippy::inherent_to_string)]
|
||||
pub fn to_string(&self) -> String {
|
||||
let mut result = String::with_capacity(self.data.len() * 2);
|
||||
@@ -2158,6 +2257,18 @@ impl Jsonb {
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn from_str_with_mode(input: &str, mode: Conv) -> PResult<Self> {
|
||||
// Parse directly as JSON if it's already JSON subtype or strict mode is on
|
||||
if matches!(mode, Conv::ToString) {
|
||||
let mut str = input.replace('"', "\\\"");
|
||||
str.insert(0, '"');
|
||||
str.push('"');
|
||||
Jsonb::from_str(&str)
|
||||
} else {
|
||||
Jsonb::from_str(input)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_raw_data(data: &[u8]) -> Self {
|
||||
Self::new(data.len(), Some(data))
|
||||
}
|
||||
|
||||
@@ -117,24 +117,20 @@ pub fn convert_dbtype_to_jsonb(val: &Value, strict: Conv) -> crate::Result<Jsonb
|
||||
)
|
||||
}
|
||||
|
||||
fn parse_as_json_text(slice: &[u8]) -> crate::Result<Jsonb> {
|
||||
let str = std::str::from_utf8(slice)
|
||||
.map_err(|_| LimboError::ParseError("malformed JSON".to_string()))?;
|
||||
Jsonb::from_str_with_mode(str, Conv::Strict).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Result<Jsonb> {
|
||||
match val {
|
||||
RefValue::Text(text) => {
|
||||
let res = if text.subtype == TextSubtype::Json || matches!(strict, Conv::Strict) {
|
||||
// Parse directly as JSON if it's already JSON subtype or strict mode is on
|
||||
let json = if matches!(strict, Conv::ToString) {
|
||||
let mut str = text.as_str().replace('"', "\\\"");
|
||||
str.insert(0, '"');
|
||||
str.push('"');
|
||||
Jsonb::from_str(&str)
|
||||
} else {
|
||||
Jsonb::from_str(text.as_str())
|
||||
};
|
||||
json
|
||||
Jsonb::from_str_with_mode(text.as_str(), strict)
|
||||
} else {
|
||||
// Handle as a string literal otherwise
|
||||
let mut str = text.as_str().replace('"', "\\\"");
|
||||
|
||||
// Quote the string to make it a JSON string
|
||||
str.insert(0, '"');
|
||||
str.push('"');
|
||||
@@ -143,7 +139,40 @@ pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Resul
|
||||
res.map_err(|_| LimboError::ParseError("malformed JSON".to_string()))
|
||||
}
|
||||
RefValue::Blob(blob) => {
|
||||
let json = Jsonb::from_raw_data(blob.to_slice());
|
||||
let bytes = blob.to_slice();
|
||||
// Valid JSON can start with these whitespace characters
|
||||
let index = bytes
|
||||
.iter()
|
||||
.position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
|
||||
.unwrap_or(bytes.len());
|
||||
let slice = &bytes[index..];
|
||||
let json = match slice {
|
||||
// branch with no overlapping initial byte
|
||||
[b'"', ..] | [b'-', ..] | [b'0'..=b'2', ..] => parse_as_json_text(slice)?,
|
||||
_ => match JsonbHeader::from_slice(0, slice) {
|
||||
Ok((header, header_offset)) => {
|
||||
let payload_size = header.payload_size();
|
||||
let total_expected = header_offset + payload_size;
|
||||
|
||||
if total_expected != slice.len() {
|
||||
parse_as_json_text(slice)?
|
||||
} else {
|
||||
let jsonb = Jsonb::from_raw_data(slice);
|
||||
let is_valid_json = if payload_size <= 7 {
|
||||
jsonb.is_valid()
|
||||
} else {
|
||||
jsonb.element_type().is_ok()
|
||||
};
|
||||
if is_valid_json {
|
||||
jsonb
|
||||
} else {
|
||||
parse_as_json_text(slice)?
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => parse_as_json_text(slice)?,
|
||||
},
|
||||
};
|
||||
json.element_type()?;
|
||||
Ok(json)
|
||||
}
|
||||
|
||||
@@ -279,6 +279,34 @@ do_execsql_test json_arrow_object {
|
||||
SELECT '{"a": [1,2,3]}' -> '$.a'
|
||||
} {{[1,2,3]}}
|
||||
|
||||
do_execsql_test json_arrow_blob_object {
|
||||
SELECT cast('{"age":30,"name":"John"}' as blob) -> '$.age'
|
||||
} {{30}}
|
||||
|
||||
# Checks that a valid JSONB blob of the form [b'{', .., b'}'] is not mistaken for JSON text '{..}'.
# b'{' (0x7B) is a JSONB header byte: ElementType::ARRAY with a PayloadSize of 7.
# b'}' (0x7D) is the one-byte payload of the last text element in the array.
# x'7B0707070707177D' = jsonb(["", "", "", "", "", "}"])
|
||||
do_execsql_test json_arrow_blob_array {
|
||||
SELECT x'7B0707070707177D' -> '$[5]'
|
||||
} {\"\}\"}
|
||||
|
||||
do_execsql_test json_arrow_blob_number {
|
||||
SELECT cast('4' as blob) -> '$'
|
||||
} {{4}}
|
||||
|
||||
do_execsql_test json_arrow_blob_number_2 {
|
||||
SELECT cast(33 as blob) -> '$'
|
||||
} {{33}}
|
||||
|
||||
# jsonb(333)
|
||||
do_execsql_test json_arrow_blob_number_3 {
|
||||
SELECT x'33333333' -> '$'
|
||||
} {{333}}
|
||||
|
||||
do_execsql_test json_arrow_blob_negative_number {
|
||||
SELECT cast('-4' as blob) -> '$'
|
||||
} {{-4}}
|
||||
do_execsql_test json_arrow_shift_object {
|
||||
SELECT '{"a": [1,2,3]}' ->> '$.a'
|
||||
} {{[1,2,3]}}
|
||||
@@ -291,7 +319,6 @@ do_execsql_test json_extract_object_3 {
|
||||
SELECT json_extract('{"a": [1,2,3]}', '$.a', '$.a[0]', '$.a[1]', null, '$.a[3]')
|
||||
} {{}}
|
||||
|
||||
|
||||
# \x61 is the ASCII code for 'a'
|
||||
do_execsql_test json_extract_with_escaping {
|
||||
SELECT json_extract('{"\x61": 1}', '$.a')
|
||||
|
||||
Reference in New Issue
Block a user