fix: jsonb functions now check whether a blob holds JSON text

chore: match sqlite error

chore: use existing slice variable

add better parsing logic and validation

see sqlite source code @ sqlite/src/json.c -> static int jsonArgIsJsonb

chore: clippy
This commit is contained in:
themixednuts
2025-08-27 23:49:59 -05:00
parent f3d252d665
commit b6e64587cb
3 changed files with 181 additions and 14 deletions

View File

@@ -1,4 +1,5 @@
use crate::json::error::{Error as PError, Result as PResult};
use crate::json::Conv;
use crate::{bail_parse_error, LimboError, Result};
use std::{
borrow::Cow,
@@ -742,7 +743,15 @@ impl JsonbHeader {
Self(ElementType::OBJECT, 0)
}
fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
/// Returns the element type recorded in this header (the first tuple field).
pub(super) fn element_type(&self) -> ElementType {
self.0
}
/// Returns the payload size recorded in this header (the second tuple field).
///
/// Callers add this value to the header length to find where an element's
/// payload ends inside the raw byte buffer.
pub(super) fn payload_size(&self) -> PayloadSize {
self.1
}
pub(super) fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
match slice.get(cursor) {
Some(header_byte) => {
// Extract first 4 bits (values 0-15)
@@ -909,6 +918,96 @@ impl Jsonb {
}
}
/// Reports whether the whole buffer is exactly one well-formed JSONB element.
///
/// Validation starts at offset 0 with nesting depth 0 and must consume every
/// byte of `self.data`; any validation error is collapsed into `false`.
pub fn is_valid(&self) -> bool {
    let buffer_len = self.data.len();
    matches!(self.validate_element(0, buffer_len, 0), Ok(()))
}
fn validate_element(&self, start: usize, end: usize, depth: usize) -> Result<()> {
if depth > MAX_JSON_DEPTH {
bail_parse_error!("Too deep");
}
if start >= end {
bail_parse_error!("Empty element");
}
let (header, header_offset) = self.read_header(start)?;
let payload_start = start + header_offset;
let payload_size = header.payload_size();
let payload_end = payload_start + payload_size;
if payload_end != end {
bail_parse_error!("Size mismatch");
}
match header.element_type() {
ElementType::NULL | ElementType::TRUE | ElementType::FALSE => {
if payload_size == 0 {
Ok(())
} else {
bail_parse_error!("Invalid payload for primitive")
}
}
ElementType::INT | ElementType::INT5 | ElementType::FLOAT | ElementType::FLOAT5 => {
if payload_size > 0 {
Ok(())
} else {
bail_parse_error!("Empty number payload")
}
}
ElementType::TEXT | ElementType::TEXTJ | ElementType::TEXT5 | ElementType::TEXTRAW => {
let payload = &self.data[payload_start..payload_end];
std::str::from_utf8(payload).map_err(|_| {
LimboError::ParseError("Invalid UTF-8 in text payload".to_string())
})?;
Ok(())
}
ElementType::ARRAY => {
let mut pos = payload_start;
while pos < payload_end {
if pos >= self.data.len() {
bail_parse_error!("Array element out of bounds");
}
let (elem_header, elem_header_size) = self.read_header(pos)?;
let elem_end = pos + elem_header_size + elem_header.payload_size();
if elem_end > payload_end {
bail_parse_error!("Array element exceeds bounds");
}
self.validate_element(pos, elem_end, depth + 1)?;
pos = elem_end;
}
Ok(())
}
ElementType::OBJECT => {
let mut pos = payload_start;
let mut count = 0;
while pos < payload_end {
if pos >= self.data.len() {
bail_parse_error!("Object element out of bounds");
}
let (elem_header, elem_header_size) = self.read_header(pos)?;
if count % 2 == 0 && !elem_header.element_type().is_valid_key() {
bail_parse_error!("Object key must be text");
}
let elem_end = pos + elem_header_size + elem_header.payload_size();
if elem_end > payload_end {
bail_parse_error!("Object element exceeds bounds");
}
self.validate_element(pos, elem_end, depth + 1)?;
pos = elem_end;
count += 1;
}
if count % 2 != 0 {
bail_parse_error!("Object must have even number of elements");
}
Ok(())
}
_ => bail_parse_error!("Invalid element type"),
}
}
#[expect(clippy::inherent_to_string)]
pub fn to_string(&self) -> String {
let mut result = String::with_capacity(self.data.len() * 2);
@@ -2158,6 +2257,18 @@ impl Jsonb {
Ok(result)
}
/// Parses `input` into a `Jsonb`, interpreting it according to `mode`.
///
/// With `Conv::ToString` the input is treated as plain text: every `"` is
/// replaced by `\"`, the result is wrapped in double quotes, and that quoted
/// form is parsed. Every other mode parses `input` unchanged.
///
/// # Errors
/// Returns whatever error `Jsonb::from_str` reports for the text it is given.
pub fn from_str_with_mode(input: &str, mode: Conv) -> PResult<Self> {
    match mode {
        Conv::ToString => {
            // Only double quotes are escaped here; other characters,
            // including backslashes, are passed through untouched.
            let quoted = format!("\"{}\"", input.replace('"', "\\\""));
            Jsonb::from_str(&quoted)
        }
        _ => Jsonb::from_str(input),
    }
}
/// Builds a `Jsonb` from raw bytes by calling `Self::new` with the slice's
/// length and the slice itself.
///
/// No validation is performed in this function.
/// NOTE(review): whether `Self::new` copies or inspects `data` is not visible
/// here; confirm against its definition.
pub fn from_raw_data(data: &[u8]) -> Self {
Self::new(data.len(), Some(data))
}

View File

@@ -117,24 +117,20 @@ pub fn convert_dbtype_to_jsonb(val: &Value, strict: Conv) -> crate::Result<Jsonb
)
}
/// Interprets `slice` as UTF-8 JSON text and parses it with `Conv::Strict`.
///
/// # Errors
/// Returns `LimboError::ParseError("malformed JSON")` when `slice` is not valid
/// UTF-8; parser errors are converted into the crate error type.
fn parse_as_json_text(slice: &[u8]) -> crate::Result<Jsonb> {
    match std::str::from_utf8(slice) {
        Ok(text) => Ok(Jsonb::from_str_with_mode(text, Conv::Strict)?),
        Err(_) => Err(LimboError::ParseError("malformed JSON".to_string())),
    }
}
pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Result<Jsonb> {
match val {
RefValue::Text(text) => {
let res = if text.subtype == TextSubtype::Json || matches!(strict, Conv::Strict) {
// Parse directly as JSON if it's already JSON subtype or strict mode is on
let json = if matches!(strict, Conv::ToString) {
let mut str = text.as_str().replace('"', "\\\"");
str.insert(0, '"');
str.push('"');
Jsonb::from_str(&str)
} else {
Jsonb::from_str(text.as_str())
};
json
Jsonb::from_str_with_mode(text.as_str(), strict)
} else {
// Handle as a string literal otherwise
let mut str = text.as_str().replace('"', "\\\"");
// Quote the string to make it a JSON string
str.insert(0, '"');
str.push('"');
@@ -143,7 +139,40 @@ pub fn convert_ref_dbtype_to_jsonb(val: &RefValue, strict: Conv) -> crate::Resul
res.map_err(|_| LimboError::ParseError("malformed JSON".to_string()))
}
RefValue::Blob(blob) => {
let json = Jsonb::from_raw_data(blob.to_slice());
let bytes = blob.to_slice();
// Valid JSON can start with these whitespace characters
let index = bytes
.iter()
.position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
.unwrap_or(bytes.len());
let slice = &bytes[index..];
let json = match slice {
// branch with no overlapping initial byte
[b'"', ..] | [b'-', ..] | [b'0'..=b'2', ..] => parse_as_json_text(slice)?,
_ => match JsonbHeader::from_slice(0, slice) {
Ok((header, header_offset)) => {
let payload_size = header.payload_size();
let total_expected = header_offset + payload_size;
if total_expected != slice.len() {
parse_as_json_text(slice)?
} else {
let jsonb = Jsonb::from_raw_data(slice);
let is_valid_json = if payload_size <= 7 {
jsonb.is_valid()
} else {
jsonb.element_type().is_ok()
};
if is_valid_json {
jsonb
} else {
parse_as_json_text(slice)?
}
}
}
Err(_) => parse_as_json_text(slice)?,
},
};
json.element_type()?;
Ok(json)
}

View File

@@ -279,6 +279,34 @@ do_execsql_test json_arrow_object {
SELECT '{"a": [1,2,3]}' -> '$.a'
} {{[1,2,3]}}
do_execsql_test json_arrow_blob_object {
SELECT cast('{"age":30,"name":"John"}' as blob) -> '$.age'
} {{30}}
# Distinguishes a valid JSONB blob shaped like [b'{', .., b'}'] from JSON text '{..}'.
# b'{' (0x7B) is a JSONB header byte: ElementType::Array with a PayloadSize of 7.
# b'}' (0x7D) is the payload of the last array element, so the blob also ends in '}'.
# x'7B0707070707177D' = jsonb(["", "", "", "", "", "}"])
do_execsql_test json_arrow_blob_array {
SELECT x'7B0707070707177D' -> '$[5]'
} {\"\}\"}
do_execsql_test json_arrow_blob_number {
SELECT cast('4' as blob) -> '$'
} {{4}}
do_execsql_test json_arrow_blob_number_2 {
SELECT cast(33 as blob) -> '$'
} {{33}}
# jsonb(333)
do_execsql_test json_arrow_blob_number_3 {
SELECT x'33333333' -> '$'
} {{333}}
do_execsql_test json_arrow_blob_negative_number {
SELECT cast('-4' as blob) -> '$'
} {{-4}}
do_execsql_test json_arrow_shift_object {
SELECT '{"a": [1,2,3]}' ->> '$.a'
} {{[1,2,3]}}
@@ -291,7 +319,6 @@ do_execsql_test json_extract_object_3 {
SELECT json_extract('{"a": [1,2,3]}', '$.a', '$.a[0]', '$.a[1]', null, '$.a[3]')
} {{}}
# \x61 is the ASCII code for 'a'
do_execsql_test json_extract_with_escaping {
SELECT json_extract('{"\x61": 1}', '$.a')