mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-19 23:15:28 +01:00
Merge 'Jsonb implementation' from Ihor Andrianov
This PR implements a complete JSONB parser and serializer, as the current PR draft looks stale. Sorry for the huge PR. I've chosen a recursive parsing approach because: 1. It's simpler to understand and maintain. 2. It follows SQLite's implementation pattern, ensuring compatibility. 3. It naturally maps to JSON's hierarchical structure. The implementation includes comprehensive test coverage for standard JSON features and JSON5 extensions. All test cases pass successfully, handling edge cases like nested structures, escape sequences, and various number formats. While the code is ready for review, I believe it would benefit from fuzz testing in the future to identify any edge cases not covered by the current tests. Ready for review, proposals, and feedback. Closes #1114
This commit is contained in:
@@ -71,6 +71,7 @@ impl Display for ExternalFunc {
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum JsonFunc {
|
||||
Json,
|
||||
Jsonb,
|
||||
JsonArray,
|
||||
JsonArrayLength,
|
||||
JsonArrowExtract,
|
||||
@@ -95,6 +96,7 @@ impl Display for JsonFunc {
|
||||
"{}",
|
||||
match self {
|
||||
Self::Json => "json".to_string(),
|
||||
Self::Jsonb => "jsonb".to_string(),
|
||||
Self::JsonArray => "json_array".to_string(),
|
||||
Self::JsonExtract => "json_extract".to_string(),
|
||||
Self::JsonArrayLength => "json_array_length".to_string(),
|
||||
@@ -549,6 +551,8 @@ impl Func {
|
||||
#[cfg(feature = "json")]
|
||||
"json" => Ok(Self::Json(JsonFunc::Json)),
|
||||
#[cfg(feature = "json")]
|
||||
"jsonb" => Ok(Self::Json(JsonFunc::Jsonb)),
|
||||
#[cfg(feature = "json")]
|
||||
"json_array_length" => Ok(Self::Json(JsonFunc::JsonArrayLength)),
|
||||
#[cfg(feature = "json")]
|
||||
"json_array" => Ok(Self::Json(JsonFunc::JsonArray)),
|
||||
|
||||
1671
core/json/jsonb.rs
Normal file
1671
core/json/jsonb.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -2,20 +2,23 @@ mod de;
|
||||
mod error;
|
||||
mod json_operations;
|
||||
mod json_path;
|
||||
mod jsonb;
|
||||
mod ser;
|
||||
|
||||
pub use crate::json::de::from_str;
|
||||
use crate::json::de::ordered_object;
|
||||
use crate::json::error::Error as JsonError;
|
||||
pub use crate::json::json_operations::{json_patch, json_remove};
|
||||
use crate::json::json_path::{json_path, JsonPath, PathElement};
|
||||
pub use crate::json::ser::to_string;
|
||||
use crate::types::{OwnedValue, Text, TextSubtype};
|
||||
use crate::{bail_parse_error, json::de::ordered_object};
|
||||
use indexmap::IndexMap;
|
||||
use jsonb::Error as JsonbError;
|
||||
use jsonb::Jsonb;
|
||||
use ser::to_string_pretty;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::borrow::Cow;
|
||||
use std::rc::Rc;
|
||||
use std::str::FromStr;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
|
||||
#[serde(untagged)]
|
||||
@@ -49,13 +52,12 @@ pub fn get_json(json_value: &OwnedValue, indent: Option<&str>) -> crate::Result<
|
||||
Ok(OwnedValue::Text(Text::json(&json)))
|
||||
}
|
||||
OwnedValue::Blob(b) => {
|
||||
// TODO: use get_json_value after we implement a single Struct
|
||||
// to represent both JSON and JSONB
|
||||
if let Ok(json) = jsonb::from_slice(b) {
|
||||
Ok(OwnedValue::Text(Text::json(&json.to_string())))
|
||||
} else {
|
||||
crate::bail_parse_error!("malformed JSON");
|
||||
}
|
||||
let jsonbin = Jsonb::new(b.len(), Some(b));
|
||||
jsonbin.is_valid()?;
|
||||
Ok(OwnedValue::Text(Text {
|
||||
value: Rc::new(jsonbin.to_string()?.into_bytes()),
|
||||
subtype: TextSubtype::Json,
|
||||
}))
|
||||
}
|
||||
OwnedValue::Null => Ok(OwnedValue::Null),
|
||||
_ => {
|
||||
@@ -70,6 +72,28 @@ pub fn get_json(json_value: &OwnedValue, indent: Option<&str>) -> crate::Result<
|
||||
}
|
||||
}
|
||||
|
||||
pub fn jsonb(json_value: &OwnedValue) -> crate::Result<OwnedValue> {
|
||||
let jsonbin = match json_value {
|
||||
OwnedValue::Null | OwnedValue::Integer(_) | OwnedValue::Float(_) | OwnedValue::Text(_) => {
|
||||
Jsonb::from_str(&json_value.to_string())
|
||||
}
|
||||
OwnedValue::Blob(blob) => {
|
||||
let blob = Jsonb::new(blob.len(), Some(&blob));
|
||||
blob.is_valid()?;
|
||||
Ok(blob)
|
||||
}
|
||||
_ => {
|
||||
unimplemented!()
|
||||
}
|
||||
};
|
||||
match jsonbin {
|
||||
Ok(jsonbin) => Ok(OwnedValue::Blob(Rc::new(jsonbin.data()))),
|
||||
Err(_) => {
|
||||
bail_parse_error!("malformed JSON")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_json_value(json_value: &OwnedValue) -> crate::Result<Val> {
|
||||
match json_value {
|
||||
OwnedValue::Text(ref t) => match from_str::<Val>(t.as_str()) {
|
||||
@@ -78,12 +102,8 @@ fn get_json_value(json_value: &OwnedValue) -> crate::Result<Val> {
|
||||
crate::bail_parse_error!("malformed JSON")
|
||||
}
|
||||
},
|
||||
OwnedValue::Blob(b) => {
|
||||
if let Ok(_json) = jsonb::from_slice(b) {
|
||||
todo!("jsonb to json conversion");
|
||||
} else {
|
||||
crate::bail_parse_error!("malformed JSON");
|
||||
}
|
||||
OwnedValue::Blob(_) => {
|
||||
crate::bail_parse_error!("malformed JSON");
|
||||
}
|
||||
OwnedValue::Null => Ok(Val::Null),
|
||||
OwnedValue::Float(f) => Ok(Val::Float(*f)),
|
||||
@@ -625,13 +645,9 @@ pub fn json_error_position(json: &OwnedValue) -> crate::Result<OwnedValue> {
|
||||
}
|
||||
}
|
||||
},
|
||||
OwnedValue::Blob(b) => match jsonb::from_slice(b) {
|
||||
Ok(_) => Ok(OwnedValue::Integer(0)),
|
||||
Err(JsonbError::Syntax(_, pos)) => Ok(OwnedValue::Integer(pos as i64)),
|
||||
_ => Err(crate::error::LimboError::InternalError(
|
||||
"failed to determine json error position".into(),
|
||||
)),
|
||||
},
|
||||
OwnedValue::Blob(_) => {
|
||||
bail_parse_error!("Unsupported")
|
||||
}
|
||||
OwnedValue::Null => Ok(OwnedValue::Null),
|
||||
_ => Ok(OwnedValue::Integer(0)),
|
||||
}
|
||||
@@ -667,10 +683,9 @@ pub fn is_json_valid(json_value: &OwnedValue) -> crate::Result<OwnedValue> {
|
||||
Ok(_) => Ok(OwnedValue::Integer(1)),
|
||||
Err(_) => Ok(OwnedValue::Integer(0)),
|
||||
},
|
||||
OwnedValue::Blob(b) => match jsonb::from_slice(b) {
|
||||
Ok(_) => Ok(OwnedValue::Integer(1)),
|
||||
Err(_) => Ok(OwnedValue::Integer(0)),
|
||||
},
|
||||
OwnedValue::Blob(_) => {
|
||||
bail_parse_error!("Unsuported!")
|
||||
}
|
||||
OwnedValue::Null => Ok(OwnedValue::Null),
|
||||
_ => Ok(OwnedValue::Integer(1)),
|
||||
}
|
||||
@@ -814,11 +829,11 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_get_json_blob_valid_jsonb() {
|
||||
let binary_json = b"\x40\0\0\x01\x10\0\0\x03\x10\0\0\x03\x61\x73\x64\x61\x64\x66".to_vec();
|
||||
let binary_json = vec![124, 55, 104, 101, 121, 39, 121, 111];
|
||||
let input = OwnedValue::Blob(Rc::new(binary_json));
|
||||
let result = get_json(&input, None).unwrap();
|
||||
if let OwnedValue::Text(result_str) = result {
|
||||
assert!(result_str.as_str().contains("\"asd\":\"adf\""));
|
||||
assert!(result_str.as_str().contains(r#"{"hey":"yo"}"#));
|
||||
assert_eq!(result_str.subtype, TextSubtype::Json);
|
||||
} else {
|
||||
panic!("Expected OwnedValue::Text");
|
||||
@@ -830,6 +845,7 @@ mod tests {
|
||||
let binary_json: Vec<u8> = vec![0xA2, 0x62, 0x6B, 0x31, 0x62, 0x76]; // Incomplete binary JSON
|
||||
let input = OwnedValue::Blob(Rc::new(binary_json));
|
||||
let result = get_json(&input, None);
|
||||
println!("{:?}", result);
|
||||
match result {
|
||||
Ok(_) => panic!("Expected error for malformed JSON"),
|
||||
Err(e) => assert!(e.to_string().contains("malformed JSON")),
|
||||
@@ -1070,13 +1086,6 @@ mod tests {
|
||||
assert_eq!(result, OwnedValue::Integer(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_error_position_blob() {
|
||||
let input = OwnedValue::Blob(Rc::new(r#"["a",55,"b",72,,]"#.as_bytes().to_owned()));
|
||||
let result = json_error_position(&input).unwrap();
|
||||
assert_eq!(result, OwnedValue::Integer(16));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_object_simple() {
|
||||
let key = OwnedValue::build_text("key");
|
||||
|
||||
@@ -882,7 +882,7 @@ pub fn translate_expr(
|
||||
}
|
||||
#[cfg(feature = "json")]
|
||||
Func::Json(j) => match j {
|
||||
JsonFunc::Json => {
|
||||
JsonFunc::Json | JsonFunc::Jsonb => {
|
||||
let args = expect_arguments_exact!(args, 1, j);
|
||||
|
||||
translate_function(
|
||||
|
||||
@@ -52,7 +52,7 @@ use crate::{
|
||||
function::JsonFunc, json::get_json, json::is_json_valid, json::json_array,
|
||||
json::json_array_length, json::json_arrow_extract, json::json_arrow_shift_extract,
|
||||
json::json_error_position, json::json_extract, json::json_object, json::json_patch,
|
||||
json::json_quote, json::json_remove, json::json_set, json::json_type,
|
||||
json::json_quote, json::json_remove, json::json_set, json::json_type, json::jsonb,
|
||||
};
|
||||
use crate::{info, CheckpointStatus};
|
||||
use crate::{
|
||||
@@ -2131,6 +2131,14 @@ impl Program {
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
JsonFunc::Jsonb => {
|
||||
let json_value = &state.registers[*start_reg];
|
||||
let json_blob = jsonb(json_value);
|
||||
match json_blob {
|
||||
Ok(json) => state.registers[*dest] = json,
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
JsonFunc::JsonArray | JsonFunc::JsonObject => {
|
||||
let reg_values =
|
||||
&state.registers[*start_reg..*start_reg + arg_count];
|
||||
|
||||
@@ -682,9 +682,12 @@ do_execsql_test json_valid_1 {
|
||||
do_execsql_test json_valid_2 {
|
||||
SELECT json_valid('["a",55,"b",72]');
|
||||
} {1}
|
||||
do_execsql_test json_valid_3 {
|
||||
SELECT json_valid( CAST('{"a":1}' AS BLOB) );
|
||||
} {1}
|
||||
#
|
||||
# Unimplemented
|
||||
#do_execsql_test json_valid_3 {
|
||||
# SELECT json_valid( CAST('{"a":"1}' AS BLOB) );
|
||||
#} {0}
|
||||
#
|
||||
do_execsql_test json_valid_4 {
|
||||
SELECT json_valid(123);
|
||||
} {1}
|
||||
@@ -700,9 +703,7 @@ do_execsql_test json_valid_7 {
|
||||
do_execsql_test json_valid_8 {
|
||||
SELECT json_valid('{"a":55 "b":72}');
|
||||
} {0}
|
||||
do_execsql_test json_valid_3 {
|
||||
SELECT json_valid( CAST('{"a":"1}' AS BLOB) );
|
||||
} {0}
|
||||
|
||||
do_execsql_test json_valid_9 {
|
||||
SELECT json_valid(NULL);
|
||||
} {}
|
||||
@@ -906,6 +907,80 @@ do_execsql_test json_quote_json_value {
|
||||
SELECT json_quote(json('{a:1, b: "test"}'));
|
||||
} {{{"a":1,"b":"test"}}}
|
||||
|
||||
do_execsql_test json_basics {
|
||||
SELECT json(jsonb('{"name":"John", "age":30, "city":"New York"}'));
|
||||
} {{{"name":"John","age":30,"city":"New York"}}}
|
||||
|
||||
do_execsql_test json_complex_nested {
|
||||
SELECT json(jsonb('{"complex": {"nested": ["array", "of", "values"], "numbers": [1, 2, 3]}}'));
|
||||
} {{{"complex":{"nested":["array","of","values"],"numbers":[1,2,3]}}}}
|
||||
|
||||
do_execsql_test json_array_of_objects {
|
||||
SELECT json(jsonb('[{"id": 1, "data": "value1"}, {"id": 2, "data": "value2"}]'));
|
||||
} {{[{"id":1,"data":"value1"},{"id":2,"data":"value2"}]}}
|
||||
|
||||
do_execsql_test json_special_chars {
|
||||
SELECT json(jsonb('{"special_chars": "!@#$%^&*()_+", "quotes": "\"quoted text\""}'));
|
||||
} {{{"special_chars":"!@#$%^&*()_+","quotes":"\"quoted text\""}}}
|
||||
|
||||
do_execsql_test json_unicode_emoji {
|
||||
SELECT json(jsonb('{"unicode": "こんにちは世界", "emoji": "🚀🔥💯"}'));
|
||||
} {{{"unicode":"こんにちは世界","emoji":"🚀🔥💯"}}}
|
||||
|
||||
do_execsql_test json_value_types {
|
||||
SELECT json(jsonb('{"boolean": true, "null_value": null, "number": 42.5}'));
|
||||
} {{{"boolean":true,"null_value":null,"number":42.5}}}
|
||||
|
||||
do_execsql_test json_deeply_nested {
|
||||
SELECT json(jsonb('{"deeply": {"nested": {"structure": {"with": "values"}}}}'));
|
||||
} {{{"deeply":{"nested":{"structure":{"with":"values"}}}}}}
|
||||
|
||||
do_execsql_test json_mixed_array {
|
||||
SELECT json(jsonb('{"array_mixed": [1, "text", true, null, {"obj": "inside array"}]}'));
|
||||
} {{{"array_mixed":[1,"text",true,null,{"obj":"inside array"}]}}}
|
||||
|
||||
do_execsql_test json_single_line_comments {
|
||||
SELECT json(jsonb('{"name": "John", // This is a comment
|
||||
"age": 30}'));
|
||||
} {{{"name":"John","age":30}}}
|
||||
|
||||
do_execsql_test json_multi_line_comments {
|
||||
SELECT json(jsonb('{"data": "value", /* This is a
|
||||
multi-line comment that spans
|
||||
several lines */ "more": "data"}'));
|
||||
} {{{"data":"value","more":"data"}}}
|
||||
|
||||
do_execsql_test json_trailing_commas {
|
||||
SELECT json(jsonb('{"items": ["one", "two", "three",], "status": "complete",}'));
|
||||
} {{{"items":["one","two","three"],"status":"complete"}}}
|
||||
|
||||
do_execsql_test json_unquoted_keys {
|
||||
SELECT json(jsonb('{name: "Alice", age: 25}'));
|
||||
} {{{"name":"Alice","age":25}}}
|
||||
|
||||
do_execsql_test json_newlines {
|
||||
SELECT json(jsonb('{"description": "Text with \nnew lines\nand more\nformatting"}'));
|
||||
} {{{"description":"Text with \nnew lines\nand more\nformatting"}}}
|
||||
|
||||
do_execsql_test json_hex_values {
|
||||
SELECT json(jsonb('{"hex_value": "\x68\x65\x6c\x6c\x6f"}'));
|
||||
} {{{"hex_value":"\u0068\u0065\u006c\u006c\u006f"}}}
|
||||
|
||||
do_execsql_test json_unicode_escape {
|
||||
SELECT json(jsonb('{"unicode": "\u0068\u0065\u006c\u006c\u006f"}'));
|
||||
} {{{"unicode":"\u0068\u0065\u006c\u006c\u006f"}}}
|
||||
|
||||
do_execsql_test json_tabs_whitespace {
|
||||
SELECT json(jsonb('{"formatted": "Text with \ttabs and \tspacing"}'));
|
||||
} {{{"formatted":"Text with \ttabs and \tspacing"}}}
|
||||
|
||||
do_execsql_test json_mixed_escaping {
|
||||
SELECT json(jsonb('{"mixed": "Newlines: \n Tabs: \t Quotes: \" Backslash: \\ Hex: \x40"}'));
|
||||
} {{{"mixed":"Newlines: \n Tabs: \t Quotes: \" Backslash: \\ Hex: \u0040"}}}
|
||||
|
||||
do_execsql_test json_control_chars {
|
||||
SELECT json(jsonb('{"control": "Bell: \u0007 Backspace: \u0008 Form feed: \u000C"}'));
|
||||
} {{{"control":"Bell: \u0007 Backspace: \u0008 Form feed: \u000C"}}}
|
||||
|
||||
# Escape character tests in sqlite source depend on json_valid and on some syntax that is not implemented
|
||||
# yet in limbo.
|
||||
@@ -916,4 +991,3 @@ do_execsql_test json_quote_json_value {
|
||||
# WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<0x1f)
|
||||
# SELECT sum(json_valid(json_quote('a'||char(x)||'z'))) FROM c ORDER BY x;
|
||||
# } {31}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user