Merge 'implement json_pretty' from Pedro Muniz

This PR implements json_pretty. Because support for jsonb is still being
added, this function has the same limitations as json(x). Also, I have not
found a way to reproduce the Blob -> String conversion that SQLite performs.
From my own experimentation, I believe SQLite converts blobs to a lossy
ASCII representation, but I would appreciate some help on this.

Closes #860
This commit is contained in:
Pekka Enberg
2025-02-04 14:51:47 +02:00
7 changed files with 237 additions and 18 deletions

View File

@@ -23,6 +23,7 @@ This document describes the compatibility of Limbo with SQLite.
- [Extensions](#extensions)
- [UUID](#uuid)
- [regexp](#regexp)
- [Vector](#vector)
## Features
@@ -349,7 +350,7 @@ Modifiers:
#### JSON functions
| Function | Status | Comment |
|------------------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------|
| ---------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------- |
| json(json) | Partial | |
| jsonb(json) | | |
| json_array(value1,value2,...) | Yes | |
@@ -367,7 +368,7 @@ Modifiers:
| jsonb_object(label1,value1,...) | | |
| json_patch(json1,json2) | Yes | |
| jsonb_patch(json1,json2) | | |
| json_pretty(json) | | |
| json_pretty(json) | Partial | Shares the same limitations as json(val). Also, when a blob is passed as the indentation argument, its conversion to text is not exactly the same as in SQLite. |
| json_remove(json,path,...) | Partial | Uses same json path parser as json_extract so shares same limitations. |
| jsonb_remove(json,path,...) | | |
| json_replace(json,path,value,...) | | |

View File

@@ -82,6 +82,7 @@ pub enum JsonFunc {
JsonValid,
JsonPatch,
JsonRemove,
JsonPretty,
}
#[cfg(feature = "json")]
@@ -103,6 +104,7 @@ impl Display for JsonFunc {
Self::JsonValid => "json_valid".to_string(),
Self::JsonPatch => "json_patch".to_string(),
Self::JsonRemove => "json_remove".to_string(),
Self::JsonPretty => "json_pretty".to_string(),
}
)
}
@@ -534,6 +536,8 @@ impl Func {
"json_patch" => Ok(Self::Json(JsonFunc::JsonPatch)),
#[cfg(feature = "json")]
"json_remove" => Ok(Self::Json(JsonFunc::JsonRemove)),
#[cfg(feature = "json")]
"json_pretty" => Ok(Self::Json(JsonFunc::JsonPretty)),
"unixepoch" => Ok(Self::Scalar(ScalarFunc::UnixEpoch)),
"julianday" => Ok(Self::Scalar(ScalarFunc::JulianDay)),
"hex" => Ok(Self::Scalar(ScalarFunc::Hex)),

View File

@@ -15,6 +15,7 @@ pub use crate::json::ser::to_string;
use crate::types::{LimboText, OwnedValue, TextSubtype};
use indexmap::IndexMap;
use jsonb::Error as JsonbError;
use ser::to_string_pretty;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
@@ -31,7 +32,7 @@ pub enum Val {
Object(Vec<(String, Val)>),
}
pub fn get_json(json_value: &OwnedValue) -> crate::Result<OwnedValue> {
pub fn get_json(json_value: &OwnedValue, indent: Option<&str>) -> crate::Result<OwnedValue> {
match json_value {
OwnedValue::Text(ref t) => {
// optimization: once we know the subtype is a valid JSON, we do not have
@@ -41,7 +42,10 @@ pub fn get_json(json_value: &OwnedValue) -> crate::Result<OwnedValue> {
}
let json_val = get_json_value(json_value)?;
let json = to_string(&json_val).unwrap();
let json = match indent {
Some(indent) => to_string_pretty(&json_val, indent).unwrap(),
None => to_string(&json_val).unwrap(),
};
Ok(OwnedValue::Text(LimboText::json(Rc::new(json))))
}
@@ -57,7 +61,10 @@ pub fn get_json(json_value: &OwnedValue) -> crate::Result<OwnedValue> {
OwnedValue::Null => Ok(OwnedValue::Null),
_ => {
let json_val = get_json_value(json_value)?;
let json = to_string(&json_val).unwrap();
let json = match indent {
Some(indent) => to_string_pretty(&json_val, indent).unwrap(),
None => to_string(&json_val).unwrap(),
};
Ok(OwnedValue::Text(LimboText::json(Rc::new(json))))
}
@@ -536,7 +543,7 @@ mod tests {
#[test]
fn test_get_json_valid_json5() {
let input = OwnedValue::build_text(Rc::new("{ key: 'value' }".to_string()));
let result = get_json(&input).unwrap();
let result = get_json(&input, None).unwrap();
if let OwnedValue::Text(result_str) = result {
assert!(result_str.value.contains("\"key\":\"value\""));
assert_eq!(result_str.subtype, TextSubtype::Json);
@@ -548,7 +555,7 @@ mod tests {
#[test]
fn test_get_json_valid_json5_double_single_quotes() {
let input = OwnedValue::build_text(Rc::new("{ key: ''value'' }".to_string()));
let result = get_json(&input).unwrap();
let result = get_json(&input, None).unwrap();
if let OwnedValue::Text(result_str) = result {
assert!(result_str.value.contains("\"key\":\"value\""));
assert_eq!(result_str.subtype, TextSubtype::Json);
@@ -560,7 +567,7 @@ mod tests {
#[test]
fn test_get_json_valid_json5_infinity() {
let input = OwnedValue::build_text(Rc::new("{ \"key\": Infinity }".to_string()));
let result = get_json(&input).unwrap();
let result = get_json(&input, None).unwrap();
if let OwnedValue::Text(result_str) = result {
assert!(result_str.value.contains("{\"key\":9e999}"));
assert_eq!(result_str.subtype, TextSubtype::Json);
@@ -572,7 +579,7 @@ mod tests {
#[test]
fn test_get_json_valid_json5_negative_infinity() {
let input = OwnedValue::build_text(Rc::new("{ \"key\": -Infinity }".to_string()));
let result = get_json(&input).unwrap();
let result = get_json(&input, None).unwrap();
if let OwnedValue::Text(result_str) = result {
assert!(result_str.value.contains("{\"key\":-9e999}"));
assert_eq!(result_str.subtype, TextSubtype::Json);
@@ -584,7 +591,7 @@ mod tests {
#[test]
fn test_get_json_valid_json5_nan() {
let input = OwnedValue::build_text(Rc::new("{ \"key\": NaN }".to_string()));
let result = get_json(&input).unwrap();
let result = get_json(&input, None).unwrap();
if let OwnedValue::Text(result_str) = result {
assert!(result_str.value.contains("{\"key\":null}"));
assert_eq!(result_str.subtype, TextSubtype::Json);
@@ -596,7 +603,7 @@ mod tests {
#[test]
fn test_get_json_invalid_json5() {
let input = OwnedValue::build_text(Rc::new("{ key: value }".to_string()));
let result = get_json(&input);
let result = get_json(&input, None);
match result {
Ok(_) => panic!("Expected error for malformed JSON"),
Err(e) => assert!(e.to_string().contains("malformed JSON")),
@@ -606,7 +613,7 @@ mod tests {
#[test]
fn test_get_json_valid_jsonb() {
let input = OwnedValue::build_text(Rc::new("{\"key\":\"value\"}".to_string()));
let result = get_json(&input).unwrap();
let result = get_json(&input, None).unwrap();
if let OwnedValue::Text(result_str) = result {
assert!(result_str.value.contains("\"key\":\"value\""));
assert_eq!(result_str.subtype, TextSubtype::Json);
@@ -618,7 +625,7 @@ mod tests {
#[test]
fn test_get_json_invalid_jsonb() {
let input = OwnedValue::build_text(Rc::new("{key:\"value\"".to_string()));
let result = get_json(&input);
let result = get_json(&input, None);
match result {
Ok(_) => panic!("Expected error for malformed JSON"),
Err(e) => assert!(e.to_string().contains("malformed JSON")),
@@ -629,7 +636,7 @@ mod tests {
fn test_get_json_blob_valid_jsonb() {
let binary_json = b"\x40\0\0\x01\x10\0\0\x03\x10\0\0\x03\x61\x73\x64\x61\x64\x66".to_vec();
let input = OwnedValue::Blob(Rc::new(binary_json));
let result = get_json(&input).unwrap();
let result = get_json(&input, None).unwrap();
if let OwnedValue::Text(result_str) = result {
assert!(result_str.value.contains("\"asd\":\"adf\""));
assert_eq!(result_str.subtype, TextSubtype::Json);
@@ -642,7 +649,7 @@ mod tests {
fn test_get_json_blob_invalid_jsonb() {
let binary_json: Vec<u8> = vec![0xA2, 0x62, 0x6B, 0x31, 0x62, 0x76]; // Incomplete binary JSON
let input = OwnedValue::Blob(Rc::new(binary_json));
let result = get_json(&input);
let result = get_json(&input, None);
match result {
Ok(_) => panic!("Expected error for malformed JSON"),
Err(e) => assert!(e.to_string().contains("malformed JSON")),
@@ -652,7 +659,7 @@ mod tests {
#[test]
fn test_get_json_non_text() {
let input = OwnedValue::Null;
let result = get_json(&input).unwrap();
let result = get_json(&input, None).unwrap();
if let OwnedValue::Null = result {
// Test passed
} else {
@@ -809,7 +816,7 @@ mod tests {
#[test]
fn test_json_array_length_simple_json_subtype() {
let input = OwnedValue::build_text(Rc::new("[1,2,3]".to_string()));
let wrapped = get_json(&input).unwrap();
let wrapped = get_json(&input, None).unwrap();
let result = json_array_length(&wrapped, None).unwrap();
if let OwnedValue::Integer(res) = result {

View File

@@ -25,6 +25,16 @@ where
Ok(string)
}
/// Attempts to serialize the input as a JSON5 string (actually a JSON string).
pub fn to_string_pretty<T>(value: &T, indent: &str) -> Result<String>
where
T: Serialize,
{
let vec = to_vec_pretty(value, indent)?;
let string = String::from_utf8(vec).map_err(|err| Error::from(err.utf8_error()))?;
Ok(string)
}
struct Serializer<W, F = CompactFormatter> {
writer: W,
formatter: F,
@@ -39,6 +49,17 @@ where
}
}
impl<'a, W> Serializer<W, PrettyFormatter<'a>>
where
    W: io::Write,
{
    /// Builds a serializer that emits pretty-printed JSON into `writer`,
    /// passing the bytes of `indent` to the formatter as its indentation.
    #[inline]
    pub fn pretty(writer: W, indent: &'a str) -> Self {
        let formatter = PrettyFormatter::with_indent(indent.as_bytes());
        Self::with_formatter(writer, formatter)
    }
}
impl<W, F> Serializer<W, F>
where
W: io::Write,
@@ -553,6 +574,24 @@ where
Ok(writer)
}
/// Serializes `value` as pretty-printed JSON straight into `writer`,
/// using `indent` as the indentation text.
pub fn to_writer_pretty<W, T>(writer: W, value: &T, indent: &str) -> Result<()>
where
    W: io::Write,
    T: ?Sized + Serialize,
{
    value.serialize(&mut Serializer::pretty(writer, indent))
}
/// Serializes `value` as pretty-printed JSON into a newly allocated byte
/// vector, using `indent` as the indentation text.
pub fn to_vec_pretty<T>(value: &T, indent: &str) -> Result<Vec<u8>>
where
    T: ?Sized + Serialize,
{
    // Start with a small preallocation; the vector grows as needed.
    let mut buffer = Vec::with_capacity(128);
    to_writer_pretty(&mut buffer, value, indent)?;
    Ok(buffer)
}
/// Represents a character escape code in a type-safe manner.
pub enum CharEscape {
/// An escaped quote `"`

View File

@@ -1015,6 +1015,18 @@ pub fn translate_expr(
});
Ok(target_register)
}
JsonFunc::JsonPretty => {
let args = expect_arguments_max!(args, 2, j);
translate_function(
program,
args,
referenced_tables,
resolver,
target_register,
func_ctx,
)
}
},
Func::Scalar(srf) => {
match srf {

View File

@@ -1699,7 +1699,7 @@ impl Program {
crate::function::Func::Json(json_func) => match json_func {
JsonFunc::Json => {
let json_value = &state.registers[*start_reg];
let json_str = get_json(json_value);
let json_str = get_json(json_value, None);
match json_str {
Ok(json) => state.registers[*dest] = json,
Err(e) => return Err(e),
@@ -1796,6 +1796,40 @@ impl Program {
&state.registers[*start_reg..*start_reg + arg_count],
)?;
}
JsonFunc::JsonPretty => {
// First argument: the JSON value to pretty-print.
let json_value = &state.registers[*start_reg];
// Optional second argument: the indentation value, present only when
// more than one argument was supplied.
let indent = if arg_count > 1 {
Some(&state.registers[*start_reg + 1])
} else {
None
};
// SQLite is believed to convert a blob indentation argument to text
// with a lossy ASCII conversion. Rust strings are UTF-8, so the lossy
// UTF-8 conversion used below can yield a slightly different
// indentation string than SQLite does for some blobs.
let indent = match indent {
Some(value) => match value {
OwnedValue::Text(text) => text.value.as_str(),
// Numbers are used via their string representation.
OwnedValue::Integer(val) => &val.to_string(),
OwnedValue::Float(val) => &val.to_string(),
OwnedValue::Blob(val) => &String::from_utf8_lossy(val),
// An aggregate context is resolved to its final value first and
// then converted with the same rules as above.
OwnedValue::Agg(ctx) => match ctx.final_value() {
OwnedValue::Text(text) => text.value.as_str(),
OwnedValue::Integer(val) => &val.to_string(),
OwnedValue::Float(val) => &val.to_string(),
OwnedValue::Blob(val) => &String::from_utf8_lossy(val),
_ => " ",
},
// NULL (and any other value kind) falls back to the default indentation.
_ => " ",
},
// Second argument omitted: use the default indentation. SQLite documents
// the default (argument omitted or NULL) as four spaces per level.
None => " ",
};
// Errors from JSON parsing/serialization are propagated to the caller.
let json_str = get_json(json_value, Some(indent))?;
state.registers[*dest] = json_str;
}
},
crate::function::Func::Scalar(scalar_func) => match scalar_func {
ScalarFunc::Cast => {

View File

@@ -60,6 +60,128 @@ do_execsql_test json5-multi-comment {
123 /* xyz */ , /* 123 */ }')
} {{{"aaa":123}}}
do_execsql_test json5-ecma-script-1-pretty {
select json_pretty('{a:5,b:6}') ;
} {{{
"a": 5,
"b": 6
}}}
do_execsql_test json5-ecma-script-2-pretty {
select json_pretty('{a:5,a:3}') ;
} {{{
"a": 5,
"a": 3
}}}
do_execsql_test json5-ecma-script-3-pretty {
SELECT json_pretty('{ MNO_123$xyz : 789 }');
} {{{
"MNO_123$xyz": 789
}}}
do_execsql_test json5-with-single-trailing-comma-valid-pretty {
select json_pretty('{"a":5, "b":6, }');
} {{{
"a": 5,
"b": 6
}}}
do_execsql_test json5-single-quoted-pretty {
SELECT json_pretty('{"a": ''abcd''}');
} {{{
"a": "abcd"
}}}
do_execsql_test json5-hexadecimal-1-pretty {
SELECT json_pretty('{a: 0x0}');
} {{{
"a": 0
}}}
do_execsql_test json5-hexadecimal-2-pretty {
SELECT json_pretty('{a: 0xabcdef}');
} {{{
"a": 11259375
}}}
# Negative hexadecimal literal; uses its own test name so failures are
# not confused with the positive-literal case.
do_execsql_test json5-hexadecimal-3-pretty {
SELECT json_pretty('{a: -0xabcdef}');
} {{{
"a": -11259375
}}}
do_execsql_test json5-number-1-pretty {
SELECT json_pretty('{x: 4.}');
} {{{
"x": 4.0
}}}
do_execsql_test json5-number-2-pretty {
SELECT json_pretty('{x: +4.}');
} {{{
"x": 4.0
}}}
do_execsql_test json5-number-3-pretty {
SELECT json_pretty('{x: -4.}');
} {{{
"x": -4.0
}}}
do_execsql_test json5-number-5-pretty {
SELECT json_pretty('{x: Infinity}');
} {{{
"x": 9e999
}}}
do_execsql_test json5-number-6-pretty {
SELECT json_pretty('{x: -Infinity}');
} {{{
"x": -9e999
}}}
do_execsql_test json5-multi-comment-pretty {
SELECT json_pretty(' /* abc */ { /*def*/ aaa /* xyz */ : // to the end of line
123 /* xyz */ , /* 123 */ }');
} {{{
"aaa": 123
}}}
do_execsql_test json-pretty-ident-1 {
SELECT json_pretty('{x: 1}', '');
} {{{
"x": 1
}}}
do_execsql_test json-pretty-ident-2 {
SELECT json_pretty('{x: 1}', '11');
} {{{
11"x": 1
}}}
do_execsql_test json-pretty-ident-null {
SELECT json_pretty('{x: 1}', NULL);
} {{{
"x": 1
}}}
do_execsql_test json-pretty-ident-blob-1 {
SELECT json_pretty('{x: 1}', x'33');
} {{{
3"x": 1
}}}
# TODO
# The conversion from blob to string is currently not exactly the same as in SQLite.
# In SQLite, the blob below evaluates to a TEXT value of two whitespace characters.
# do_execsql_test json-pretty-ident-blob-2 {
# SELECT json_pretty('{x: 1}', x'1111');
# } {{{
# "x": 1
# }}}
do_execsql_test json_array_str {
SELECT json_array('a')
} {{["a"]}}