From 303a687e65f28503bceb4b8b6ae9151163142a6d Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 4 Feb 2025 15:36:36 -0300 Subject: [PATCH 1/9] rebase to main --- core/function.rs | 5 +++++ core/json/mod.rs | 46 ++++++++++++++++++++++++++++++++++++++++++ core/translate/expr.rs | 11 ++++++++++ core/vdbe/mod.rs | 10 ++++++++- testing/json.test | 41 +++++++++++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+), 1 deletion(-) diff --git a/core/function.rs b/core/function.rs index 83c755796..99abf1864 100644 --- a/core/function.rs +++ b/core/function.rs @@ -84,6 +84,7 @@ pub enum JsonFunc { JsonRemove, JsonPretty, JsonSet, + JsonQuote, } #[cfg(feature = "json")] @@ -107,6 +108,7 @@ impl Display for JsonFunc { Self::JsonRemove => "json_remove".to_string(), Self::JsonPretty => "json_pretty".to_string(), Self::JsonSet => "json_set".to_string(), + Self::JsonQuote => "json_quote".to_string(), } ) } @@ -134,6 +136,7 @@ impl Display for VectorFunc { } } + #[derive(Debug, Clone)] pub enum AggFunc { Avg, @@ -568,6 +571,8 @@ impl Func { "json_pretty" => Ok(Self::Json(JsonFunc::JsonPretty)), #[cfg(feature = "json")] "json_set" => Ok(Self::Json(JsonFunc::JsonSet)), + #[cfg(feature = "json")] + "json_quote" => Ok(Self::Json(JsonFunc::JsonQuote)), "unixepoch" => Ok(Self::Scalar(ScalarFunc::UnixEpoch)), "julianday" => Ok(Self::Scalar(ScalarFunc::JulianDay)), "hex" => Ok(Self::Scalar(ScalarFunc::Hex)), diff --git a/core/json/mod.rs b/core/json/mod.rs index 31f12ba14..d9ebb9405 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -15,6 +15,7 @@ pub use crate::json::ser::to_string; use crate::types::{OwnedValue, Text, TextSubtype}; use indexmap::IndexMap; use jsonb::Error as JsonbError; +use rustix::path::Arg; use ser::to_string_pretty; use serde::{Deserialize, Serialize}; @@ -674,6 +675,51 @@ pub fn is_json_valid(json_value: &OwnedValue) -> crate::Result { } } +pub fn json_quote(value: &OwnedValue) -> crate::Result { + match value { + OwnedValue::Text(ref t) => { + // If X is a JSON value returned by another JSON function, + // then this function is a no-op + if t.subtype == TextSubtype::Json { + // Should just return the json value with no quotes + return Ok(value.to_owned()); + } + + let escaped_value: String = t + .value.to_string_lossy() + .chars() + .flat_map(|c| match c { + '"' => vec!['\\', c], + '\n' => vec!['\\', 'n'], + '\r' => vec!['\\', 'r'], + '\t' => vec!['\\', 't'], + '\\' => vec!['\\', '\\'], + '\u{0008}' => vec!['\\', 'b'], + '\u{000c}' => vec!['\\', 'f'], + c => vec![c], + }) + .collect(); + + let quoted_value = format!("\"{}\"", escaped_value); + + Ok(OwnedValue::Text(Text::new(Rc::new(quoted_value)))) + } + // Numbers are unquoted in json + OwnedValue::Integer(ref int) => Ok(OwnedValue::Integer(int.to_owned())), + OwnedValue::Float(ref float) => Ok(OwnedValue::Float(float.to_owned())), + OwnedValue::Blob(_) => crate::bail_constraint_error!("JSON cannot hold BLOB values"), + OwnedValue::Null => { + let null_value = "null".to_string(); + + Ok(OwnedValue::Text(Text::new(Rc::new(null_value)))) + } + _ => { + // TODO not too sure what message should be here + crate::bail_parse_error!("Syntax error"); + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/core/translate/expr.rs b/core/translate/expr.rs index c23cb053a..9c6b12f0d 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1017,6 +1017,17 @@ pub fn translate_expr( }); Ok(target_register) } + JsonFunc::JsonQuote => { + let args = expect_arguments_exact!(args, 1, j); + translate_function( + program, + args, + referenced_tables, + resolver, + target_register, + func_ctx, + ) + } JsonFunc::JsonPretty => { let args = expect_arguments_max!(args, 2, j); diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 5003b72c6..b0720b772 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -49,7 +49,7 @@ use crate::{ function::JsonFunc, json::get_json, json::is_json_valid, json::json_array, json::json_array_length, json::json_arrow_extract, json::json_arrow_shift_extract, json::json_error_position, json::json_extract, json::json_object, json::json_patch, - json::json_remove, json::json_set, json::json_type, + json::json_quote, json::json_remove, json::json_set, json::json_type, }; use crate::{resolve_ext_path, Connection, Result, TransactionState, DATABASE_VERSION}; use insn::{ @@ -1968,6 +1968,14 @@ impl Program { Err(e) => return Err(e), } } + JsonFunc::JsonQuote => { + let json_value = &state.registers[*start_reg]; + + match json_quote(json_value) { + Ok(result) => state.registers[*dest] = result, + Err(e) => return Err(e), + } + } }, crate::function::Func::Scalar(scalar_func) => match scalar_func { ScalarFunc::Cast => { diff --git a/testing/json.test b/testing/json.test index 3070c6088..92839b06d 100755 --- a/testing/json.test +++ b/testing/json.test @@ -878,3 +878,44 @@ do_execsql_test json_set_add_array_in_array_in_nested_object { do_execsql_test json_set_add_array_in_array_in_nested_object_out_of_bounds { SELECT json_set('{}', '$.object[123].another', 'value', '$.field', 'value'); } {{{"field":"value"}}} + +# The json_quote() function transforms an SQL value into a JSON value. +# String values are quoted and interior quotes are escaped. NULL values +# are rendered as the unquoted string "null". +# +do_execsql_test json_quote_string_literal { + SELECT json_quote('abc"xyz'); +} {{"abc\"xyz"}} +do_execsql_test json_quote_float { + SELECT json_quote(3.14159); +} {3.14159} +do_execsql_test json_quote_integer { + SELECT json_quote(12345); +} {12345} +do_execsql_test json_quote_null { + SELECT json_quote(null); +} {"null"} +do_execsql_test json_quote_null_caps { + SELECT json_quote(NULL); +} null +do_execsql_test json_quote_json_value { + SELECT json_quote(json('{a:1, b: "test"}')); +} {{{"a":1,"b":"test"}}} + + +# Escape character tests in sqlite source depend on json_valid +# See https://github.com/sqlite/sqlite/blob/255548562b125e6c148bb27d49aaa01b2fe61dba/test/json102.test#L690 +# So for now not all control characters escaped are tested + + +# TODO No catchsql tests function to test these on +#do_catchsql_test json_quote_blob { +# SELECT json_quote(x'3031323334'); +#} {1 {JSON cannot hold BLOB values}} +#do_catchsql_test json_quote_more_than_one_arg { +# SELECT json_quote(123,456) +#} {1 {json_quote function called with not exactly 1 arguments}} +#do_catchsql_test json_quote_no_args { +# SELECT json_quote() +#} {1 {json_quote function with no arguments}} + From 782a18d4bdda534cb0a6cce02cc7e69d0586623c Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 21 Jan 2025 15:53:33 -0300 Subject: [PATCH 2/9] modify COMPAT.md --- COMPAT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index 1c2b9b227..772d53a43 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -380,7 +380,7 @@ Modifiers: | json_type(json,path) | Yes | | | json_valid(json) | Yes | | | json_valid(json,flags) | | | -| json_quote(value) | | | +| json_quote(value) | Yes | | | json_group_array(value) | | | | jsonb_group_array(value) | | | | json_group_object(label,value) | | | From 26388cc802b8adb7e63a0f69c3761a0c980f8e03 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 21 Jan 2025 16:12:00 -0300 Subject: [PATCH 3/9] fix: cargo fmt --- core/translate/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 9c6b12f0d..8ddb580df 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -975,7 +975,7 @@ pub fn translate_expr( translate_function( program, - &args, + args, referenced_tables, resolver, target_register, From eb40505c313d9e695640170715ca9f93af8838d7 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 2 Feb 2025 20:14:54 -0300 Subject: [PATCH 4/9] some tests in sqlite rely on commands not implemented in limbo yet --- Makefile | 4 ++++ testing/json.test | 18 ++++++------------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index e1fdf6d29..0a2e1082d 100644 --- a/Makefile +++ b/Makefile @@ -89,3 +89,7 @@ test-time: test-sqlite3: limbo-c LIBS="$(SQLITE_LIB)" HEADERS="$(SQLITE_LIB_HEADERS)" make -C sqlite3/tests test .PHONY: test-sqlite3 + +test-json: + SQLITE_EXEC=$(SQLITE_EXEC) ./testing/json.test +.PHONY: test-json diff --git a/testing/json.test b/testing/json.test index 92839b06d..0bfec6247 100755 --- a/testing/json.test +++ b/testing/json.test @@ -903,19 +903,13 @@ do_execsql_test json_quote_json_value { } {{{"a":1,"b":"test"}}} -# Escape character tests in sqlite source depend on json_valid +# Escape character tests in sqlite source depend on json_valid and in some syntax that is not implemented +# yet in limbo. # See https://github.com/sqlite/sqlite/blob/255548562b125e6c148bb27d49aaa01b2fe61dba/test/json102.test#L690 # So for now not all control characters escaped are tested - -# TODO No catchsql tests function to test these on -#do_catchsql_test json_quote_blob { -# SELECT json_quote(x'3031323334'); -#} {1 {JSON cannot hold BLOB values}} -#do_catchsql_test json_quote_more_than_one_arg { -# SELECT json_quote(123,456) -#} {1 {json_quote function called with not exactly 1 arguments}} -#do_catchsql_test json_quote_no_args { -# SELECT json_quote() -#} {1 {json_quote function with no arguments}} +# do_execsql_test json102-1501 { +# WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<0x1f) +# SELECT sum(json_valid(json_quote('a'||char(x)||'z'))) FROM c ORDER BY x; +# } {31} From 90ecaf40b50a1960bc7c5760f7a402d521bf539b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 4 Feb 2025 14:38:01 -0300 Subject: [PATCH 5/9] removed unnecessary string allocations for escaped json value --- core/json/mod.rs | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/core/json/mod.rs b/core/json/mod.rs index d9ebb9405..4d972c150 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -685,34 +685,26 @@ pub fn json_quote(value: &OwnedValue) -> crate::Result { return Ok(value.to_owned()); } - let escaped_value: String = t - .value.to_string_lossy() - .chars() - .flat_map(|c| match c { - '"' => vec!['\\', c], - '\n' => vec!['\\', 'n'], - '\r' => vec!['\\', 'r'], - '\t' => vec!['\\', 't'], - '\\' => vec!['\\', '\\'], - '\u{0008}' => vec!['\\', 'b'], - '\u{000c}' => vec!['\\', 'f'], - c => vec![c], - }) - .collect(); + let mut escaped_value = String::with_capacity(t.value.len()); + escaped_value.push('"'); + for c in t.value.to_string_lossy().chars() { + match c { + '"' | '\\' | '\n' | '\r' | '\t' | '\u{0008}' | '\u{000c}' => { + escaped_value.push('\\'); + escaped_value.push(c); + } + c => escaped_value.push(c), + } + } + escaped_value.push('"'); - let quoted_value = format!("\"{}\"", escaped_value); - - Ok(OwnedValue::Text(Text::new(Rc::new(quoted_value)))) + Ok(OwnedValue::Text(Text::new(Rc::new(escaped_value)))) } // Numbers are unquoted in json OwnedValue::Integer(ref int) => Ok(OwnedValue::Integer(int.to_owned())), OwnedValue::Float(ref float) => Ok(OwnedValue::Float(float.to_owned())), OwnedValue::Blob(_) => crate::bail_constraint_error!("JSON cannot hold BLOB values"), - OwnedValue::Null => { - let null_value = "null".to_string(); - - Ok(OwnedValue::Text(Text::new(Rc::new(null_value)))) - } + OwnedValue::Null => Ok(OwnedValue::Text(Text::new(Rc::new("null".to_string())))), _ => { // TODO not too sure what message should be here crate::bail_parse_error!("Syntax error"); From b678375c69822a9daa4944a60dfdc5fe740aba0b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 4 Feb 2025 15:38:02 -0300 Subject: [PATCH 6/9] increasing string capacity to reduce allocations --- core/json/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/json/mod.rs b/core/json/mod.rs index 4d972c150..cdb1d8ed6 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -685,7 +685,7 @@ pub fn json_quote(value: &OwnedValue) -> crate::Result { return Ok(value.to_owned()); } - let mut escaped_value = String::with_capacity(t.value.len()); + let mut escaped_value = String::with_capacity(t.value.len() + 4); escaped_value.push('"'); for c in t.value.to_string_lossy().chars() { match c { From c8bb1fd35367b4b9145061e5d60a49b571d41668 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 6 Feb 2025 23:43:30 -0300 Subject: [PATCH 7/9] unreachable to agg and record types, as it should not be possible to pass them to json_quote --- core/json/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/json/mod.rs b/core/json/mod.rs index cdb1d8ed6..5d156e6d7 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -706,8 +706,7 @@ pub fn json_quote(value: &OwnedValue) -> crate::Result { OwnedValue::Blob(_) => crate::bail_constraint_error!("JSON cannot hold BLOB values"), OwnedValue::Null => Ok(OwnedValue::Text(Text::new(Rc::new("null".to_string())))), _ => { - // TODO not too sure what message should be here - crate::bail_parse_error!("Syntax error"); + unreachable!() } } } From 8fe71309c07d4a6f78efa5d8bc6e4f8e6249f491 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 6 Feb 2025 23:46:00 -0300 Subject: [PATCH 8/9] cargo fmt --- core/function.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/function.rs b/core/function.rs index 99abf1864..fa10d9787 100644 --- a/core/function.rs +++ b/core/function.rs @@ -136,7 +136,6 @@ impl Display for VectorFunc { } } - #[derive(Debug, Clone)] pub enum AggFunc { Avg, From c3cad5dfdda9be161661ca84ddc4898bb4e9a223 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 7 Feb 2025 00:07:51 -0300 Subject: [PATCH 9/9] corrected to use newly created as_str function to convert to string slice --- core/json/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/json/mod.rs b/core/json/mod.rs index 5d156e6d7..bca1eb5de 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -15,7 +15,6 @@ pub use crate::json::ser::to_string; use crate::types::{OwnedValue, Text, TextSubtype}; use indexmap::IndexMap; use jsonb::Error as JsonbError; -use rustix::path::Arg; use ser::to_string_pretty; use serde::{Deserialize, Serialize}; @@ -687,7 +686,8 @@ pub fn json_quote(value: &OwnedValue) -> crate::Result { let mut escaped_value = String::with_capacity(t.value.len() + 4); escaped_value.push('"'); - for c in t.value.to_string_lossy().chars() { + + for c in t.as_str().chars() { match c { '"' | '\\' | '\n' | '\r' | '\t' | '\u{0008}' | '\u{000c}' => { escaped_value.push('\\');