Optimization

This commit is contained in:
Kacper Madej
2025-01-09 17:16:58 +07:00
parent dd533414ef
commit 74e19e2148
4 changed files with 139 additions and 76 deletions

View File

@@ -1,7 +1,10 @@
negative_index_indicator = ${ "#-" }
array_offset = ${ ASCII_DIGIT+ }
array_locator = ${ "[" ~ negative_index_indicator? ~ array_offset ~ "]" }
relaxed_array_locator = ${ negative_index_indicator? ~ array_offset }
root = ${ "$" }
json_path_key = ${ identifier | string }
path = ${ SOI ~ root ~ (array_locator | "." ~ json_path_key)* ~ EOI }
relaxed_path = ${ SOI ~ ((root ~ (array_locator | "." ~ json_path_key)*) | json_path_key | relaxed_array_locator) ~ EOI }

View File

@@ -23,8 +23,16 @@ pub enum PathElement {
ArrayLocator(i32),
}
pub fn json_path(path: &str, strict: bool) -> crate::Result<JsonPath> {
if strict {
strict_json_path(path)
} else {
relaxed_json_path(path)
}
}
/// Parses path into a Vec of Strings, where each string is a key or an array locator.
pub fn json_path(path: &str) -> crate::Result<JsonPath> {
fn strict_json_path(path: &str) -> crate::Result<JsonPath> {
let parsed = Parser::parse(Rule::path, path);
if let Ok(mut parsed) = parsed {
@@ -35,39 +43,8 @@ pub fn json_path(path: &str) -> crate::Result<JsonPath> {
Rule::EOI => (),
Rule::root => result.push(PathElement::Root()),
Rule::json_path_key => result.push(PathElement::Key(pair.as_str().to_string())),
Rule::array_locator => {
let mut array_locator = pair.into_inner();
let index_or_negative_indicator = array_locator.next().unwrap();
match index_or_negative_indicator.as_rule() {
Rule::negative_index_indicator => {
let negative_offset = array_locator.next().unwrap();
// TODO: sqlite is able to parse arbitrarily big numbers, but they
// always get overflown and cast to i32. Handle this.
let parsed = negative_offset
.as_str()
.parse::<i128>()
.unwrap_or(i128::MAX);
result.push(PathElement::ArrayLocator(-parsed as i32));
}
Rule::array_offset => {
let array_offset = index_or_negative_indicator.as_str();
// TODO: sqlite is able to parse arbitrarily big numbers, but they
// always get overflown and cast to i32. Handle this.
let parsed = array_offset.parse::<i128>().unwrap_or(i128::MAX);
result.push(PathElement::ArrayLocator(parsed as i32));
}
_ => unreachable!(
"Unexpected rule: {:?}",
index_or_negative_indicator.as_rule()
),
}
}
_ => {
unreachable!("Unexpected rule: {:?}", pair.as_rule());
}
Rule::array_locator => handle_array_locator(pair, &mut result),
_ => unreachable!("Unexpected rule: {:?}", pair.as_rule()),
}
}
@@ -77,20 +54,76 @@ pub fn json_path(path: &str) -> crate::Result<JsonPath> {
}
}
/// Parses path into a Vec of Strings, where each string is a key or an array locator.
/// Handles relaxed grammar for JSON path that is applicable for the -> and ->> operators.
/// https://sqlite.org/json1.html#the_and_operators
pub fn relaxed_json_path(path: &str) -> crate::Result<JsonPath> {
let parsed = Parser::parse(Rule::relaxed_path, path);
if let Ok(mut parsed) = parsed {
let mut result = vec![PathElement::Root()];
let parsed = parsed.next().unwrap();
for pair in parsed.into_inner() {
match pair.as_rule() {
Rule::EOI => (),
Rule::root => (),
Rule::json_path_key => result.push(PathElement::Key(pair.as_str().to_string())),
Rule::array_locator => handle_array_locator(pair, &mut result),
Rule::relaxed_array_locator => handle_array_locator(pair, &mut result),
_ => unreachable!("Unexpected rule: {:?}", pair.as_rule()),
}
}
Ok(JsonPath { elements: result })
} else {
crate::bail_constraint_error!("JSON path error near: {:?}", path.to_string());
}
}
fn handle_array_locator(pair: pest::iterators::Pair<Rule>, result: &mut Vec<PathElement>) {
let mut array_locator = pair.into_inner();
let index_or_negative_indicator = array_locator.next().unwrap();
match index_or_negative_indicator.as_rule() {
Rule::negative_index_indicator => {
let negative_offset = array_locator.next().unwrap();
// TODO: sqlite is able to parse arbitrarily big numbers, but they
// always get overflown and cast to i32. Handle this.
let parsed = negative_offset
.as_str()
.parse::<i128>()
.unwrap_or(i128::MAX);
result.push(PathElement::ArrayLocator(-parsed as i32));
}
Rule::array_offset => {
let array_offset = index_or_negative_indicator.as_str();
// TODO: sqlite is able to parse arbitrarily big numbers, but they
// always get overflown and cast to i32. Handle this.
let parsed = array_offset.parse::<i128>().unwrap_or(i128::MAX);
result.push(PathElement::ArrayLocator(parsed as i32));
}
_ => unreachable!(
"Unexpected rule: {:?}",
index_or_negative_indicator.as_rule()
),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_json_path_root() {
let path = json_path("$").unwrap();
let path = json_path("$", true).unwrap();
assert_eq!(path.elements.len(), 1);
assert_eq!(path.elements[0], PathElement::Root());
}
#[test]
fn test_json_path_single_locator() {
let path = json_path("$.x").unwrap();
let path = json_path("$.x", true).unwrap();
assert_eq!(path.elements.len(), 2);
assert_eq!(path.elements[0], PathElement::Root());
assert_eq!(path.elements[1], PathElement::Key("x".to_string()));
@@ -98,7 +131,7 @@ mod tests {
#[test]
fn test_json_path_single_array_locator() {
let path = json_path("$[0]").unwrap();
let path = json_path("$[0]", true).unwrap();
assert_eq!(path.elements.len(), 2);
assert_eq!(path.elements[0], PathElement::Root());
assert_eq!(path.elements[1], PathElement::ArrayLocator(0));
@@ -106,7 +139,7 @@ mod tests {
#[test]
fn test_json_path_single_negative_array_locator() {
let path = json_path("$[#-2]").unwrap();
let path = json_path("$[#-2]", true).unwrap();
assert_eq!(path.elements.len(), 2);
assert_eq!(path.elements[0], PathElement::Root());
assert_eq!(path.elements[1], PathElement::ArrayLocator(-2));
@@ -119,7 +152,7 @@ mod tests {
];
for value in invalid_values {
let path = json_path(value);
let path = json_path(value, true);
match path {
Err(crate::error::LimboError::Constraint(e)) => {
@@ -132,7 +165,7 @@ mod tests {
#[test]
fn test_json_path() {
let path = json_path("$.store.book[0].title").unwrap();
let path = json_path("$.store.book[0].title", true).unwrap();
assert_eq!(path.elements.len(), 5);
assert_eq!(path.elements[0], PathElement::Root());
assert_eq!(path.elements[1], PathElement::Key("store".to_string()));
@@ -140,4 +173,31 @@ mod tests {
assert_eq!(path.elements[3], PathElement::ArrayLocator(0));
assert_eq!(path.elements[4], PathElement::Key("title".to_string()));
}
#[test]
fn test_relaxed_json_path_array_locator() {
let path = json_path("1", false).unwrap();
assert_eq!(path.elements.len(), 2);
assert_eq!(path.elements[0], PathElement::Root());
assert_eq!(path.elements[1], PathElement::ArrayLocator(1));
}
#[test]
fn test_relaxed_json_path_negative_array_locator() {
let path = json_path("-1", false).unwrap();
assert_eq!(path.elements.len(), 2);
assert_eq!(path.elements[0], PathElement::Root());
assert_eq!(path.elements[1], PathElement::ArrayLocator(-1));
}
#[test]
fn test_relaxed_json_path_key() {
let path = json_path("x", false).unwrap();
assert_eq!(path.elements.len(), 2);
assert_eq!(path.elements[0], PathElement::Root());
assert_eq!(path.elements[1], PathElement::Key("x".to_string()));
}
}

View File

@@ -131,7 +131,7 @@ pub fn json_array_length(
let json = get_json_value(json_value)?;
let arr_val = if let Some(path) = path {
&json_extract_single(&json, path.as_str())?
&json_extract_single(&json, path.as_str(), true)?
} else {
&json
};
@@ -150,16 +150,21 @@ pub fn json_arrow_extract(value: &OwnedValue, path: &OwnedValue) -> crate::Resul
return Ok(OwnedValue::Null);
}
let json = get_json_value(value)?;
match path {
OwnedValue::Null => Ok(OwnedValue::Null),
OwnedValue::Text(p) => {
let extracted = json_extract_single(&json, p.value.as_str())?;
let json = get_json_value(value)?;
let extracted = json_extract_single(&json, p.value.as_str(), false)?;
let json = crate::json::to_string(&extracted).unwrap();
Ok(OwnedValue::Text(LimboText::json(Rc::new(json))))
}
OwnedValue::Integer(i) => {
let json = get_json_value(value)?;
let extracted = json_extract_single(&json, &i.to_string(), false)?;
convert_json_to_db_type(&extracted, true)
}
_ => crate::bail_constraint_error!("JSON path error near: {:?}", path.to_string()),
}
}
@@ -174,14 +179,13 @@ pub fn json_arrow_shift_extract(
return Ok(OwnedValue::Null);
}
let json = get_json_value(value)?;
match path {
OwnedValue::Null => Ok(OwnedValue::Null),
OwnedValue::Text(p) => {
let extracted = json_extract_single(&json, p.value.as_str())?;
let json = get_json_value(value)?;
let extracted = json_extract_single(&json, p.value.as_str(), false)?;
convert_json_to_db_type(&extracted)
convert_json_to_db_type(&extracted, true)
}
_ => crate::bail_constraint_error!("JSON path error near: {:?}", path.to_string()),
}
@@ -197,48 +201,37 @@ pub fn json_extract(value: &OwnedValue, paths: &[OwnedValue]) -> crate::Result<O
if paths.is_empty() {
return Ok(OwnedValue::Null);
}
let json = get_json_value(value)?;
if paths.len() == 1 {
} else if paths.len() == 1 {
match &paths[0] {
OwnedValue::Null => return Ok(OwnedValue::Null),
OwnedValue::Text(p) => {
let extracted = json_extract_single(&json, p.value.as_str())?;
let json = get_json_value(value)?;
let extracted = json_extract_single(&json, p.value.as_str(), true)?;
return convert_json_to_db_type(&extracted);
return convert_json_to_db_type(&extracted, false);
}
_ => crate::bail_constraint_error!("JSON path error near: {:?}", paths[0].to_string()),
}
}
// multiple paths - we should return an array
let json = get_json_value(value)?;
let mut result = "[".to_string();
for path in paths {
match path {
OwnedValue::Text(p) => {
let extracted = json_extract_single(&json, p.value.as_str())?;
if paths.len() == 1 && extracted == Val::Null {
return Ok(OwnedValue::Null);
}
let extracted = json_extract_single(&json, p.value.as_str(), true)?;
result.push_str(&crate::json::to_string(&extracted).unwrap());
if paths.len() > 1 {
result.push(',');
}
result.push(',');
}
OwnedValue::Null => return Ok(OwnedValue::Null),
_ => crate::bail_constraint_error!("JSON path error near: {:?}", path.to_string()),
}
}
if paths.len() > 1 {
result.pop(); // remove the final comma
result.push(']');
}
result.pop(); // remove the final comma
result.push(']');
Ok(OwnedValue::Text(LimboText::json(Rc::new(result))))
}
@@ -251,7 +244,9 @@ pub fn json_extract(value: &OwnedValue, paths: &[OwnedValue]) -> crate::Result<O
/// > the dequoted text for a JSON string value,
/// > and a text representation for JSON object and array values.
/// https://sqlite.org/json1.html#the_json_extract_function
fn convert_json_to_db_type(extracted: &Val) -> crate::Result<OwnedValue> {
///
/// *all_as_db* - if true, objects and arrays will be returned as pure TEXT without the JSON subtype
fn convert_json_to_db_type(extracted: &Val, all_as_db: bool) -> crate::Result<OwnedValue> {
match extracted {
Val::Null => Ok(OwnedValue::Null),
Val::Float(f) => Ok(OwnedValue::Float(*f)),
@@ -263,16 +258,20 @@ fn convert_json_to_db_type(extracted: &Val) -> crate::Result<OwnedValue> {
Ok(OwnedValue::Integer(0))
}
}
Val::String(s) => Ok(OwnedValue::Text(LimboText::json(Rc::new(s.clone())))),
Val::String(s) => Ok(OwnedValue::Text(LimboText::new(Rc::new(s.clone())))),
_ => {
let json = crate::json::to_string(&extracted).unwrap();
Ok(OwnedValue::Text(LimboText::json(Rc::new(json))))
if all_as_db {
Ok(OwnedValue::Text(LimboText::new(Rc::new(json))))
} else {
Ok(OwnedValue::Text(LimboText::json(Rc::new(json))))
}
}
}
}
fn json_extract_single(json: &Val, path: &str) -> crate::Result<Val> {
let json_path = json_path(path)?;
fn json_extract_single(json: &Val, path: &str, strict: bool) -> crate::Result<Val> {
let json_path = json_path(path, strict)?;
let mut current_element = &Val::Null;