From 166532cc815ef83a3d346b6dd49425e0270a49a1 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Sun, 9 Feb 2025 14:17:34 +0200 Subject: [PATCH 1/6] simplify json path parsing --- core/json/json_path.rs | 209 ++++++++++++++++++++++++++++++----------- core/json/mod.rs | 35 +++---- 2 files changed, 171 insertions(+), 73 deletions(-) diff --git a/core/json/json_path.rs b/core/json/json_path.rs index ed8981018..e6e361827 100644 --- a/core/json/json_path.rs +++ b/core/json/json_path.rs @@ -1,80 +1,177 @@ -use pest::Parser as P; -use pest_derive::Parser; +use crate::bail_parse_error; +use std::borrow::Cow; -#[derive(Parser)] -#[grammar = "json/json.pest"] -#[grammar = "json/json_path.pest"] -struct Parser; +/// You have to know your PP state +#[derive(Clone, Debug, PartialEq)] +enum PPState { + Start, + AfterRoot, + InKey, + InArrayIndex, + ExpectDotOrBracket, +} + +enum ArrayIndexState { + Start, + AfterHash, + CollectingNumbers, + IsMax, +} /// Describes a JSON path, which is a sequence of keys and/or array locators. #[derive(Clone, Debug)] -pub struct JsonPath { - pub elements: Vec, +pub struct JsonPath<'a> { + pub elements: Vec>, } /// PathElement describes a single element of a JSON path. #[derive(Clone, Debug, PartialEq)] -pub enum PathElement { +pub enum PathElement<'a> { /// Root element: '$' Root(), /// JSON key - Key(String), + Key(Cow<'a, str>), /// Array locator, eg. [2], [#-5] ArrayLocator(i32), } +type IsMaxNumber = bool; + +fn collect_num(current: i32, adding: i32, negative: bool) -> (i32, IsMaxNumber) { + let mut is_max = false; + let current = if negative { + current + .checked_mul(10) + .and_then(|n| n.checked_sub(adding)) + .unwrap_or_else(|| { + is_max = true; + i32::MIN + }) + } else { + current + .checked_mul(10) + .and_then(|n| n.checked_add(adding)) + .unwrap_or_else(|| { + is_max = true; + i32::MAX + }) + }; + (current, is_max) +} + /// Parses path into a Vec of Strings, where each string is a key or an array locator. -pub fn json_path(path: &str) -> crate::Result { - let parsed = Parser::parse(Rule::path, path); +pub fn json_path<'a>(path: &'a str) -> crate::Result> { + let mut parser_state = PPState::Start; + let mut index_state = ArrayIndexState::Start; - if let Ok(mut parsed) = parsed { - let mut result = vec![]; - let parsed = parsed.next().unwrap(); - for pair in parsed.into_inner() { - match pair.as_rule() { - Rule::EOI => (), - Rule::root => result.push(PathElement::Root()), - Rule::json_path_key => result.push(PathElement::Key(pair.as_str().to_string())), - Rule::array_locator => { - let mut array_locator = pair.into_inner(); - let index_or_negative_indicator = array_locator.next().unwrap(); + let mut key_start = 0; + let mut index_buffer = 0; - match index_or_negative_indicator.as_rule() { - Rule::negative_index_indicator => { - let negative_offset = array_locator.next().unwrap(); - // TODO: sqlite is able to parse arbitrarily big numbers, but they - // always get overflown and cast to i32. Handle this. - let parsed = negative_offset - .as_str() - .parse::() - .unwrap_or(i128::MAX); - - result.push(PathElement::ArrayLocator(-parsed as i32)); + let mut path_components = Vec::with_capacity(5); + let mut path_iter = path.char_indices(); + while let Some(ch) = path_iter.next() { + let ch_len = ch.1.len_utf8(); + match parser_state { + PPState::Start => match ch { + (_, '$') => { + path_components.push(PathElement::Root()); + parser_state = PPState::AfterRoot + } + (_, _) => bail_parse_error!("Bad json path: {}", path), + }, + PPState::AfterRoot => match ch { + (idx, '.') => { + parser_state = PPState::InKey; + key_start = idx + ch_len; + } + (_, '[') => { + index_state = ArrayIndexState::Start; + parser_state = PPState::InArrayIndex; + index_buffer = 0; + } + (_, _) => bail_parse_error!("Bad json path: {}", path), + }, + PPState::InKey => match ch { + (idx, '.' | '[') => { + let key_end = idx; + if key_end > key_start { + let key = &path[key_start..key_end]; + if ch.1 == '[' { + index_state = ArrayIndexState::Start; + parser_state = PPState::InArrayIndex; + index_buffer = 0; + } else { + key_start = idx + ch_len; } - Rule::array_offset => { - let array_offset = index_or_negative_indicator.as_str(); - // TODO: sqlite is able to parse arbitrarily big numbers, but they - // always get overflown and cast to i32. Handle this. - let parsed = array_offset.parse::().unwrap_or(i128::MAX); - - result.push(PathElement::ArrayLocator(parsed as i32)); - } - _ => unreachable!( - "Unexpected rule: {:?}", - index_or_negative_indicator.as_rule() - ), + path_components.push(PathElement::Key(Cow::Borrowed(key))); + } else { + unreachable!() } } - _ => { - unreachable!("Unexpected rule: {:?}", pair.as_rule()); + (_, _) => continue, + }, + PPState::InArrayIndex => { + let (_, c) = ch; + + match (&index_state, c) { + (ArrayIndexState::Start, '#') => index_state = ArrayIndexState::AfterHash, + (ArrayIndexState::Start, '0'..='9') => { + index_buffer = c.to_digit(10).unwrap() as i32; + index_state = ArrayIndexState::CollectingNumbers; + } + (ArrayIndexState::AfterHash, '-') => { + if let Some((_, next_c)) = path_iter.next() { + if next_c.is_ascii_digit() { + index_buffer = -(next_c.to_digit(10).unwrap() as i32); + index_state = ArrayIndexState::CollectingNumbers; + } else { + bail_parse_error!("Bad json path: {}", path); + } + } else { + bail_parse_error!("Bad json path: {}", path); + } + } + (ArrayIndexState::CollectingNumbers, '0'..='9') => { + let (new_num, is_max) = collect_num( + index_buffer, + c.to_digit(10).unwrap() as i32, + index_buffer < 0, + ); + if is_max { + index_state = ArrayIndexState::IsMax; + index_buffer = new_num; + } + } + (ArrayIndexState::IsMax, '0'..='9') => continue, + (ArrayIndexState::CollectingNumbers | ArrayIndexState::IsMax, ']') => { + parser_state = PPState::ExpectDotOrBracket; + path_components.push(PathElement::ArrayLocator(index_buffer)) + } + (_, _) => bail_parse_error!("Bad json path: {}", path), } } + PPState::ExpectDotOrBracket => match ch { + (idx, '.') => { + key_start = idx + ch_len; + parser_state = PPState::InKey; + } + (_, '[') => { + index_state = ArrayIndexState::Start; + parser_state = PPState::InArrayIndex; + index_buffer = 0; + } + (_, _) => bail_parse_error!("Bad json path: {}", path), + }, } - - Ok(JsonPath { elements: result }) - } else { - crate::bail_constraint_error!("JSON path error near: {:?}", path.to_string()); } + if parser_state == PPState::InKey && key_start < path.len() { + let key = &path[key_start..]; + path_components.push(PathElement::Key(Cow::Borrowed(key))); + } + + Ok(JsonPath { + elements: path_components, + }) } #[cfg(test)] @@ -93,7 +190,7 @@ mod tests { let path = json_path("$.x").unwrap(); assert_eq!(path.elements.len(), 2); assert_eq!(path.elements[0], PathElement::Root()); - assert_eq!(path.elements[1], PathElement::Key("x".to_string())); + assert_eq!(path.elements[1], PathElement::Key(Cow::Borrowed("x"))); } #[test] @@ -135,9 +232,9 @@ mod tests { let path = json_path("$.store.book[0].title").unwrap(); assert_eq!(path.elements.len(), 5); assert_eq!(path.elements[0], PathElement::Root()); - assert_eq!(path.elements[1], PathElement::Key("store".to_string())); - assert_eq!(path.elements[2], PathElement::Key("book".to_string())); + assert_eq!(path.elements[1], PathElement::Key(Cow::Borrowed("store"))); + assert_eq!(path.elements[2], PathElement::Key(Cow::Borrowed("book"))); assert_eq!(path.elements[3], PathElement::ArrayLocator(0)); - assert_eq!(path.elements[4], PathElement::Key("title".to_string())); + assert_eq!(path.elements[4], PathElement::Key(Cow::Borrowed("title"))); } } diff --git a/core/json/mod.rs b/core/json/mod.rs index c1a195b49..bb05adfe8 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -15,6 +15,8 @@ use indexmap::IndexMap; use jsonb::Error as JsonbError; use ser::to_string_pretty; use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use std::rc::Rc; #[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] #[serde(untagged)] @@ -392,20 +394,16 @@ fn json_extract_single<'a>( PathElement::Root() => { current_element = json; } - PathElement::Key(key) => { - let key = key.as_str(); - - match current_element { - Val::Object(map) => { - if let Some((_, value)) = map.iter().find(|(k, _)| k == key) { - current_element = value; - } else { - return Ok(None); - } + PathElement::Key(key) => match current_element { + Val::Object(map) => { + if let Some((_, value)) = map.iter().find(|(k, _)| k == key) { + current_element = value; + } else { + return Ok(None); } - _ => return Ok(None), } - } + _ => return Ok(None), + }, PathElement::ArrayLocator(idx) => match current_element { Val::Array(array) => { let mut idx = *idx; @@ -444,7 +442,7 @@ fn json_path_from_owned_value(path: &OwnedValue, strict: bool) -> crate::Result< JsonPath { elements: vec![ PathElement::Root(), - PathElement::Key(t.as_str().to_string()), + PathElement::Key(Cow::Borrowed(t.as_str())), ], } } @@ -454,7 +452,10 @@ fn json_path_from_owned_value(path: &OwnedValue, strict: bool) -> crate::Result< elements: vec![PathElement::Root(), PathElement::ArrayLocator(*i as i32)], }, OwnedValue::Float(f) => JsonPath { - elements: vec![PathElement::Root(), PathElement::Key(f.to_string())], + elements: vec![ + PathElement::Root(), + PathElement::Key(Cow::Owned(f.to_string())), + ], }, _ => crate::bail_constraint_error!("JSON path error near: {:?}", path.to_string()), } @@ -593,7 +594,7 @@ fn find_or_create_target<'a>(json: &'a mut Val, path: &JsonPath) -> Option Date: Sun, 9 Feb 2025 16:30:40 +0200 Subject: [PATCH 2/6] fixed edge cases and add some tests, remove pest grammar --- core/json/json_path.pest | 8 --- core/json/json_path.rs | 106 +++++++++++++++++++++++++++++++-------- 2 files changed, 86 insertions(+), 28 deletions(-) delete mode 100644 core/json/json_path.pest diff --git a/core/json/json_path.pest b/core/json/json_path.pest deleted file mode 100644 index 590a3df23..000000000 --- a/core/json/json_path.pest +++ /dev/null @@ -1,8 +0,0 @@ -negative_index_indicator = ${ "#-" } -array_offset = ${ ASCII_DIGIT+ } -array_locator = ${ "[" ~ negative_index_indicator? ~ array_offset ~ "]" } -relaxed_array_locator = ${ negative_index_indicator? ~ array_offset } - -root = ${ "$" } -json_path_key = ${ identifier | string | ASCII_DIGIT+ } -path = ${ SOI ~ root ~ (array_locator | "." ~ json_path_key)* ~ EOI } diff --git a/core/json/json_path.rs b/core/json/json_path.rs index e6e361827..3e325c6bd 100644 --- a/core/json/json_path.rs +++ b/core/json/json_path.rs @@ -1,7 +1,6 @@ use crate::bail_parse_error; use std::borrow::Cow; -/// You have to know your PP state #[derive(Clone, Debug, PartialEq)] enum PPState { Start, @@ -11,6 +10,7 @@ enum PPState { ExpectDotOrBracket, } +#[derive(Clone, Debug, PartialEq)] enum ArrayIndexState { Start, AfterHash, @@ -37,37 +37,47 @@ pub enum PathElement<'a> { type IsMaxNumber = bool; -fn collect_num(current: i32, adding: i32, negative: bool) -> (i32, IsMaxNumber) { +fn collect_num(current: i128, adding: i128, negative: bool) -> (i128, IsMaxNumber) { let mut is_max = false; - let current = if negative { + let cur = if negative { current .checked_mul(10) - .and_then(|n| n.checked_sub(adding)) + .and_then(|x| x.checked_sub(adding)) .unwrap_or_else(|| { is_max = true; - i32::MIN + i128::MIN }) } else { current .checked_mul(10) - .and_then(|n| n.checked_add(adding)) + .and_then(|x| x.checked_add(adding)) .unwrap_or_else(|| { is_max = true; - i32::MAX + i128::MAX }) }; - (current, is_max) + (cur, is_max) +} + +fn estimate_path_capacity(input: &str) -> usize { + // After $ we need either . or [ for each component + // So divide remaining length by 2 (minimum chars per component) + // Add 1 for the root component + 1 + (input.len() - 1) / 2 } /// Parses path into a Vec of Strings, where each string is a key or an array locator. pub fn json_path<'a>(path: &'a str) -> crate::Result> { + if path.is_empty() { + bail_parse_error!("Bad json path: {}", path) + } let mut parser_state = PPState::Start; let mut index_state = ArrayIndexState::Start; let mut key_start = 0; - let mut index_buffer = 0; + let mut index_buffer: i128 = 0; - let mut path_components = Vec::with_capacity(5); + let mut path_components = Vec::with_capacity(estimate_path_capacity(path)); let mut path_iter = path.char_indices(); while let Some(ch) = path_iter.next() { let ch_len = ch.1.len_utf8(); @@ -105,7 +115,7 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { } path_components.push(PathElement::Key(Cow::Borrowed(key))); } else { - unreachable!() + bail_parse_error!("Bad json path: {}", path) } } (_, _) => continue, @@ -116,13 +126,13 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { match (&index_state, c) { (ArrayIndexState::Start, '#') => index_state = ArrayIndexState::AfterHash, (ArrayIndexState::Start, '0'..='9') => { - index_buffer = c.to_digit(10).unwrap() as i32; + index_buffer = c.to_digit(10).unwrap() as i128; index_state = ArrayIndexState::CollectingNumbers; } (ArrayIndexState::AfterHash, '-') => { if let Some((_, next_c)) = path_iter.next() { if next_c.is_ascii_digit() { - index_buffer = -(next_c.to_digit(10).unwrap() as i32); + index_buffer = -(next_c.to_digit(10).unwrap() as i128); index_state = ArrayIndexState::CollectingNumbers; } else { bail_parse_error!("Bad json path: {}", path); @@ -134,18 +144,18 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { (ArrayIndexState::CollectingNumbers, '0'..='9') => { let (new_num, is_max) = collect_num( index_buffer, - c.to_digit(10).unwrap() as i32, + c.to_digit(10).unwrap() as i128, index_buffer < 0, ); if is_max { index_state = ArrayIndexState::IsMax; - index_buffer = new_num; } + index_buffer = new_num; } (ArrayIndexState::IsMax, '0'..='9') => continue, (ArrayIndexState::CollectingNumbers | ArrayIndexState::IsMax, ']') => { parser_state = PPState::ExpectDotOrBracket; - path_components.push(PathElement::ArrayLocator(index_buffer)) + path_components.push(PathElement::ArrayLocator(index_buffer as i32)) } (_, _) => bail_parse_error!("Bad json path: {}", path), } @@ -164,9 +174,18 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { }, } } - if parser_state == PPState::InKey && key_start < path.len() { - let key = &path[key_start..]; - path_components.push(PathElement::Key(Cow::Borrowed(key))); + match parser_state { + PPState::InArrayIndex => bail_parse_error!("Bad json path: {}", path), + PPState::InKey => { + if key_start < path.len() { + let key = &path[key_start..]; + + path_components.push(PathElement::Key(Cow::Borrowed(key))); + } else { + bail_parse_error!("Bad json path: {}", path) + } + } + _ => (), } Ok(JsonPath { @@ -219,7 +238,7 @@ mod tests { let path = json_path(value); match path { - Err(crate::error::LimboError::Constraint(_)) => { + Err(crate::error::LimboError::ParseError(_)) => { // happy path } _ => panic!("Expected error for: {:?}, got: {:?}", value, path), @@ -237,4 +256,51 @@ mod tests { assert_eq!(path.elements[3], PathElement::ArrayLocator(0)); assert_eq!(path.elements[4], PathElement::Key(Cow::Borrowed("title"))); } + + #[test] + fn test_large_index_wrapping() { + let path = json_path("$[4294967296]").unwrap(); + assert_eq!(path.elements[1], PathElement::ArrayLocator(0)); + + let path = json_path("$[4294967297]").unwrap(); + assert_eq!(path.elements[1], PathElement::ArrayLocator(1)); + } + + #[test] + fn test_deeply_nested_path() { + let path = json_path("$[0][1][2].key[3].other").unwrap(); + assert_eq!(path.elements.len(), 7); + assert_eq!(path.elements[0], PathElement::Root()); + assert_eq!(path.elements[1], PathElement::ArrayLocator(0)); + assert_eq!(path.elements[2], PathElement::ArrayLocator(1)); + assert_eq!(path.elements[3], PathElement::ArrayLocator(2)); + assert_eq!(path.elements[4], PathElement::Key(Cow::Borrowed("key"))); + assert_eq!(path.elements[5], PathElement::ArrayLocator(3)); + } + + #[test] + fn test_edge_cases() { + // Empty key + assert!(json_path("$.").is_err()); + + // Multiple dots + assert!(json_path("$..key").is_err()); + + // Unclosed brackets + assert!(json_path("$[0").is_err()); + assert!(json_path("$[").is_err()); + + // Invalid negative index format + assert!(json_path("$[-1]").is_err()); // should be $[#-1] + } + + #[test] + fn test_path_capacity() { + // Test that our capacity estimation is reasonable + let short_path = "$[0]"; + assert!(estimate_path_capacity(short_path) >= 2); + + let long_path = "$.a.b.c.d.e.f.g[0][1][2]"; + assert!(estimate_path_capacity(long_path) >= 11); + } } From ee16c49c6cdb2d22b045e7d99dadfac210fca002 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Sun, 9 Feb 2025 19:12:09 +0200 Subject: [PATCH 3/6] add support for quoted path --- core/json/json_path.rs | 87 ++++++++++++++++++++++++++++++++++++------ core/json/mod.rs | 18 ++++----- 2 files changed, 84 insertions(+), 21 deletions(-) diff --git a/core/json/json_path.rs b/core/json/json_path.rs index 3e325c6bd..210945a23 100644 --- a/core/json/json_path.rs +++ b/core/json/json_path.rs @@ -24,13 +24,15 @@ pub struct JsonPath<'a> { pub elements: Vec>, } +type IsQuoted = bool; + /// PathElement describes a single element of a JSON path. #[derive(Clone, Debug, PartialEq)] pub enum PathElement<'a> { /// Root element: '$' Root(), /// JSON key - Key(Cow<'a, str>), + Key(Cow<'a, str>, IsQuoted), /// Array locator, eg. [2], [#-5] ArrayLocator(i32), } @@ -73,12 +75,14 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { } let mut parser_state = PPState::Start; let mut index_state = ArrayIndexState::Start; + let mut is_quoted = false; let mut key_start = 0; let mut index_buffer: i128 = 0; let mut path_components = Vec::with_capacity(estimate_path_capacity(path)); let mut path_iter = path.char_indices(); + while let Some(ch) = path_iter.next() { let ch_len = ch.1.len_utf8(); match parser_state { @@ -103,9 +107,17 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { }, PPState::InKey => match ch { (idx, '.' | '[') => { + if is_quoted { + continue; + } let key_end = idx; + if key_end > key_start { - let key = &path[key_start..key_end]; + let mut key = &path[key_start..key_end]; + println!("{}, {}", &key[0..2], &key[key.len() - 2..]); + if key[0..2].contains("\"") && key[key.len() - 2..].contains("\"") { + key = &key[2..key.len() - 2]; + } if ch.1 == '[' { index_state = ArrayIndexState::Start; parser_state = PPState::InArrayIndex; @@ -113,12 +125,45 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { } else { key_start = idx + ch_len; } - path_components.push(PathElement::Key(Cow::Borrowed(key))); + path_components.push(PathElement::Key(Cow::Borrowed(key), is_quoted)); + is_quoted = false; } else { bail_parse_error!("Bad json path: {}", path) } } - (_, _) => continue, + (idx, ch) => { + if ch != '"' { + continue; + }; + + if key_start == idx { + is_quoted = true + } else { + if let Some(next_char) = path_iter.next() { + let c = next_char.1; + match next_char { + (idx, '.' | '[') => { + let key_end = idx; + + if key_end > key_start { + let key = &path[key_start + 1..key_end - 1]; + if c == '[' { + index_state = ArrayIndexState::Start; + parser_state = PPState::InArrayIndex; + index_buffer = 0; + } else { + key_start = idx + c.len_utf8(); + } + path_components + .push(PathElement::Key(Cow::Borrowed(key), is_quoted)); + } + is_quoted = false; + } + _ => bail_parse_error!("Bad json path: {}", path), + } + } + } + } }, PPState::InArrayIndex => { let (_, c) = ch; @@ -178,16 +223,19 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { PPState::InArrayIndex => bail_parse_error!("Bad json path: {}", path), PPState::InKey => { if key_start < path.len() { - let key = &path[key_start..]; + let mut key = &path[key_start..]; - path_components.push(PathElement::Key(Cow::Borrowed(key))); + if key[0..=1].contains("\"") && key[key.len() - 1..].contains("\"") { + key = &key[1..key.len() - 1]; + } + path_components.push(PathElement::Key(Cow::Borrowed(key), is_quoted)); } else { bail_parse_error!("Bad json path: {}", path) } } _ => (), } - + println!("{:?}", path_components); Ok(JsonPath { elements: path_components, }) @@ -209,7 +257,10 @@ mod tests { let path = json_path("$.x").unwrap(); assert_eq!(path.elements.len(), 2); assert_eq!(path.elements[0], PathElement::Root()); - assert_eq!(path.elements[1], PathElement::Key(Cow::Borrowed("x"))); + assert_eq!( + path.elements[1], + PathElement::Key(Cow::Borrowed("x"), false) + ); } #[test] @@ -251,10 +302,19 @@ mod tests { let path = json_path("$.store.book[0].title").unwrap(); assert_eq!(path.elements.len(), 5); assert_eq!(path.elements[0], PathElement::Root()); - assert_eq!(path.elements[1], PathElement::Key(Cow::Borrowed("store"))); - assert_eq!(path.elements[2], PathElement::Key(Cow::Borrowed("book"))); + assert_eq!( + path.elements[1], + PathElement::Key(Cow::Borrowed("store"), false) + ); + assert_eq!( + path.elements[2], + PathElement::Key(Cow::Borrowed("book"), false) + ); assert_eq!(path.elements[3], PathElement::ArrayLocator(0)); - assert_eq!(path.elements[4], PathElement::Key(Cow::Borrowed("title"))); + assert_eq!( + path.elements[4], + PathElement::Key(Cow::Borrowed("title"), false) + ); } #[test] @@ -274,7 +334,10 @@ mod tests { assert_eq!(path.elements[1], PathElement::ArrayLocator(0)); assert_eq!(path.elements[2], PathElement::ArrayLocator(1)); assert_eq!(path.elements[3], PathElement::ArrayLocator(2)); - assert_eq!(path.elements[4], PathElement::Key(Cow::Borrowed("key"))); + assert_eq!( + path.elements[4], + PathElement::Key(Cow::Borrowed("key"), false) + ); assert_eq!(path.elements[5], PathElement::ArrayLocator(3)); } diff --git a/core/json/mod.rs b/core/json/mod.rs index bb05adfe8..4984a80d5 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -394,7 +394,7 @@ fn json_extract_single<'a>( PathElement::Root() => { current_element = json; } - PathElement::Key(key) => match current_element { + PathElement::Key(key, _) => match current_element { Val::Object(map) => { if let Some((_, value)) = map.iter().find(|(k, _)| k == key) { current_element = value; @@ -442,7 +442,7 @@ fn json_path_from_owned_value(path: &OwnedValue, strict: bool) -> crate::Result< JsonPath { elements: vec![ PathElement::Root(), - PathElement::Key(Cow::Borrowed(t.as_str())), + PathElement::Key(Cow::Borrowed(t.as_str()), false), ], } } @@ -454,7 +454,7 @@ fn json_path_from_owned_value(path: &OwnedValue, strict: bool) -> crate::Result< OwnedValue::Float(f) => JsonPath { elements: vec![ PathElement::Root(), - PathElement::Key(Cow::Owned(f.to_string())), + PathElement::Key(Cow::Owned(f.to_string()), false), ], }, _ => crate::bail_constraint_error!("JSON path error near: {:?}", path.to_string()), @@ -501,7 +501,7 @@ fn find_target<'a>(json: &'a mut Val, path: &JsonPath) -> Option> { return None; } }, - PathElement::Key(key) => match current { + PathElement::Key(key, _) => match current { Val::Object(obj) => { if let Some(pos) = &obj .iter() @@ -577,7 +577,7 @@ fn find_or_create_target<'a>(json: &'a mut Val, path: &JsonPath) -> Option match current { + PathElement::Key(key, _) => match current { Val::Object(obj) => { if let Some(pos) = &obj .iter() @@ -1253,7 +1253,7 @@ mod tests { fn test_find_target_object() { let mut val = Val::Object(vec![("key".to_string(), Val::String("value".to_string()))]); let path = JsonPath { - elements: vec![PathElement::Key(Cow::Borrowed("key"))], + elements: vec![PathElement::Key(Cow::Borrowed("key"), false)], }; match find_target(&mut val, &path) { @@ -1269,7 +1269,7 @@ mod tests { ("key".to_string(), Val::String("value".to_string())), ]); let path = JsonPath { - elements: vec![PathElement::Key(Cow::Borrowed("key"))], + elements: vec![PathElement::Key(Cow::Borrowed("key"), false)], }; match find_target(&mut val, &path) { @@ -1364,7 +1364,7 @@ mod tests { let result = result.unwrap(); match &result.elements[..] { - [PathElement::Root(), PathElement::Key(field)] if *field == "field" => {} + [PathElement::Root(), PathElement::Key(field, false)] if *field == "field" => {} _ => panic!("Expected root and field"), } } @@ -1433,7 +1433,7 @@ mod tests { let result = result.unwrap(); match &result.elements[..] { - [PathElement::Root(), PathElement::Key(field)] if *field == "1.23" => {} + [PathElement::Root(), PathElement::Key(field, false)] if *field == "1.23" => {} _ => panic!("Expected root and field"), } } From a10955cbcf599994b251af16ed73c9d745edf1f0 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 10 Feb 2025 13:25:18 +0200 Subject: [PATCH 4/6] refine quoted key handling, add RawString option for SQLite compat --- core/json/json_path.rs | 151 +++++++++++++++++++++-------------------- core/json/mod.rs | 44 ++++++------ 2 files changed, 101 insertions(+), 94 deletions(-) diff --git a/core/json/json_path.rs b/core/json/json_path.rs index 210945a23..6a714b79c 100644 --- a/core/json/json_path.rs +++ b/core/json/json_path.rs @@ -24,7 +24,7 @@ pub struct JsonPath<'a> { pub elements: Vec>, } -type IsQuoted = bool; +type RawString = bool; /// PathElement describes a single element of a JSON path. #[derive(Clone, Debug, PartialEq)] @@ -32,33 +32,25 @@ pub enum PathElement<'a> { /// Root element: '$' Root(), /// JSON key - Key(Cow<'a, str>, IsQuoted), + Key(Cow<'a, str>, RawString), /// Array locator, eg. [2], [#-5] - ArrayLocator(i32), + ArrayLocator(Option), } type IsMaxNumber = bool; fn collect_num(current: i128, adding: i128, negative: bool) -> (i128, IsMaxNumber) { - let mut is_max = false; - let cur = if negative { - current - .checked_mul(10) - .and_then(|x| x.checked_sub(adding)) - .unwrap_or_else(|| { - is_max = true; - i128::MIN - }) + let ten = 10i128; + let adding = adding as i128; + + let result = if negative { + current.saturating_mul(ten).saturating_sub(adding) } else { - current - .checked_mul(10) - .and_then(|x| x.checked_add(adding)) - .unwrap_or_else(|| { - is_max = true; - i128::MAX - }) + current.saturating_mul(ten).saturating_add(adding) }; - (cur, is_max) + + let is_max = result == i128::MAX || result == i128::MIN; + (result, is_max) } fn estimate_path_capacity(input: &str) -> usize { @@ -75,7 +67,6 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { } let mut parser_state = PPState::Start; let mut index_state = ArrayIndexState::Start; - let mut is_quoted = false; let mut key_start = 0; let mut index_buffer: i128 = 0; @@ -107,17 +98,10 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { }, PPState::InKey => match ch { (idx, '.' | '[') => { - if is_quoted { - continue; - } let key_end = idx; if key_end > key_start { - let mut key = &path[key_start..key_end]; - println!("{}, {}", &key[0..2], &key[key.len() - 2..]); - if key[0..2].contains("\"") && key[key.len() - 2..].contains("\"") { - key = &key[2..key.len() - 2]; - } + let key = &path[key_start..key_end]; if ch.1 == '[' { index_state = ArrayIndexState::Start; parser_state = PPState::InArrayIndex; @@ -125,44 +109,34 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { } else { key_start = idx + ch_len; } - path_components.push(PathElement::Key(Cow::Borrowed(key), is_quoted)); - is_quoted = false; + path_components.push(PathElement::Key(Cow::Borrowed(key), false)); } else { bail_parse_error!("Bad json path: {}", path) } } - (idx, ch) => { - if ch != '"' { - continue; - }; - - if key_start == idx { - is_quoted = true - } else { - if let Some(next_char) = path_iter.next() { - let c = next_char.1; - match next_char { - (idx, '.' | '[') => { - let key_end = idx; - - if key_end > key_start { - let key = &path[key_start + 1..key_end - 1]; - if c == '[' { - index_state = ArrayIndexState::Start; - parser_state = PPState::InArrayIndex; - index_buffer = 0; - } else { - key_start = idx + c.len_utf8(); - } - path_components - .push(PathElement::Key(Cow::Borrowed(key), is_quoted)); + (_, ch) => { + match ch { + '"' => { + while let Some((idx, ch)) = path_iter.next() { + match ch { + '\\' => { + path_iter.next(); } - is_quoted = false; + '"' => { + if key_start < idx { + let key = &path[key_start + 1..idx]; + path_components + .push(PathElement::Key(Cow::Borrowed(key), true)); + parser_state = PPState::ExpectDotOrBracket; + break; + } + } + _ => continue, } - _ => bail_parse_error!("Bad json path: {}", path), } } - } + _ => continue, + }; } }, PPState::InArrayIndex => { @@ -186,6 +160,10 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { bail_parse_error!("Bad json path: {}", path); } } + (ArrayIndexState::AfterHash, ']') => { + parser_state = PPState::ExpectDotOrBracket; + path_components.push(PathElement::ArrayLocator(None)) + } (ArrayIndexState::CollectingNumbers, '0'..='9') => { let (new_num, is_max) = collect_num( index_buffer, @@ -200,7 +178,7 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { (ArrayIndexState::IsMax, '0'..='9') => continue, (ArrayIndexState::CollectingNumbers | ArrayIndexState::IsMax, ']') => { parser_state = PPState::ExpectDotOrBracket; - path_components.push(PathElement::ArrayLocator(index_buffer as i32)) + path_components.push(PathElement::ArrayLocator(Some(index_buffer as i32))) } (_, _) => bail_parse_error!("Bad json path: {}", path), } @@ -223,19 +201,18 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { PPState::InArrayIndex => bail_parse_error!("Bad json path: {}", path), PPState::InKey => { if key_start < path.len() { - let mut key = &path[key_start..]; + let key = &path[key_start..]; - if key[0..=1].contains("\"") && key[key.len() - 1..].contains("\"") { - key = &key[1..key.len() - 1]; + if key.starts_with('"') & !key.ends_with('"') { + bail_parse_error!("Bad json path: {}", path) } - path_components.push(PathElement::Key(Cow::Borrowed(key), is_quoted)); + path_components.push(PathElement::Key(Cow::Borrowed(key), false)); } else { bail_parse_error!("Bad json path: {}", path) } } _ => (), } - println!("{:?}", path_components); Ok(JsonPath { elements: path_components, }) @@ -268,7 +245,7 @@ mod tests { let path = json_path("$[0]").unwrap(); assert_eq!(path.elements.len(), 2); assert_eq!(path.elements[0], PathElement::Root()); - assert_eq!(path.elements[1], PathElement::ArrayLocator(0)); + assert_eq!(path.elements[1], PathElement::ArrayLocator(Some(0))); } #[test] @@ -276,7 +253,7 @@ mod tests { let path = json_path("$[#-2]").unwrap(); assert_eq!(path.elements.len(), 2); assert_eq!(path.elements[0], PathElement::Root()); - assert_eq!(path.elements[1], PathElement::ArrayLocator(-2)); + assert_eq!(path.elements[1], PathElement::ArrayLocator(Some(-2))); } #[test] @@ -310,7 +287,7 @@ mod tests { path.elements[2], PathElement::Key(Cow::Borrowed("book"), false) ); - assert_eq!(path.elements[3], PathElement::ArrayLocator(0)); + assert_eq!(path.elements[3], PathElement::ArrayLocator(Some(0))); assert_eq!( path.elements[4], PathElement::Key(Cow::Borrowed("title"), false) @@ -320,10 +297,10 @@ mod tests { #[test] fn test_large_index_wrapping() { let path = json_path("$[4294967296]").unwrap(); - assert_eq!(path.elements[1], PathElement::ArrayLocator(0)); + assert_eq!(path.elements[1], PathElement::ArrayLocator(Some(0))); let path = json_path("$[4294967297]").unwrap(); - assert_eq!(path.elements[1], PathElement::ArrayLocator(1)); + assert_eq!(path.elements[1], PathElement::ArrayLocator(Some(1))); } #[test] @@ -331,14 +308,14 @@ mod tests { let path = json_path("$[0][1][2].key[3].other").unwrap(); assert_eq!(path.elements.len(), 7); assert_eq!(path.elements[0], PathElement::Root()); - assert_eq!(path.elements[1], PathElement::ArrayLocator(0)); - assert_eq!(path.elements[2], PathElement::ArrayLocator(1)); - assert_eq!(path.elements[3], PathElement::ArrayLocator(2)); + assert_eq!(path.elements[1], PathElement::ArrayLocator(Some(0))); + assert_eq!(path.elements[2], PathElement::ArrayLocator(Some(1))); + assert_eq!(path.elements[3], PathElement::ArrayLocator(Some(2))); assert_eq!( path.elements[4], PathElement::Key(Cow::Borrowed("key"), false) ); - assert_eq!(path.elements[5], PathElement::ArrayLocator(3)); + assert_eq!(path.elements[5], PathElement::ArrayLocator(Some(3))); } #[test] @@ -366,4 +343,30 @@ mod tests { let long_path = "$.a.b.c.d.e.f.g[0][1][2]"; assert!(estimate_path_capacity(long_path) >= 11); } + + #[test] + fn test_quoted_keys() { + let path = json_path(r#"$."key""#).unwrap(); + assert_eq!( + path.elements[1], + PathElement::Key(Cow::Borrowed("key"), true) + ); + + let path = json_path(r#"$."key.with.dots""#).unwrap(); + assert_eq!( + path.elements[1], + PathElement::Key(Cow::Borrowed("key.with.dots"), true) + ); + + let path = json_path(r#"$."key[0]""#).unwrap(); + assert_eq!( + path.elements[1], + PathElement::Key(Cow::Borrowed("key[0]"), true) + ); + } + + #[test] + fn test_empty_quoted_key() { + assert!(json_path(r#"$."""#).is_ok()); + } } diff --git a/core/json/mod.rs b/core/json/mod.rs index 4984a80d5..a7d0101cd 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -16,7 +16,6 @@ use jsonb::Error as JsonbError; use ser::to_string_pretty; use serde::{Deserialize, Serialize}; use std::borrow::Cow; -use std::rc::Rc; #[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] #[serde(untagged)] @@ -406,16 +405,16 @@ fn json_extract_single<'a>( }, PathElement::ArrayLocator(idx) => match current_element { Val::Array(array) => { - let mut idx = *idx; + if let Some(mut idx) = *idx { + if idx < 0 { + idx += array.len() as i32; + } - if idx < 0 { - idx += array.len() as i32; - } - - if idx < array.len() as i32 { - current_element = &array[idx as usize]; - } else { - return Ok(None); + if idx < array.len() as i32 { + current_element = &array[idx as usize]; + } else { + return Ok(None); + } } } _ => return Ok(None), @@ -449,7 +448,10 @@ fn json_path_from_owned_value(path: &OwnedValue, strict: bool) -> crate::Result< } OwnedValue::Null => return Ok(None), OwnedValue::Integer(i) => JsonPath { - elements: vec![PathElement::Root(), PathElement::ArrayLocator(*i as i32)], + elements: vec![ + PathElement::Root(), + PathElement::ArrayLocator(Some(*i as i32)), + ], }, OwnedValue::Float(f) => JsonPath { elements: vec![ @@ -485,8 +487,9 @@ fn find_target<'a>(json: &'a mut Val, path: &JsonPath) -> Option> { PathElement::ArrayLocator(index) => match current { Val::Array(arr) => { if let Some(index) = match index { - i if *i < 0 => arr.len().checked_sub(i.unsigned_abs() as usize), - i => ((*i as usize) < arr.len()).then_some(*i as usize), + Some(i) if *i < 0 => arr.len().checked_sub(i.unsigned_abs() as usize), + Some(i) => ((*i as usize) < arr.len()).then_some(*i as usize), + None => Some(arr.len()), } { if is_last { return Some(Target::Array(arr, index)); @@ -538,8 +541,9 @@ fn find_or_create_target<'a>(json: &'a mut Val, path: &JsonPath) -> Option match current { Val::Array(arr) => { if let Some(index) = match index { - i if *i < 0 => arr.len().checked_sub(i.unsigned_abs() as usize), - i => Some(*i as usize), + Some(i) if *i < 0 => arr.len().checked_sub(i.unsigned_abs() as usize), + Some(i) => Some(*i as usize), + None => Some(arr.len()), } { if is_last { if index == arr.len() { @@ -1224,7 +1228,7 @@ mod tests { Val::String("second".to_string()), ]); let path = JsonPath { - elements: vec![PathElement::ArrayLocator(0)], + elements: vec![PathElement::ArrayLocator(Some(0))], }; match find_target(&mut val, &path) { @@ -1240,7 +1244,7 @@ mod tests { Val::String("second".to_string()), ]); let path = JsonPath { - elements: vec![PathElement::ArrayLocator(-1)], + elements: vec![PathElement::ArrayLocator(Some(-1))], }; match find_target(&mut val, &path) { @@ -1282,7 +1286,7 @@ mod tests { fn test_mutate_json() { let mut val = Val::Array(vec![Val::String("test".to_string())]); let path = JsonPath { - elements: vec![PathElement::ArrayLocator(0)], + elements: vec![PathElement::ArrayLocator(Some(0))], }; let result = mutate_json_by_path(&mut val, path, |target| match target { @@ -1301,7 +1305,7 @@ mod tests { fn test_mutate_json_none() { let mut val = Val::Array(vec![]); let path = JsonPath { - elements: vec![PathElement::ArrayLocator(0)], + elements: vec![PathElement::ArrayLocator(Some(0))], }; let result: Option<()> = mutate_json_by_path(&mut val, path, |_| { @@ -1387,7 +1391,7 @@ mod tests { let result = result.unwrap(); match &result.elements[..] { - [PathElement::Root(), PathElement::ArrayLocator(index)] if *index == 3 => {} + [PathElement::Root(), PathElement::ArrayLocator(index)] if *index == Some(3) => {} _ => panic!("Expected root and array locator"), } } From aad0522f56c05524a070d733d559c0e847c3d433 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 10 Feb 2025 14:29:19 +0200 Subject: [PATCH 5/6] refactor to make parser more readable --- core/json/json_path.rs | 374 +++++++++++++++++++++++++++-------------- 1 file changed, 249 insertions(+), 125 deletions(-) diff --git a/core/json/json_path.rs b/core/json/json_path.rs index 6a714b79c..9413a8e32 100644 --- a/core/json/json_path.rs +++ b/core/json/json_path.rs @@ -61,148 +61,274 @@ fn estimate_path_capacity(input: &str) -> usize { } /// Parses path into a Vec of Strings, where each string is a key or an array locator. -pub fn json_path<'a>(path: &'a str) -> crate::Result> { +pub fn json_path(path: &str) -> crate::Result> { if path.is_empty() { bail_parse_error!("Bad json path: {}", path) } let mut parser_state = PPState::Start; let mut index_state = ArrayIndexState::Start; - let mut key_start = 0; let mut index_buffer: i128 = 0; - let mut path_components = Vec::with_capacity(estimate_path_capacity(path)); let mut path_iter = path.char_indices(); while let Some(ch) = path_iter.next() { - let ch_len = ch.1.len_utf8(); match parser_state { - PPState::Start => match ch { - (_, '$') => { - path_components.push(PathElement::Root()); - parser_state = PPState::AfterRoot - } - (_, _) => bail_parse_error!("Bad json path: {}", path), - }, - PPState::AfterRoot => match ch { - (idx, '.') => { - parser_state = PPState::InKey; - key_start = idx + ch_len; - } - (_, '[') => { - index_state = ArrayIndexState::Start; - parser_state = PPState::InArrayIndex; - index_buffer = 0; - } - (_, _) => bail_parse_error!("Bad json path: {}", path), - }, - PPState::InKey => match ch { - (idx, '.' | '[') => { - let key_end = idx; - - if key_end > key_start { - let key = &path[key_start..key_end]; - if ch.1 == '[' { - index_state = ArrayIndexState::Start; - parser_state = PPState::InArrayIndex; - index_buffer = 0; - } else { - key_start = idx + ch_len; - } - path_components.push(PathElement::Key(Cow::Borrowed(key), false)); - } else { - bail_parse_error!("Bad json path: {}", path) - } - } - (_, ch) => { - match ch { - '"' => { - while let Some((idx, ch)) = path_iter.next() { - match ch { - '\\' => { - path_iter.next(); - } - '"' => { - if key_start < idx { - let key = &path[key_start + 1..idx]; - path_components - .push(PathElement::Key(Cow::Borrowed(key), true)); - parser_state = PPState::ExpectDotOrBracket; - break; - } - } - _ => continue, - } - } - } - _ => continue, - }; - } - }, - PPState::InArrayIndex => { - let (_, c) = ch; - - match (&index_state, c) { - (ArrayIndexState::Start, '#') => index_state = ArrayIndexState::AfterHash, - (ArrayIndexState::Start, '0'..='9') => { - index_buffer = c.to_digit(10).unwrap() as i128; - index_state = ArrayIndexState::CollectingNumbers; - } - (ArrayIndexState::AfterHash, '-') => { - if let Some((_, next_c)) = path_iter.next() { - if next_c.is_ascii_digit() { - index_buffer = -(next_c.to_digit(10).unwrap() as i128); - index_state = ArrayIndexState::CollectingNumbers; - } else { - bail_parse_error!("Bad json path: {}", path); - } - } else { - bail_parse_error!("Bad json path: {}", path); - } - } - (ArrayIndexState::AfterHash, ']') => { - parser_state = PPState::ExpectDotOrBracket; - path_components.push(PathElement::ArrayLocator(None)) - } - (ArrayIndexState::CollectingNumbers, '0'..='9') => { - let (new_num, is_max) = collect_num( - index_buffer, - c.to_digit(10).unwrap() as i128, - index_buffer < 0, - ); - if is_max { - index_state = ArrayIndexState::IsMax; - } - index_buffer = new_num; - } - (ArrayIndexState::IsMax, '0'..='9') => continue, - (ArrayIndexState::CollectingNumbers | ArrayIndexState::IsMax, ']') => { - parser_state = PPState::ExpectDotOrBracket; - path_components.push(PathElement::ArrayLocator(Some(index_buffer as i32))) - } - (_, _) => bail_parse_error!("Bad json path: {}", path), - } + PPState::Start => { + handle_start(ch, &mut parser_state, &mut path_components, path)?; + } + PPState::AfterRoot => { + handle_after_root( + ch, + &mut parser_state, + &mut index_state, + &mut key_start, + &mut index_buffer, + path, + )?; + } + PPState::InKey => { + handle_in_key( + ch, + &mut parser_state, + &mut index_state, + &mut key_start, + &mut index_buffer, + &mut path_components, + &mut path_iter, + path, + )?; + } + PPState::InArrayIndex => { + handle_array_index( + ch, + &mut parser_state, + &mut index_state, + &mut index_buffer, + &mut path_components, + &mut path_iter, + path, + )?; + } + PPState::ExpectDotOrBracket => { + handle_expect_dot_or_bracket( + ch, + &mut parser_state, + &mut index_state, + &mut key_start, + &mut index_buffer, + path, + )?; } - PPState::ExpectDotOrBracket => match ch { - (idx, '.') => { - key_start = idx + ch_len; - parser_state = PPState::InKey; - } - (_, '[') => { - index_state = ArrayIndexState::Start; - parser_state = PPState::InArrayIndex; - index_buffer = 0; - } - (_, _) => bail_parse_error!("Bad json path: {}", path), - }, } } + + finalize_path(parser_state, key_start, path, &mut path_components)?; + Ok(JsonPath { + elements: path_components, + }) +} + +fn handle_start( + ch: (usize, char), + parser_state: &mut PPState, + path_components: &mut Vec, + path: &str, +) -> crate::Result<()> { + match ch { + (_, '$') => { + path_components.push(PathElement::Root()); + *parser_state = PPState::AfterRoot; + Ok(()) + } + (_, _) => bail_parse_error!("Bad json path: {}", path), + } +} + +fn handle_after_root( + ch: (usize, char), + parser_state: &mut PPState, + index_state: &mut ArrayIndexState, + key_start: &mut usize, + index_buffer: &mut i128, + path: &str, +) -> crate::Result<()> { + match ch { + (idx, '.') => { + *parser_state = PPState::InKey; + *key_start = idx + ch.1.len_utf8(); + Ok(()) + } + (_, '[') => { + *index_state = ArrayIndexState::Start; + *parser_state = PPState::InArrayIndex; + *index_buffer = 0; + Ok(()) + } + (_, _) => bail_parse_error!("Bad json path: {}", path), + } +} + +fn handle_in_key<'a>( + ch: (usize, char), + parser_state: &mut PPState, + index_state: &mut ArrayIndexState, + key_start: &mut usize, + index_buffer: &mut i128, + path_components: &mut Vec>, + path_iter: &mut std::str::CharIndices, + path: &'a str, +) -> crate::Result<()> { + match ch { + (idx, '.' | '[') => { + let key_end = idx; + if key_end > *key_start { + let key = &path[*key_start..key_end]; + if ch.1 == '[' { + *index_state = ArrayIndexState::Start; + *parser_state = PPState::InArrayIndex; + *index_buffer = 0; + } else { + *key_start = idx + ch.1.len_utf8(); + } + path_components.push(PathElement::Key(Cow::Borrowed(key), false)); + } else { + bail_parse_error!("Bad json path: {}", path) + } + } + (_, '"') => { + handle_quoted_key(parser_state, key_start, path_components, path_iter, path)?; + } + (_, _) => (), + } + Ok(()) +} + +fn handle_quoted_key<'a>( + parser_state: &mut PPState, + key_start: &mut usize, + path_components: &mut Vec>, + path_iter: &mut std::str::CharIndices, + path: &'a str, +) -> crate::Result<()> { + while let Some((idx, ch)) = path_iter.next() { + match ch { + '\\' => { + path_iter.next(); + } + '"' => { + if *key_start < idx { + let key = &path[*key_start + 1..idx]; + path_components.push(PathElement::Key(Cow::Borrowed(key), true)); + *parser_state = PPState::ExpectDotOrBracket; + return Ok(()); + } + } + _ => continue, + } + } + Ok(()) +} + +fn handle_array_index<'a>( + ch: (usize, char), + parser_state: &mut PPState, + index_state: &mut ArrayIndexState, + index_buffer: &mut i128, + path_components: &mut Vec>, + path_iter: &mut std::str::CharIndices, + path: &str, +) -> crate::Result<()> { + match (&index_state, ch.1) { + (ArrayIndexState::Start, '#') => { + *index_state = ArrayIndexState::AfterHash; + } + (ArrayIndexState::Start, '0'..='9') => { + *index_buffer = ch.1.to_digit(10).unwrap() as i128; + *index_state = ArrayIndexState::CollectingNumbers; + } + (ArrayIndexState::AfterHash, '-') => { + handle_negative_index(index_state, index_buffer, path_iter, path)?; + } + (ArrayIndexState::AfterHash, ']') => { + *parser_state = PPState::ExpectDotOrBracket; + path_components.push(PathElement::ArrayLocator(None)); + } + (ArrayIndexState::CollectingNumbers, '0'..='9') => { + let (new_num, is_max) = collect_num( + *index_buffer, + ch.1.to_digit(10).unwrap() as i128, + *index_buffer < 0, + ); + if is_max { + *index_state = ArrayIndexState::IsMax; + } + *index_buffer = new_num; + } + (ArrayIndexState::IsMax, '0'..='9') => (), + (ArrayIndexState::CollectingNumbers | ArrayIndexState::IsMax, ']') => { + *parser_state = PPState::ExpectDotOrBracket; + path_components.push(PathElement::ArrayLocator(Some(*index_buffer as i32))); + } + (_, _) => bail_parse_error!("Bad json path: {}", path), + } + Ok(()) +} + +fn handle_negative_index( + index_state: &mut ArrayIndexState, + index_buffer: &mut i128, + path_iter: &mut std::str::CharIndices, + path: &str, +) -> crate::Result<()> { + if let Some((_, next_c)) = path_iter.next() { + if next_c.is_ascii_digit() { + *index_buffer = -(next_c.to_digit(10).unwrap() as i128); + *index_state = ArrayIndexState::CollectingNumbers; + Ok(()) + } else { + bail_parse_error!("Bad json path: {}", path) + } + } else { + bail_parse_error!("Bad json path: {}", path) + } +} + +fn handle_expect_dot_or_bracket( + ch: (usize, char), + parser_state: &mut PPState, + index_state: &mut ArrayIndexState, + key_start: &mut usize, + index_buffer: &mut i128, + path: &str, +) -> crate::Result<()> { + match ch { + (idx, '.') => { + *key_start = idx + ch.1.len_utf8(); + *parser_state = PPState::InKey; + Ok(()) + } + (_, '[') => { + *index_state = ArrayIndexState::Start; + *parser_state = PPState::InArrayIndex; + *index_buffer = 0; + Ok(()) + } + (_, _) => bail_parse_error!("Bad json path: {}", path), + } +} + +fn finalize_path<'a>( + parser_state: PPState, + key_start: usize, + path: &'a str, + path_components: &mut Vec>, +) -> crate::Result<()> { match parser_state { PPState::InArrayIndex => bail_parse_error!("Bad json path: {}", path), PPState::InKey => { if key_start < path.len() { let key = &path[key_start..]; - if key.starts_with('"') & !key.ends_with('"') { bail_parse_error!("Bad json path: {}", path) } @@ -213,9 +339,7 @@ pub fn json_path<'a>(path: &'a str) -> crate::Result> { } _ => (), } - Ok(JsonPath { - elements: path_components, - }) + Ok(()) } #[cfg(test)] From b8b0f860d7c1e5d7153615f102a1e022821dbf29 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 10 Feb 2025 15:34:47 +0200 Subject: [PATCH 6/6] clippy --- core/json/json_path.rs | 5 ++--- core/json/mod.rs | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/core/json/json_path.rs b/core/json/json_path.rs index 9413a8e32..33631e469 100644 --- a/core/json/json_path.rs +++ b/core/json/json_path.rs @@ -41,7 +41,6 @@ type IsMaxNumber = bool; fn collect_num(current: i128, adding: i128, negative: bool) -> (i128, IsMaxNumber) { let ten = 10i128; - let adding = adding as i128; let result = if negative { current.saturating_mul(ten).saturating_sub(adding) @@ -230,12 +229,12 @@ fn handle_quoted_key<'a>( Ok(()) } -fn handle_array_index<'a>( +fn handle_array_index( ch: (usize, char), parser_state: &mut PPState, index_state: &mut ArrayIndexState, index_buffer: &mut i128, - path_components: &mut Vec>, + path_components: &mut Vec>, path_iter: &mut std::str::CharIndices, path: &str, ) -> crate::Result<()> { diff --git a/core/json/mod.rs b/core/json/mod.rs index a7d0101cd..f7a2e0205 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -225,7 +225,7 @@ pub fn json_arrow_shift_extract( } let json = get_json_value(value)?; - let extracted = json_extract_single(&json, path, false)?.unwrap_or_else(|| &Val::Null); + let extracted = json_extract_single(&json, path, false)?.unwrap_or(&Val::Null); convert_json_to_db_type(extracted, true) } @@ -242,7 +242,7 @@ pub fn json_extract(value: &OwnedValue, paths: &[OwnedValue]) -> crate::Result crate::Result { - let extracted = - json_extract_single(&json, path, true)?.unwrap_or_else(|| &Val::Null); + let extracted = json_extract_single(&json, path, true)?.unwrap_or(&Val::Null); if paths.len() == 1 && extracted == &Val::Null { return Ok(OwnedValue::Null);