mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-27 04:54:21 +01:00
370 lines
13 KiB
Rust
370 lines
13 KiB
Rust
use crate::bail_parse_error;
|
|
use std::borrow::Cow;
|
|
|
|
#[derive(Clone, Debug, PartialEq)]
|
|
enum PPState {
|
|
Start,
|
|
AfterRoot,
|
|
InKey,
|
|
InArrayIndex,
|
|
ExpectDotOrBracket,
|
|
}
|
|
|
|
#[derive(Clone, Debug, PartialEq)]
|
|
enum ArrayIndexState {
|
|
Start,
|
|
AfterHash,
|
|
CollectingNumbers,
|
|
IsMax,
|
|
}
|
|
|
|
/// Describes a JSON path, which is a sequence of keys and/or array locators.
|
|
#[derive(Clone, Debug)]
|
|
pub struct JsonPath<'a> {
|
|
pub elements: Vec<PathElement<'a>>,
|
|
}
|
|
|
|
type IsQuoted = bool;
|
|
|
|
/// PathElement describes a single element of a JSON path.
|
|
#[derive(Clone, Debug, PartialEq)]
|
|
pub enum PathElement<'a> {
|
|
/// Root element: '$'
|
|
Root(),
|
|
/// JSON key
|
|
Key(Cow<'a, str>, IsQuoted),
|
|
/// Array locator, eg. [2], [#-5]
|
|
ArrayLocator(i32),
|
|
}
|
|
|
|
type IsMaxNumber = bool;
|
|
|
|
fn collect_num(current: i128, adding: i128, negative: bool) -> (i128, IsMaxNumber) {
|
|
let mut is_max = false;
|
|
let cur = if negative {
|
|
current
|
|
.checked_mul(10)
|
|
.and_then(|x| x.checked_sub(adding))
|
|
.unwrap_or_else(|| {
|
|
is_max = true;
|
|
i128::MIN
|
|
})
|
|
} else {
|
|
current
|
|
.checked_mul(10)
|
|
.and_then(|x| x.checked_add(adding))
|
|
.unwrap_or_else(|| {
|
|
is_max = true;
|
|
i128::MAX
|
|
})
|
|
};
|
|
(cur, is_max)
|
|
}
|
|
|
|
fn estimate_path_capacity(input: &str) -> usize {
|
|
// After $ we need either . or [ for each component
|
|
// So divide remaining length by 2 (minimum chars per component)
|
|
// Add 1 for the root component
|
|
1 + (input.len() - 1) / 2
|
|
}
|
|
|
|
/// Parses path into a Vec of Strings, where each string is a key or an array locator.
|
|
pub fn json_path<'a>(path: &'a str) -> crate::Result<JsonPath<'a>> {
|
|
if path.is_empty() {
|
|
bail_parse_error!("Bad json path: {}", path)
|
|
}
|
|
let mut parser_state = PPState::Start;
|
|
let mut index_state = ArrayIndexState::Start;
|
|
let mut is_quoted = false;
|
|
|
|
let mut key_start = 0;
|
|
let mut index_buffer: i128 = 0;
|
|
|
|
let mut path_components = Vec::with_capacity(estimate_path_capacity(path));
|
|
let mut path_iter = path.char_indices();
|
|
|
|
while let Some(ch) = path_iter.next() {
|
|
let ch_len = ch.1.len_utf8();
|
|
match parser_state {
|
|
PPState::Start => match ch {
|
|
(_, '$') => {
|
|
path_components.push(PathElement::Root());
|
|
parser_state = PPState::AfterRoot
|
|
}
|
|
(_, _) => bail_parse_error!("Bad json path: {}", path),
|
|
},
|
|
PPState::AfterRoot => match ch {
|
|
(idx, '.') => {
|
|
parser_state = PPState::InKey;
|
|
key_start = idx + ch_len;
|
|
}
|
|
(_, '[') => {
|
|
index_state = ArrayIndexState::Start;
|
|
parser_state = PPState::InArrayIndex;
|
|
index_buffer = 0;
|
|
}
|
|
(_, _) => bail_parse_error!("Bad json path: {}", path),
|
|
},
|
|
PPState::InKey => match ch {
|
|
(idx, '.' | '[') => {
|
|
if is_quoted {
|
|
continue;
|
|
}
|
|
let key_end = idx;
|
|
|
|
if key_end > key_start {
|
|
let mut key = &path[key_start..key_end];
|
|
println!("{}, {}", &key[0..2], &key[key.len() - 2..]);
|
|
if key[0..2].contains("\"") && key[key.len() - 2..].contains("\"") {
|
|
key = &key[2..key.len() - 2];
|
|
}
|
|
if ch.1 == '[' {
|
|
index_state = ArrayIndexState::Start;
|
|
parser_state = PPState::InArrayIndex;
|
|
index_buffer = 0;
|
|
} else {
|
|
key_start = idx + ch_len;
|
|
}
|
|
path_components.push(PathElement::Key(Cow::Borrowed(key), is_quoted));
|
|
is_quoted = false;
|
|
} else {
|
|
bail_parse_error!("Bad json path: {}", path)
|
|
}
|
|
}
|
|
(idx, ch) => {
|
|
if ch != '"' {
|
|
continue;
|
|
};
|
|
|
|
if key_start == idx {
|
|
is_quoted = true
|
|
} else {
|
|
if let Some(next_char) = path_iter.next() {
|
|
let c = next_char.1;
|
|
match next_char {
|
|
(idx, '.' | '[') => {
|
|
let key_end = idx;
|
|
|
|
if key_end > key_start {
|
|
let key = &path[key_start + 1..key_end - 1];
|
|
if c == '[' {
|
|
index_state = ArrayIndexState::Start;
|
|
parser_state = PPState::InArrayIndex;
|
|
index_buffer = 0;
|
|
} else {
|
|
key_start = idx + c.len_utf8();
|
|
}
|
|
path_components
|
|
.push(PathElement::Key(Cow::Borrowed(key), is_quoted));
|
|
}
|
|
is_quoted = false;
|
|
}
|
|
_ => bail_parse_error!("Bad json path: {}", path),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
PPState::InArrayIndex => {
|
|
let (_, c) = ch;
|
|
|
|
match (&index_state, c) {
|
|
(ArrayIndexState::Start, '#') => index_state = ArrayIndexState::AfterHash,
|
|
(ArrayIndexState::Start, '0'..='9') => {
|
|
index_buffer = c.to_digit(10).unwrap() as i128;
|
|
index_state = ArrayIndexState::CollectingNumbers;
|
|
}
|
|
(ArrayIndexState::AfterHash, '-') => {
|
|
if let Some((_, next_c)) = path_iter.next() {
|
|
if next_c.is_ascii_digit() {
|
|
index_buffer = -(next_c.to_digit(10).unwrap() as i128);
|
|
index_state = ArrayIndexState::CollectingNumbers;
|
|
} else {
|
|
bail_parse_error!("Bad json path: {}", path);
|
|
}
|
|
} else {
|
|
bail_parse_error!("Bad json path: {}", path);
|
|
}
|
|
}
|
|
(ArrayIndexState::CollectingNumbers, '0'..='9') => {
|
|
let (new_num, is_max) = collect_num(
|
|
index_buffer,
|
|
c.to_digit(10).unwrap() as i128,
|
|
index_buffer < 0,
|
|
);
|
|
if is_max {
|
|
index_state = ArrayIndexState::IsMax;
|
|
}
|
|
index_buffer = new_num;
|
|
}
|
|
(ArrayIndexState::IsMax, '0'..='9') => continue,
|
|
(ArrayIndexState::CollectingNumbers | ArrayIndexState::IsMax, ']') => {
|
|
parser_state = PPState::ExpectDotOrBracket;
|
|
path_components.push(PathElement::ArrayLocator(index_buffer as i32))
|
|
}
|
|
(_, _) => bail_parse_error!("Bad json path: {}", path),
|
|
}
|
|
}
|
|
PPState::ExpectDotOrBracket => match ch {
|
|
(idx, '.') => {
|
|
key_start = idx + ch_len;
|
|
parser_state = PPState::InKey;
|
|
}
|
|
(_, '[') => {
|
|
index_state = ArrayIndexState::Start;
|
|
parser_state = PPState::InArrayIndex;
|
|
index_buffer = 0;
|
|
}
|
|
(_, _) => bail_parse_error!("Bad json path: {}", path),
|
|
},
|
|
}
|
|
}
|
|
match parser_state {
|
|
PPState::InArrayIndex => bail_parse_error!("Bad json path: {}", path),
|
|
PPState::InKey => {
|
|
if key_start < path.len() {
|
|
let mut key = &path[key_start..];
|
|
|
|
if key[0..=1].contains("\"") && key[key.len() - 1..].contains("\"") {
|
|
key = &key[1..key.len() - 1];
|
|
}
|
|
path_components.push(PathElement::Key(Cow::Borrowed(key), is_quoted));
|
|
} else {
|
|
bail_parse_error!("Bad json path: {}", path)
|
|
}
|
|
}
|
|
_ => (),
|
|
}
|
|
println!("{:?}", path_components);
|
|
Ok(JsonPath {
|
|
elements: path_components,
|
|
})
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn test_json_path_root() {
|
|
let path = json_path("$").unwrap();
|
|
assert_eq!(path.elements.len(), 1);
|
|
assert_eq!(path.elements[0], PathElement::Root());
|
|
}
|
|
|
|
#[test]
|
|
fn test_json_path_single_locator() {
|
|
let path = json_path("$.x").unwrap();
|
|
assert_eq!(path.elements.len(), 2);
|
|
assert_eq!(path.elements[0], PathElement::Root());
|
|
assert_eq!(
|
|
path.elements[1],
|
|
PathElement::Key(Cow::Borrowed("x"), false)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_json_path_single_array_locator() {
|
|
let path = json_path("$[0]").unwrap();
|
|
assert_eq!(path.elements.len(), 2);
|
|
assert_eq!(path.elements[0], PathElement::Root());
|
|
assert_eq!(path.elements[1], PathElement::ArrayLocator(0));
|
|
}
|
|
|
|
#[test]
|
|
fn test_json_path_single_negative_array_locator() {
|
|
let path = json_path("$[#-2]").unwrap();
|
|
assert_eq!(path.elements.len(), 2);
|
|
assert_eq!(path.elements[0], PathElement::Root());
|
|
assert_eq!(path.elements[1], PathElement::ArrayLocator(-2));
|
|
}
|
|
|
|
#[test]
|
|
fn test_json_path_invalid() {
|
|
let invalid_values = vec![
|
|
"", "$$$", "$.", "$ ", "$[", "$]", "$[-1]", "x", "[]", "$[0", "$[0x]", "$\"",
|
|
];
|
|
|
|
for value in invalid_values {
|
|
let path = json_path(value);
|
|
|
|
match path {
|
|
Err(crate::error::LimboError::ParseError(_)) => {
|
|
// happy path
|
|
}
|
|
_ => panic!("Expected error for: {:?}, got: {:?}", value, path),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_json_path() {
|
|
let path = json_path("$.store.book[0].title").unwrap();
|
|
assert_eq!(path.elements.len(), 5);
|
|
assert_eq!(path.elements[0], PathElement::Root());
|
|
assert_eq!(
|
|
path.elements[1],
|
|
PathElement::Key(Cow::Borrowed("store"), false)
|
|
);
|
|
assert_eq!(
|
|
path.elements[2],
|
|
PathElement::Key(Cow::Borrowed("book"), false)
|
|
);
|
|
assert_eq!(path.elements[3], PathElement::ArrayLocator(0));
|
|
assert_eq!(
|
|
path.elements[4],
|
|
PathElement::Key(Cow::Borrowed("title"), false)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_large_index_wrapping() {
|
|
let path = json_path("$[4294967296]").unwrap();
|
|
assert_eq!(path.elements[1], PathElement::ArrayLocator(0));
|
|
|
|
let path = json_path("$[4294967297]").unwrap();
|
|
assert_eq!(path.elements[1], PathElement::ArrayLocator(1));
|
|
}
|
|
|
|
#[test]
|
|
fn test_deeply_nested_path() {
|
|
let path = json_path("$[0][1][2].key[3].other").unwrap();
|
|
assert_eq!(path.elements.len(), 7);
|
|
assert_eq!(path.elements[0], PathElement::Root());
|
|
assert_eq!(path.elements[1], PathElement::ArrayLocator(0));
|
|
assert_eq!(path.elements[2], PathElement::ArrayLocator(1));
|
|
assert_eq!(path.elements[3], PathElement::ArrayLocator(2));
|
|
assert_eq!(
|
|
path.elements[4],
|
|
PathElement::Key(Cow::Borrowed("key"), false)
|
|
);
|
|
assert_eq!(path.elements[5], PathElement::ArrayLocator(3));
|
|
}
|
|
|
|
#[test]
|
|
fn test_edge_cases() {
|
|
// Empty key
|
|
assert!(json_path("$.").is_err());
|
|
|
|
// Multiple dots
|
|
assert!(json_path("$..key").is_err());
|
|
|
|
// Unclosed brackets
|
|
assert!(json_path("$[0").is_err());
|
|
assert!(json_path("$[").is_err());
|
|
|
|
// Invalid negative index format
|
|
assert!(json_path("$[-1]").is_err()); // should be $[#-1]
|
|
}
|
|
|
|
#[test]
|
|
fn test_path_capacity() {
|
|
// Test that our capacity estimation is reasonable
|
|
let short_path = "$[0]";
|
|
assert!(estimate_path_capacity(short_path) >= 2);
|
|
|
|
let long_path = "$.a.b.c.d.e.f.g[0][1][2]";
|
|
assert!(estimate_path_capacity(long_path) >= 11);
|
|
}
|
|
}
|