From 4d1ecd2d50149c0dd182b2113c53e2bbf3bc780c Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 11 Apr 2025 01:36:36 -0300 Subject: [PATCH 1/4] better MalformedHexInteger --- cli/app.rs | 13 ++--- vendored/sqlite3-parser/src/lexer/scan.rs | 4 +- .../sqlite3-parser/src/lexer/sql/error.rs | 51 +++++++++++++++---- vendored/sqlite3-parser/src/lexer/sql/mod.rs | 21 ++++++-- 4 files changed, 65 insertions(+), 24 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index 3f04ab9fe..bb9515660 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -789,10 +789,9 @@ impl<'a> Limbo<'a> { if let Some(ref mut stats) = statistics { stats.execute_time_elapsed_samples.push(start.elapsed()); } - let _ = self.write_fmt(format_args!( - "{:?}", - miette::Error::from(err).with_source_code(sql.to_owned()) - )); + let report = + miette::Error::from(err).with_source_code(sql.to_owned()); + let _ = self.write_fmt(format_args!("{:?}", report)); break; } } @@ -805,10 +804,8 @@ impl<'a> Limbo<'a> { }, Ok(None) => {} Err(err) => { - let _ = self.write_fmt(format_args!( - "{:?}", - miette::Error::from(err).with_source_code(sql.to_owned()) - )); + let report = miette::Error::from(err).with_source_code(sql.to_owned()); + let _ = self.write_fmt(format_args!("{:?}", report)); anyhow::bail!("We have to throw here, even if we printed error"); } } diff --git a/vendored/sqlite3-parser/src/lexer/scan.rs b/vendored/sqlite3-parser/src/lexer/scan.rs index e0d22cbd5..6c0085b29 100644 --- a/vendored/sqlite3-parser/src/lexer/scan.rs +++ b/vendored/sqlite3-parser/src/lexer/scan.rs @@ -9,7 +9,7 @@ use std::io; /// Error with position pub trait ScanError: Error + From + Sized { /// Update the position where the error occurs - fn position(&mut self, line: u64, column: usize); + fn position(&mut self, line: u64, column: usize, offset: usize); } /// The `(&[u8], TokenType)` is the token. @@ -126,7 +126,7 @@ impl Scanner { let data = &input[self.offset..]; match self.splitter.split(data) { Err(mut e) => { - e.position(self.line, self.column); + e.position(self.line, self.column, self.offset); return Err(e); } Ok((None, 0)) => { diff --git a/vendored/sqlite3-parser/src/lexer/sql/error.rs b/vendored/sqlite3-parser/src/lexer/sql/error.rs index d3e3ac345..fb6d6c32e 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/error.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/error.rs @@ -56,6 +56,8 @@ pub enum Error { MalformedHexInteger( Option<(u64, usize)>, #[label("here")] Option, + Option, + #[help] Option<&'static str>, ), /// Grammar error ParserError( @@ -87,7 +89,7 @@ impl fmt::Display for Error { Self::MalformedBlobLiteral(pos, _) => { write!(f, "malformed blob literal at {:?}", pos.unwrap()) } - Self::MalformedHexInteger(pos, _) => { + Self::MalformedHexInteger(pos, _, _, _) => { write!(f, "malformed hex integer at {:?}", pos.unwrap()) } Self::ParserError(ref msg, Some(pos), _) => write!(f, "{msg} at {pos:?}"), @@ -111,18 +113,45 @@ impl From for Error { } impl ScanError for Error { - fn position(&mut self, line: u64, column: usize) { + fn position(&mut self, line: u64, column: usize, offset: usize) { match *self { Self::Io(_) => {} - Self::UnrecognizedToken(ref mut pos, _) => *pos = Some((line, column)), - Self::UnterminatedLiteral(ref mut pos, _) => *pos = Some((line, column)), - Self::UnterminatedBracket(ref mut pos, _) => *pos = Some((line, column)), - Self::UnterminatedBlockComment(ref mut pos, _) => *pos = Some((line, column)), - Self::BadVariableName(ref mut pos, _) => *pos = Some((line, column)), - Self::BadNumber(ref mut pos, _) => *pos = Some((line, column)), - Self::ExpectedEqualsSign(ref mut pos, _) => *pos = Some((line, column)), - Self::MalformedBlobLiteral(ref mut pos, _) => *pos = Some((line, column)), - Self::MalformedHexInteger(ref mut pos, _) => *pos = Some((line, column)), + Self::UnrecognizedToken(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::UnterminatedLiteral(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::UnterminatedBracket(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::UnterminatedBlockComment(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::BadVariableName(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::BadNumber(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::ExpectedEqualsSign(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::MalformedBlobLiteral(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::MalformedHexInteger(ref mut pos, ref mut src, len, _) => { + *pos = Some((line, column)); + *src = Some((offset, len.unwrap_or(0)).into()); + } Self::ParserError(_, ref mut pos, _) => *pos = Some((line, column)), } } diff --git a/vendored/sqlite3-parser/src/lexer/sql/mod.rs b/vendored/sqlite3-parser/src/lexer/sql/mod.rs index fba3d72d1..ccb05bd01 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/mod.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/mod.rs @@ -172,7 +172,7 @@ macro_rules! try_with_position { Ok(val) => val, Err(err) => { let mut err = Error::from(err); - err.position($scanner.line(), $scanner.column()); + err.position($scanner.line(), $scanner.column(), $scanner.offset() - 1); return Err(err); } } @@ -610,13 +610,28 @@ fn hex_integer(data: &[u8]) -> Result<(Option>, usize), Error> { if let Some((i, b)) = find_end_of_number(data, 2, u8::is_ascii_hexdigit)? { // Must not be empty (Ox is invalid) if i == 2 || is_identifier_start(b) { - return Err(Error::MalformedHexInteger(None, None)); + let (len, help) = if i == 2 && !is_identifier_start(b) { + (i, "Did you forget to add digits after '0x' or '0X'?") + } else { + (i + 1, "There are some invalid digits after '0x' or '0X'") + }; + return Err(Error::MalformedHexInteger( + None, + None, + Some(len), // Length of the malformed hex + Some(help), // Help Message + )); } Ok((Some((&data[..i], TK_INTEGER)), i)) } else { // Must not be empty (Ox is invalid) if data.len() == 2 { - return Err(Error::MalformedHexInteger(None, None)); + return Err(Error::MalformedHexInteger( + None, + None, + Some(2), // Length of the malformed hex + Some("Did you forget to add digits after '0x' or '0X'?"), // Help Message + )); } Ok((Some((data, TK_INTEGER)), data.len())) } From a2ca9e5a4670a867230e55fbf1a589c321806d04 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 11 Apr 2025 10:09:00 -0300 Subject: [PATCH 2/4] better BadNumber --- vendored/sqlite3-parser/src/lexer/sql/error.rs | 11 +++++------ vendored/sqlite3-parser/src/lexer/sql/mod.rs | 15 ++++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/vendored/sqlite3-parser/src/lexer/sql/error.rs b/vendored/sqlite3-parser/src/lexer/sql/error.rs index fb6d6c32e..d94e529a1 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/error.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/error.rs @@ -41,6 +41,7 @@ pub enum Error { BadNumber( Option<(u64, usize)>, #[label("here")] Option, + Option, ), /// Invalid or missing sign after `!` ExpectedEqualsSign( @@ -84,7 +85,7 @@ impl fmt::Display for Error { write!(f, "non-terminated block comment at {:?}", pos.unwrap()) } Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos.unwrap()), - Self::BadNumber(pos, _) => write!(f, "bad number at {:?}", pos.unwrap()), + Self::BadNumber(pos, _, _) => write!(f, "bad number at {:?}", pos.unwrap()), Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos.unwrap()), Self::MalformedBlobLiteral(pos, _) => { write!(f, "malformed blob literal at {:?}", pos.unwrap()) @@ -136,10 +137,6 @@ impl ScanError for Error { *pos = Some((line, column)); *src = Some((offset).into()); } - Self::BadNumber(ref mut pos, ref mut src) => { - *pos = Some((line, column)); - *src = Some((offset).into()); - } Self::ExpectedEqualsSign(ref mut pos, ref mut src) => { *pos = Some((line, column)); *src = Some((offset).into()); @@ -148,7 +145,9 @@ impl ScanError for Error { *pos = Some((line, column)); *src = Some((offset).into()); } - Self::MalformedHexInteger(ref mut pos, ref mut src, len, _) => { + // Exact same handling here + Self::MalformedHexInteger(ref mut pos, ref mut src, len, _) + | Self::BadNumber(ref mut pos, ref mut src, len) => { *pos = Some((line, column)); *src = Some((offset, len.unwrap_or(0)).into()); } diff --git a/vendored/sqlite3-parser/src/lexer/sql/mod.rs b/vendored/sqlite3-parser/src/lexer/sql/mod.rs index ccb05bd01..b65d09863 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/mod.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/mod.rs @@ -596,7 +596,7 @@ fn number(data: &[u8]) -> Result<(Option>, usize), Error> { } else if b == b'e' || b == b'E' { return exponential_part(data, i); } else if is_identifier_start(b) { - return Err(Error::BadNumber(None, None)); + return Err(Error::BadNumber(None, None, Some(i + 1))); } Ok((Some((&data[..i], TK_INTEGER)), i)) } else { @@ -643,7 +643,7 @@ fn fractional_part(data: &[u8], i: usize) -> Result<(Option>, usize), if b == b'e' || b == b'E' { return exponential_part(data, i); } else if is_identifier_start(b) { - return Err(Error::BadNumber(None, None)); + return Err(Error::BadNumber(None, None, Some(i + 1))); } Ok((Some((&data[..i], TK_FLOAT)), i)) } else { @@ -658,17 +658,18 @@ fn exponential_part(data: &[u8], i: usize) -> Result<(Option>, usize), let i = if *b == b'+' || *b == b'-' { i + 1 } else { i }; if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? { if j == i + 1 || is_identifier_start(b) { - return Err(Error::BadNumber(None, None)); + let len = if is_identifier_start(b) { j + 1 } else { j }; + return Err(Error::BadNumber(None, None, Some(len))); } Ok((Some((&data[..j], TK_FLOAT)), j)) } else { if data.len() == i + 1 { - return Err(Error::BadNumber(None, None)); + return Err(Error::BadNumber(None, None, Some(i + 1))); } Ok((Some((data, TK_FLOAT)), data.len())) } } else { - Err(Error::BadNumber(None, None)) + Err(Error::BadNumber(None, None, Some(data.len()))) } } @@ -685,7 +686,7 @@ fn find_end_of_number( { continue; } - return Err(Error::BadNumber(None, None)); + return Err(Error::BadNumber(None, None, Some(j))); } else { return Ok(Some((j, b))); } @@ -739,7 +740,7 @@ mod tests { let mut s = Scanner::new(tokenizer); expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?; let err = s.scan(input).unwrap_err(); - assert!(matches!(err, Error::BadNumber(_, _))); + assert!(matches!(err, Error::BadNumber(_, _, _))); Ok(()) } From 946b59f4ee696f7677b4b142d2131b927e724a6d Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 11 Apr 2025 11:00:06 -0300 Subject: [PATCH 3/4] even better BadNumber --- .../sqlite3-parser/src/lexer/sql/error.rs | 6 +++-- vendored/sqlite3-parser/src/lexer/sql/mod.rs | 26 ++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/vendored/sqlite3-parser/src/lexer/sql/error.rs b/vendored/sqlite3-parser/src/lexer/sql/error.rs index d94e529a1..b85dad504 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/error.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/error.rs @@ -38,10 +38,12 @@ pub enum Error { #[label("here")] Option, ), /// Invalid number format + #[diagnostic(help("Invalid digit in `{3}`"))] BadNumber( Option<(u64, usize)>, #[label("here")] Option, Option, + String, // Holds the offending number as a string ), /// Invalid or missing sign after `!` ExpectedEqualsSign( @@ -85,7 +87,7 @@ impl fmt::Display for Error { write!(f, "non-terminated block comment at {:?}", pos.unwrap()) } Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos.unwrap()), - Self::BadNumber(pos, _, _) => write!(f, "bad number at {:?}", pos.unwrap()), + Self::BadNumber(pos, _, _, _) => write!(f, "bad number at {:?}", pos.unwrap()), Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos.unwrap()), Self::MalformedBlobLiteral(pos, _) => { write!(f, "malformed blob literal at {:?}", pos.unwrap()) @@ -147,7 +149,7 @@ impl ScanError for Error { } // Exact same handling here Self::MalformedHexInteger(ref mut pos, ref mut src, len, _) - | Self::BadNumber(ref mut pos, ref mut src, len) => { + | Self::BadNumber(ref mut pos, ref mut src, len, _) => { *pos = Some((line, column)); *src = Some((offset, len.unwrap_or(0)).into()); } diff --git a/vendored/sqlite3-parser/src/lexer/sql/mod.rs b/vendored/sqlite3-parser/src/lexer/sql/mod.rs index b65d09863..84f59eaeb 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/mod.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/mod.rs @@ -596,7 +596,9 @@ fn number(data: &[u8]) -> Result<(Option>, usize), Error> { } else if b == b'e' || b == b'E' { return exponential_part(data, i); } else if is_identifier_start(b) { - return Err(Error::BadNumber(None, None, Some(i + 1))); + return Err(Error::BadNumber(None, None, Some(i + 1), unsafe { + String::from_utf8_unchecked(data[..i + 1].to_vec()) + })); } Ok((Some((&data[..i], TK_INTEGER)), i)) } else { @@ -643,7 +645,9 @@ fn fractional_part(data: &[u8], i: usize) -> Result<(Option>, usize), if b == b'e' || b == b'E' { return exponential_part(data, i); } else if is_identifier_start(b) { - return Err(Error::BadNumber(None, None, Some(i + 1))); + return Err(Error::BadNumber(None, None, Some(i + 1), unsafe { + String::from_utf8_unchecked(data[..i + 1].to_vec()) + })); } Ok((Some((&data[..i], TK_FLOAT)), i)) } else { @@ -659,17 +663,23 @@ fn exponential_part(data: &[u8], i: usize) -> Result<(Option>, usize), if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? { if j == i + 1 || is_identifier_start(b) { let len = if is_identifier_start(b) { j + 1 } else { j }; - return Err(Error::BadNumber(None, None, Some(len))); + return Err(Error::BadNumber(None, None, Some(len), unsafe { + String::from_utf8_unchecked(data[..len].to_vec()) + })); } Ok((Some((&data[..j], TK_FLOAT)), j)) } else { if data.len() == i + 1 { - return Err(Error::BadNumber(None, None, Some(i + 1))); + return Err(Error::BadNumber(None, None, Some(i + 1), unsafe { + String::from_utf8_unchecked(data[..i + 1].to_vec()) + })); } Ok((Some((data, TK_FLOAT)), data.len())) } } else { - Err(Error::BadNumber(None, None, Some(data.len()))) + Err(Error::BadNumber(None, None, Some(data.len()), unsafe { + String::from_utf8_unchecked(data.to_vec()) + })) } } @@ -686,7 +696,9 @@ fn find_end_of_number( { continue; } - return Err(Error::BadNumber(None, None, Some(j))); + return Err(Error::BadNumber(None, None, Some(j), unsafe { + String::from_utf8_unchecked(data[..j].to_vec()) + })); } else { return Ok(Some((j, b))); } @@ -740,7 +752,7 @@ mod tests { let mut s = Scanner::new(tokenizer); expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?; let err = s.scan(input).unwrap_err(); - assert!(matches!(err, Error::BadNumber(_, _, _))); + assert!(matches!(err, Error::BadNumber(_, _, _, _))); Ok(()) } From c99c6a4be54e3866179f24d038b1eca317c73c06 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 11 Apr 2025 13:40:56 -0300 Subject: [PATCH 4/4] Activate Bench for comparison --- Cargo.lock | 9 +++++++++ Cargo.toml | 1 + vendored/sqlite3-parser/sqlparser_bench/Cargo.toml | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index af3033499..e3b2cd455 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3163,6 +3163,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "sqlparser_bench" +version = "0.1.0" +dependencies = [ + "criterion", + "fallible-iterator", + "limbo_sqlite3_parser", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index ac7490880..44c621be1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ members = [ "sqlite3", "stress", "tests", + "vendored/sqlite3-parser/sqlparser_bench", ] exclude = ["perf/latency/limbo"] diff --git a/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml b/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml index 1ec87cbe4..0bb6e16c5 100644 --- a/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml +++ b/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml @@ -5,7 +5,7 @@ authors = ["Dandandan "] edition = "2018" [dependencies] -sqlite3-parser = { path = "..", default-features = false, features = [ +limbo_sqlite3_parser = { path = "..", default-features = false, features = [ "YYNOERRORRECOVERY", "NDEBUG", ] }