Merge 'Better diagnostics' from Pedro Muniz

On my journey of improving DX, I thought it would be a good idea to
improve a bit the Diagnostics that `miette` provides. This is the first
step to provide better errors in the CLI. I want to do better errors for
tables and columns afterwards by giving suggestions of possible names to
choose from, like we have in the Rust compiler or Clippy.
# Changed
- `Scanner` now passes its offset to Errors allowing `miette` to
correctly point to error in the sql statement.
- Some personalized help messages for `MalformedHexInteger`
- Help message for `BadNumber` printing the offending `String`
Examples:
| Before | After |
|--------|--------|
| <img width="458" alt="image" src="https://github.com/user-
attachments/assets/a9d62add-66fc-42c0-b5e9-8b1ef3c436d0" /> | <img
width="263" alt="image" src="https://github.com/user-
attachments/assets/e56af563-3225-4d7b-a303-30e5c9c38f5c" /> |
| <img width="458" alt="image" src="https://github.com/user-
attachments/assets/1f5145ee-3a1c-4616-ad0e-6459444cbb2e" /> | <img
width="277" alt="image" src="https://github.com/user-
attachments/assets/c87cba62-08af-477e-865b-fcca55ac725a" /> |
| <img width="458" alt="image" src="https://github.com/user-
attachments/assets/1601649e-5e2a-45c7-b47a-4f0830d7d78f" /> | <img
width="458" alt="image" src="https://github.com/user-
attachments/assets/9f0fef05-2f67-4b20-b8d2-bad7c44a534e" /> |
| <img width="458" alt="image" src="https://github.com/user-
attachments/assets/70b82de0-7d24-4c57-b953-333c758f6f63" /> | <img
width="458" alt="image" src="https://github.com/user-
attachments/assets/889d3654-0f91-44cf-ab8a-a01b462d3a5c" /> |

Closes #1316
This commit is contained in:
Jussi Saurio
2025-04-12 10:40:44 +03:00
7 changed files with 98 additions and 33 deletions

9
Cargo.lock generated
View File

@@ -3165,6 +3165,15 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "sqlparser_bench"
version = "0.1.0"
dependencies = [
"criterion",
"fallible-iterator",
"limbo_sqlite3_parser",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"

View File

@@ -25,6 +25,7 @@ members = [
"sqlite3",
"stress",
"tests",
"vendored/sqlite3-parser/sqlparser_bench",
]
exclude = ["perf/latency/limbo"]

View File

@@ -789,10 +789,9 @@ impl<'a> Limbo<'a> {
if let Some(ref mut stats) = statistics {
stats.execute_time_elapsed_samples.push(start.elapsed());
}
let _ = self.write_fmt(format_args!(
"{:?}",
miette::Error::from(err).with_source_code(sql.to_owned())
));
let report =
miette::Error::from(err).with_source_code(sql.to_owned());
let _ = self.write_fmt(format_args!("{:?}", report));
break;
}
}
@@ -805,10 +804,8 @@ impl<'a> Limbo<'a> {
},
Ok(None) => {}
Err(err) => {
let _ = self.write_fmt(format_args!(
"{:?}",
miette::Error::from(err).with_source_code(sql.to_owned())
));
let report = miette::Error::from(err).with_source_code(sql.to_owned());
let _ = self.write_fmt(format_args!("{:?}", report));
anyhow::bail!("We have to throw here, even if we printed error");
}
}

View File

@@ -5,7 +5,7 @@ authors = ["Dandandan <danielheres@gmail.com>"]
edition = "2018"
[dependencies]
sqlite3-parser = { path = "..", default-features = false, features = [
limbo_sqlite3_parser = { path = "..", default-features = false, features = [
"YYNOERRORRECOVERY",
"NDEBUG",
] }

View File

@@ -9,7 +9,7 @@ use std::io;
/// Error with position
pub trait ScanError: Error + From<io::Error> + Sized {
/// Update the position where the error occurs
fn position(&mut self, line: u64, column: usize);
fn position(&mut self, line: u64, column: usize, offset: usize);
}
/// The `(&[u8], TokenType)` is the token.
@@ -126,7 +126,7 @@ impl<S: Splitter> Scanner<S> {
let data = &input[self.offset..];
match self.splitter.split(data) {
Err(mut e) => {
e.position(self.line, self.column);
e.position(self.line, self.column, self.offset);
return Err(e);
}
Ok((None, 0)) => {

View File

@@ -38,9 +38,12 @@ pub enum Error {
#[label("here")] Option<miette::SourceSpan>,
),
/// Invalid number format
#[diagnostic(help("Invalid digit in `{3}`"))]
BadNumber(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
Option<usize>,
String, // Holds the offending number as a string
),
/// Invalid or missing sign after `!`
ExpectedEqualsSign(
@@ -56,6 +59,8 @@ pub enum Error {
MalformedHexInteger(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
Option<usize>,
#[help] Option<&'static str>,
),
/// Grammar error
ParserError(
@@ -82,12 +87,12 @@ impl fmt::Display for Error {
write!(f, "non-terminated block comment at {:?}", pos.unwrap())
}
Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos.unwrap()),
Self::BadNumber(pos, _) => write!(f, "bad number at {:?}", pos.unwrap()),
Self::BadNumber(pos, _, _, _) => write!(f, "bad number at {:?}", pos.unwrap()),
Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos.unwrap()),
Self::MalformedBlobLiteral(pos, _) => {
write!(f, "malformed blob literal at {:?}", pos.unwrap())
}
Self::MalformedHexInteger(pos, _) => {
Self::MalformedHexInteger(pos, _, _, _) => {
write!(f, "malformed hex integer at {:?}", pos.unwrap())
}
Self::ParserError(ref msg, Some(pos), _) => write!(f, "{msg} at {pos:?}"),
@@ -111,18 +116,43 @@ impl From<ParserError> for Error {
}
impl ScanError for Error {
fn position(&mut self, line: u64, column: usize) {
fn position(&mut self, line: u64, column: usize, offset: usize) {
match *self {
Self::Io(_) => {}
Self::UnrecognizedToken(ref mut pos, _) => *pos = Some((line, column)),
Self::UnterminatedLiteral(ref mut pos, _) => *pos = Some((line, column)),
Self::UnterminatedBracket(ref mut pos, _) => *pos = Some((line, column)),
Self::UnterminatedBlockComment(ref mut pos, _) => *pos = Some((line, column)),
Self::BadVariableName(ref mut pos, _) => *pos = Some((line, column)),
Self::BadNumber(ref mut pos, _) => *pos = Some((line, column)),
Self::ExpectedEqualsSign(ref mut pos, _) => *pos = Some((line, column)),
Self::MalformedBlobLiteral(ref mut pos, _) => *pos = Some((line, column)),
Self::MalformedHexInteger(ref mut pos, _) => *pos = Some((line, column)),
Self::UnrecognizedToken(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::UnterminatedLiteral(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::UnterminatedBracket(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::UnterminatedBlockComment(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::BadVariableName(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::ExpectedEqualsSign(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::MalformedBlobLiteral(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
// Exact same handling here
Self::MalformedHexInteger(ref mut pos, ref mut src, len, _)
| Self::BadNumber(ref mut pos, ref mut src, len, _) => {
*pos = Some((line, column));
*src = Some((offset, len.unwrap_or(0)).into());
}
Self::ParserError(_, ref mut pos, _) => *pos = Some((line, column)),
}
}

View File

@@ -172,7 +172,7 @@ macro_rules! try_with_position {
Ok(val) => val,
Err(err) => {
let mut err = Error::from(err);
err.position($scanner.line(), $scanner.column());
err.position($scanner.line(), $scanner.column(), $scanner.offset() - 1);
return Err(err);
}
}
@@ -596,7 +596,9 @@ fn number(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
} else if b == b'e' || b == b'E' {
return exponential_part(data, i);
} else if is_identifier_start(b) {
return Err(Error::BadNumber(None, None));
return Err(Error::BadNumber(None, None, Some(i + 1), unsafe {
String::from_utf8_unchecked(data[..i + 1].to_vec())
}));
}
Ok((Some((&data[..i], TK_INTEGER)), i))
} else {
@@ -610,13 +612,28 @@ fn hex_integer(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
if let Some((i, b)) = find_end_of_number(data, 2, u8::is_ascii_hexdigit)? {
// Must not be empty (Ox is invalid)
if i == 2 || is_identifier_start(b) {
return Err(Error::MalformedHexInteger(None, None));
let (len, help) = if i == 2 && !is_identifier_start(b) {
(i, "Did you forget to add digits after '0x' or '0X'?")
} else {
(i + 1, "There are some invalid digits after '0x' or '0X'")
};
return Err(Error::MalformedHexInteger(
None,
None,
Some(len), // Length of the malformed hex
Some(help), // Help Message
));
}
Ok((Some((&data[..i], TK_INTEGER)), i))
} else {
// Must not be empty (Ox is invalid)
if data.len() == 2 {
return Err(Error::MalformedHexInteger(None, None));
return Err(Error::MalformedHexInteger(
None,
None,
Some(2), // Length of the malformed hex
Some("Did you forget to add digits after '0x' or '0X'?"), // Help Message
));
}
Ok((Some((data, TK_INTEGER)), data.len()))
}
@@ -628,7 +645,9 @@ fn fractional_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize),
if b == b'e' || b == b'E' {
return exponential_part(data, i);
} else if is_identifier_start(b) {
return Err(Error::BadNumber(None, None));
return Err(Error::BadNumber(None, None, Some(i + 1), unsafe {
String::from_utf8_unchecked(data[..i + 1].to_vec())
}));
}
Ok((Some((&data[..i], TK_FLOAT)), i))
} else {
@@ -643,17 +662,24 @@ fn exponential_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize),
let i = if *b == b'+' || *b == b'-' { i + 1 } else { i };
if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
if j == i + 1 || is_identifier_start(b) {
return Err(Error::BadNumber(None, None));
let len = if is_identifier_start(b) { j + 1 } else { j };
return Err(Error::BadNumber(None, None, Some(len), unsafe {
String::from_utf8_unchecked(data[..len].to_vec())
}));
}
Ok((Some((&data[..j], TK_FLOAT)), j))
} else {
if data.len() == i + 1 {
return Err(Error::BadNumber(None, None));
return Err(Error::BadNumber(None, None, Some(i + 1), unsafe {
String::from_utf8_unchecked(data[..i + 1].to_vec())
}));
}
Ok((Some((data, TK_FLOAT)), data.len()))
}
} else {
Err(Error::BadNumber(None, None))
Err(Error::BadNumber(None, None, Some(data.len()), unsafe {
String::from_utf8_unchecked(data.to_vec())
}))
}
}
@@ -670,7 +696,9 @@ fn find_end_of_number(
{
continue;
}
return Err(Error::BadNumber(None, None));
return Err(Error::BadNumber(None, None, Some(j), unsafe {
String::from_utf8_unchecked(data[..j].to_vec())
}));
} else {
return Ok(Some((j, b)));
}
@@ -724,7 +752,7 @@ mod tests {
let mut s = Scanner::new(tokenizer);
expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?;
let err = s.scan(input).unwrap_err();
assert!(matches!(err, Error::BadNumber(_, _)));
assert!(matches!(err, Error::BadNumber(_, _, _, _)));
Ok(())
}