Merge 'Nicer parse errors using miette' from Samyak Sarnayak

I noticed that the parse errors were a bit hard to read - only the
nearest token and the line/col offsets were printed.
I made a first attempt at improving the errors using
[miette](https://github.com/zkat/miette).
- Added derive for `miette::Diagnostic` to both the parser's error type
and LimboError.
- Added the miette dependency to both sqlite3_parser and core. The `fancy`
feature is only enabled for the CLI, so the overhead on the libraries
(core, parser) should be minimal.
Some further improvements that could be made in the future:
- Add spans to AST nodes so that errors can better point to the correct
token. See upstream issue: https://github.com/gwenn/lemon-rs/issues/33
- Construct more errors with offset information. I noticed that most
parser errors are constructed with `None` as the offset.
- The messages are a bit redundant (for example, "syntax error at (1, 6)").
This can be improved.
Comparisons.
Before:
```
❯ cargo run --package limbo --bin limbo database.db --output-mode pretty
...
limbo> selet * from a;
[2025-01-05T11:22:55Z ERROR sqlite3Parser] near "Token([115, 101, 108, 101, 116])": syntax error
Parse error: near "selet": syntax error at (1, 6)
```
<img width="969" alt="image" src="https://github.com/user-attachments/assets/82651a77-f5ac-4eee-b208-88c6ea7fc9b7" />
After:
```
❯ cargo run --package limbo --bin limbo database.db --output-mode pretty
...
limbo> selet * from a;
[2025-01-05T12:25:52Z ERROR sqlite3Parser] near "Token([115, 101, 108, 101, 116])": syntax error

  × near "selet": syntax error at (1, 6)
   ╭────
 1 │ selet * from a
   ·     ▲
   ·     ╰── syntax error
   ╰────

```
<img width="980" alt="image" src="https://github.com/user-attachments/assets/747a90e5-5085-41f9-b0fe-25864179ca35" />

Closes #618
This commit is contained in:
Pekka Enberg
2025-01-05 21:09:52 +02:00
10 changed files with 224 additions and 59 deletions

102
Cargo.lock generated
View File

@@ -166,6 +166,15 @@ dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "backtrace-ext"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50"
dependencies = [
"backtrace",
]
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -1021,6 +1030,12 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "is_ci"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
@@ -1165,6 +1180,7 @@ dependencies = [
"dirs",
"env_logger 0.10.2",
"limbo_core",
"miette",
"rustyline",
]
@@ -1196,6 +1212,7 @@ dependencies = [
"libc",
"limbo_macros",
"log",
"miette",
"mimalloc",
"mockall",
"nix 0.29.0",
@@ -1291,6 +1308,37 @@ dependencies = [
"autocfg",
]
[[package]]
name = "miette"
version = "7.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "317f146e2eb7021892722af37cf1b971f0a70c8406f487e24952667616192c64"
dependencies = [
"backtrace",
"backtrace-ext",
"cfg-if",
"miette-derive",
"owo-colors",
"supports-color",
"supports-hyperlinks",
"supports-unicode",
"terminal_size",
"textwrap",
"thiserror 1.0.69",
"unicode-width",
]
[[package]]
name = "miette-derive"
version = "7.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23c9b935fbe1d6cbd1dac857b54a688145e2d93f48db36010514d0f612d0ad67"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.90",
]
[[package]]
name = "mimalloc"
version = "0.1.43"
@@ -1438,6 +1486,12 @@ dependencies = [
"num-traits",
]
[[package]]
name = "owo-colors"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb37767f6569cd834a413442455e0f066d0d522de8630436e2a1761d9726ba56"
[[package]]
name = "parking_lot"
version = "0.12.3"
@@ -2105,6 +2159,7 @@ dependencies = [
"indexmap",
"log",
"memchr",
"miette",
"phf",
"phf_codegen",
"phf_shared",
@@ -2135,6 +2190,27 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "supports-color"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c64fc7232dd8d2e4ac5ce4ef302b1d81e0b80d055b9d77c7c4f51f6aa4c867d6"
dependencies = [
"is_ci",
]
[[package]]
name = "supports-hyperlinks"
version = "3.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "804f44ed3c63152de6a9f90acbea1a110441de43006ea51bcce8f436196a288b"
[[package]]
name = "supports-unicode"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7401a30af6cb5818bb64852270bb722533397edcfc7344954a38f420819ece2"
[[package]]
name = "symbolic-common"
version = "12.12.3"
@@ -2208,12 +2284,32 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "terminal_size"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5352447f921fda68cf61b4101566c0bdb5104eff6804d0678e5227580ab6a4e9"
dependencies = [
"rustix",
"windows-sys 0.59.0",
]
[[package]]
name = "termtree"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "textwrap"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9"
dependencies = [
"unicode-linebreak",
"unicode-width",
]
[[package]]
name = "thiserror"
version = "1.0.69"
@@ -2307,6 +2403,12 @@ version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
[[package]]
name = "unicode-linebreak"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"

View File

@@ -28,3 +28,4 @@ limbo_core = { path = "../core" }
rustyline = "12.0.0"
ctrlc = "3.4.4"
csv = "1.3.1"
miette = { version = "7.4.0", features = ["fancy"] }

View File

@@ -581,7 +581,10 @@ impl Limbo {
break;
}
Err(err) => {
let _ = self.write_fmt(format_args!("{}", err));
let _ = self.write_fmt(format_args!(
"{:?}",
miette::Error::from(err).with_source_code(sql.to_owned())
));
break;
}
}
@@ -595,7 +598,10 @@ impl Limbo {
},
Ok(None) => {}
Err(err) => {
let _ = self.write_fmt(format_args!("{}", err));
let _ = self.write_fmt(format_args!(
"{:?}",
miette::Error::from(err).with_source_code(sql.to_owned())
));
}
}
// for now let's cache flush always

View File

@@ -57,6 +57,7 @@ rand = "0.8.5"
bumpalo = { version = "3.16.0", features = ["collections", "boxed"] }
limbo_macros = { path = "../macros" }
uuid = { version = "1.11.0", features = ["v4", "v7"], optional = true }
miette = "7.4.0"
[target.'cfg(not(target_family = "windows"))'.dev-dependencies]
pprof = { version = "0.14.0", features = ["criterion", "flamegraph"] }

View File

@@ -1,6 +1,6 @@
use thiserror::Error;
#[derive(Debug, Error)]
#[derive(Debug, Error, miette::Diagnostic)]
pub enum LimboError {
#[error("Corrupt database: {0}")]
Corrupt(String),
@@ -10,7 +10,8 @@ pub enum LimboError {
InternalError(String),
#[error("Parse error: {0}")]
ParseError(String),
#[error("Parse error: {0}")]
#[error(transparent)]
#[diagnostic(transparent)]
LexerError(#[from] sqlite3_parser::lexer::sql::Error),
#[error("Conversion error: {0}")]
ConversionError(String),

View File

@@ -31,6 +31,7 @@ fallible-iterator = "0.3"
bitflags = "2.0"
uncased = "0.9.10"
indexmap = "2.0"
miette = "7.4.0"
[dev-dependencies]
env_logger = { version = "0.11", default-features = false }

View File

@@ -79,6 +79,12 @@ impl<S: Splitter> Scanner<S> {
pub fn column(&self) -> usize {
self.column
}
/// Current byte offset in the source string.
///
/// Returns the scanner's `offset` field unchanged, for callers that need a
/// byte position rather than the column number reported by `column()`.
pub fn offset(&self) -> usize {
self.offset
}
/// Associated splitter
pub fn splitter(&self) -> &S {
&self.splitter

View File

@@ -7,57 +7,91 @@ use crate::parser::ParserError;
/// SQL lexer and parser errors
#[non_exhaustive]
#[derive(Debug)]
#[derive(Debug, miette::Diagnostic)]
#[diagnostic()]
pub enum Error {
/// I/O Error
Io(io::Error),
/// Lexer error
UnrecognizedToken(Option<(u64, usize)>),
UnrecognizedToken(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Missing quote or double-quote or backtick
UnterminatedLiteral(Option<(u64, usize)>),
UnterminatedLiteral(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Missing `]`
UnterminatedBracket(Option<(u64, usize)>),
UnterminatedBracket(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Missing `*/`
UnterminatedBlockComment(Option<(u64, usize)>),
UnterminatedBlockComment(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Invalid parameter name
BadVariableName(Option<(u64, usize)>),
BadVariableName(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Invalid number format
BadNumber(Option<(u64, usize)>),
BadNumber(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Invalid or missing sign after `!`
ExpectedEqualsSign(Option<(u64, usize)>),
ExpectedEqualsSign(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// BLOB literals are string literals containing hexadecimal data and preceded by a single "x" or "X" character.
MalformedBlobLiteral(Option<(u64, usize)>),
MalformedBlobLiteral(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
MalformedHexInteger(Option<(u64, usize)>),
MalformedHexInteger(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Grammar error
ParserError(ParserError, Option<(u64, usize)>),
ParserError(
ParserError,
Option<(u64, usize)>,
#[label("syntax error")] Option<miette::SourceSpan>,
),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Self::Io(ref err) => err.fmt(f),
Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos.unwrap()),
Self::UnterminatedLiteral(pos) => {
Self::UnrecognizedToken(pos, _) => {
write!(f, "unrecognized token at {:?}", pos.unwrap())
}
Self::UnterminatedLiteral(pos, _) => {
write!(f, "non-terminated literal at {:?}", pos.unwrap())
}
Self::UnterminatedBracket(pos) => {
Self::UnterminatedBracket(pos, _) => {
write!(f, "non-terminated bracket at {:?}", pos.unwrap())
}
Self::UnterminatedBlockComment(pos) => {
Self::UnterminatedBlockComment(pos, _) => {
write!(f, "non-terminated block comment at {:?}", pos.unwrap())
}
Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos.unwrap()),
Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos.unwrap()),
Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos.unwrap()),
Self::MalformedBlobLiteral(pos) => {
Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos.unwrap()),
Self::BadNumber(pos, _) => write!(f, "bad number at {:?}", pos.unwrap()),
Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos.unwrap()),
Self::MalformedBlobLiteral(pos, _) => {
write!(f, "malformed blob literal at {:?}", pos.unwrap())
}
Self::MalformedHexInteger(pos) => {
Self::MalformedHexInteger(pos, _) => {
write!(f, "malformed hex integer at {:?}", pos.unwrap())
}
Self::ParserError(ref msg, Some(pos)) => write!(f, "{msg} at {pos:?}"),
Self::ParserError(ref msg, _) => write!(f, "{msg}"),
Self::ParserError(ref msg, Some(pos), _) => write!(f, "{msg} at {pos:?}"),
Self::ParserError(ref msg, _, _) => write!(f, "{msg}"),
}
}
}
@@ -72,7 +106,7 @@ impl From<io::Error> for Error {
impl From<ParserError> for Error {
fn from(err: ParserError) -> Self {
Self::ParserError(err, None)
Self::ParserError(err, None, None)
}
}
@@ -80,16 +114,16 @@ impl ScanError for Error {
fn position(&mut self, line: u64, column: usize) {
match *self {
Self::Io(_) => {}
Self::UnrecognizedToken(ref mut pos) => *pos = Some((line, column)),
Self::UnterminatedLiteral(ref mut pos) => *pos = Some((line, column)),
Self::UnterminatedBracket(ref mut pos) => *pos = Some((line, column)),
Self::UnterminatedBlockComment(ref mut pos) => *pos = Some((line, column)),
Self::BadVariableName(ref mut pos) => *pos = Some((line, column)),
Self::BadNumber(ref mut pos) => *pos = Some((line, column)),
Self::ExpectedEqualsSign(ref mut pos) => *pos = Some((line, column)),
Self::MalformedBlobLiteral(ref mut pos) => *pos = Some((line, column)),
Self::MalformedHexInteger(ref mut pos) => *pos = Some((line, column)),
Self::ParserError(_, ref mut pos) => *pos = Some((line, column)),
Self::UnrecognizedToken(ref mut pos, _) => *pos = Some((line, column)),
Self::UnterminatedLiteral(ref mut pos, _) => *pos = Some((line, column)),
Self::UnterminatedBracket(ref mut pos, _) => *pos = Some((line, column)),
Self::UnterminatedBlockComment(ref mut pos, _) => *pos = Some((line, column)),
Self::BadVariableName(ref mut pos, _) => *pos = Some((line, column)),
Self::BadNumber(ref mut pos, _) => *pos = Some((line, column)),
Self::ExpectedEqualsSign(ref mut pos, _) => *pos = Some((line, column)),
Self::MalformedBlobLiteral(ref mut pos, _) => *pos = Some((line, column)),
Self::MalformedHexInteger(ref mut pos, _) => *pos = Some((line, column)),
Self::ParserError(_, ref mut pos, _) => *pos = Some((line, column)),
}
}
}

View File

@@ -57,6 +57,11 @@ impl<'input> Parser<'input> {
pub fn column(&self) -> usize {
self.scanner.column()
}
/// Current byte offset in input.
///
/// Delegates to the underlying scanner's `offset()`.
pub fn offset(&self) -> usize {
self.scanner.offset()
}
}
/*
@@ -230,13 +235,21 @@ impl FallibleIterator for Parser<'_> {
}
self.parser.sqlite3ParserFinalize();
if let Some(e) = self.parser.ctx.error() {
let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column())));
let err = Error::ParserError(
e,
Some((self.scanner.line(), self.scanner.column())),
Some((self.offset() - 1).into()),
);
return Err(err);
}
let cmd = self.parser.ctx.cmd();
if let Some(ref cmd) = cmd {
if let Err(e) = cmd.check() {
let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column())));
let err = Error::ParserError(
e,
Some((self.scanner.line(), self.scanner.column())),
Some((self.offset() - 1).into()),
);
return Err(err);
}
}
@@ -332,7 +345,7 @@ impl Splitter for Tokenizer {
if let Some(i) = end {
Ok((None, i + 1))
} else {
Err(Error::UnterminatedBlockComment(None))
Err(Error::UnterminatedBlockComment(None, None))
}
} else {
Ok((Some((&data[..1], TK_SLASH)), 1))
@@ -381,10 +394,10 @@ impl Splitter for Tokenizer {
if *b == b'=' {
Ok((Some((&data[..2], TK_NE)), 2))
} else {
Err(Error::ExpectedEqualsSign(None))
Err(Error::ExpectedEqualsSign(None, None))
}
} else {
Err(Error::ExpectedEqualsSign(None))
Err(Error::ExpectedEqualsSign(None, None))
}
}
b'|' => {
@@ -419,7 +432,7 @@ impl Splitter for Tokenizer {
// Keep original quotes / '[' ... ]'
Ok((Some((&data[0..=i], TK_ID)), i + 1))
} else {
Err(Error::UnterminatedBracket(None))
Err(Error::UnterminatedBracket(None, None))
}
}
b'?' => {
@@ -437,14 +450,14 @@ impl Splitter for Tokenizer {
.skip(1)
.position(|&b| !is_identifier_continue(b))
{
Some(0) => Err(Error::BadVariableName(None)),
Some(0) => Err(Error::BadVariableName(None, None)),
Some(i) => {
// '$' is included as part of the name
Ok((Some((&data[..=i], TK_VARIABLE)), i + 1))
}
None => {
if data.len() == 1 {
return Err(Error::BadVariableName(None));
return Err(Error::BadVariableName(None, None));
}
Ok((Some((data, TK_VARIABLE)), data.len()))
}
@@ -461,7 +474,7 @@ impl Splitter for Tokenizer {
Ok(self.identifierish(data))
}
}
_ => Err(Error::UnrecognizedToken(None)),
_ => Err(Error::UnrecognizedToken(None, None)),
}
}
}
@@ -493,7 +506,7 @@ fn literal(data: &[u8], quote: u8) -> Result<(Option<Token<'_>>, usize), Error>
// keep original quotes in the token
Ok((Some((&data[0..i], tt)), i))
} else {
Err(Error::UnterminatedLiteral(None))
Err(Error::UnterminatedLiteral(None, None))
}
}
@@ -507,11 +520,11 @@ fn blob_literal(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
.find(|&(_, &b)| !b.is_ascii_hexdigit())
{
if *b != b'\'' || i % 2 != 0 {
return Err(Error::MalformedBlobLiteral(None));
return Err(Error::MalformedBlobLiteral(None, None));
}
Ok((Some((&data[2..i], TK_BLOB)), i + 1))
} else {
Err(Error::MalformedBlobLiteral(None))
Err(Error::MalformedBlobLiteral(None, None))
}
}
@@ -532,7 +545,7 @@ fn number(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
} else if b == b'e' || b == b'E' {
return exponential_part(data, i);
} else if is_identifier_start(b) {
return Err(Error::BadNumber(None));
return Err(Error::BadNumber(None, None));
}
Ok((Some((&data[..i], TK_INTEGER)), i))
} else {
@@ -546,13 +559,13 @@ fn hex_integer(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
if let Some((i, b)) = find_end_of_number(data, 2, u8::is_ascii_hexdigit)? {
// Must not be empty (Ox is invalid)
if i == 2 || is_identifier_start(b) {
return Err(Error::MalformedHexInteger(None));
return Err(Error::MalformedHexInteger(None, None));
}
Ok((Some((&data[..i], TK_INTEGER)), i))
} else {
// Must not be empty (Ox is invalid)
if data.len() == 2 {
return Err(Error::MalformedHexInteger(None));
return Err(Error::MalformedHexInteger(None, None));
}
Ok((Some((data, TK_INTEGER)), data.len()))
}
@@ -564,7 +577,7 @@ fn fractional_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize),
if b == b'e' || b == b'E' {
return exponential_part(data, i);
} else if is_identifier_start(b) {
return Err(Error::BadNumber(None));
return Err(Error::BadNumber(None, None));
}
Ok((Some((&data[..i], TK_FLOAT)), i))
} else {
@@ -579,17 +592,17 @@ fn exponential_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize),
let i = if *b == b'+' || *b == b'-' { i + 1 } else { i };
if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
if j == i + 1 || is_identifier_start(b) {
return Err(Error::BadNumber(None));
return Err(Error::BadNumber(None, None));
}
Ok((Some((&data[..j], TK_FLOAT)), j))
} else {
if data.len() == i + 1 {
return Err(Error::BadNumber(None));
return Err(Error::BadNumber(None, None));
}
Ok((Some((data, TK_FLOAT)), data.len()))
}
} else {
Err(Error::BadNumber(None))
Err(Error::BadNumber(None, None))
}
}
@@ -606,7 +619,7 @@ fn find_end_of_number(
{
continue;
}
return Err(Error::BadNumber(None));
return Err(Error::BadNumber(None, None));
} else {
return Ok(Some((j, b)));
}
@@ -660,7 +673,7 @@ mod tests {
let mut s = Scanner::new(tokenizer);
expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?;
let err = s.scan(input).unwrap_err();
assert!(matches!(err, Error::BadNumber(_)));
assert!(matches!(err, Error::BadNumber(_, _)));
Ok(())
}

View File

@@ -361,7 +361,7 @@ fn expect_parser_err_msg(input: &[u8], error_msg: &str) {
}
fn expect_parser_err(input: &[u8], err: ParserError) {
let r = parse(input);
if let Error::ParserError(e, _) = r.unwrap_err() {
if let Error::ParserError(e, _, _) = r.unwrap_err() {
assert_eq!(e, err);
} else {
panic!("unexpected error type")