mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-08 18:54:21 +01:00
Merge 'Nicer parse errors using miette' from Samyak Sarnayak
I noticed that the parse errors were a bit hard to read - only the nearest token and the line/col offsets were printed. I made a first attempt at improving the errors using [miette](https://github.com/zkat/miette). - Added a derive for `miette::Diagnostic` to both the parser's error type and LimboError. - Added the miette dependency to both sqlite3_parser and core. The `fancy` feature is only enabled for the CLI, so the overhead on the libraries (core, parser) should be minimal. Possible future improvements: - Add spans to AST nodes so that errors can better point to the correct token. See upstream issue: https://github.com/gwenn/lemon-rs/issues/33 - Construct more errors with offset information. I noticed that most parser errors are constructed with `None` as the offset. - The messages are a bit redundant (example "syntax error at (1, 6)"). This can be improved. Comparison. Before: ``` ❯ cargo run --package limbo --bin limbo database.db --output-mode pretty ... limbo> selet * from a; [2025-01-05T11:22:55Z ERROR sqlite3Parser] near "Token([115, 101, 108, 101, 116])": syntax error Parse error: near "selet": syntax error at (1, 6) ``` <img width="969" alt="image" src="https://github.com/user-attachments/assets/82651a77-f5ac-4eee-b208-88c6ea7fc9b7" /> After: ``` ❯ cargo run --package limbo --bin limbo database.db --output-mode pretty ... limbo> selet * from a; [2025-01-05T12:25:52Z ERROR sqlite3Parser] near "Token([115, 101, 108, 101, 116])": syntax error × near "selet": syntax error at (1, 6) ╭──── 1 │ selet * from a · ▲ · ╰── syntax error ╰──── ``` <img width="980" alt="image" src="https://github.com/user-attachments/assets/747a90e5-5085-41f9-b0fe-25864179ca35" /> Closes #618
This commit is contained in:
@@ -79,6 +79,12 @@ impl<S: Splitter> Scanner<S> {
|
||||
pub fn column(&self) -> usize {
|
||||
self.column
|
||||
}
|
||||
|
||||
/// Current byte offset in the source string
|
||||
pub fn offset(&self) -> usize {
|
||||
self.offset
|
||||
}
|
||||
|
||||
/// Associated splitter
|
||||
pub fn splitter(&self) -> &S {
|
||||
&self.splitter
|
||||
|
||||
@@ -7,57 +7,91 @@ use crate::parser::ParserError;
|
||||
|
||||
/// SQL lexer and parser errors
|
||||
#[non_exhaustive]
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, miette::Diagnostic)]
|
||||
#[diagnostic()]
|
||||
pub enum Error {
|
||||
/// I/O Error
|
||||
Io(io::Error),
|
||||
/// Lexer error
|
||||
UnrecognizedToken(Option<(u64, usize)>),
|
||||
UnrecognizedToken(
|
||||
Option<(u64, usize)>,
|
||||
#[label("here")] Option<miette::SourceSpan>,
|
||||
),
|
||||
/// Missing quote or double-quote or backtick
|
||||
UnterminatedLiteral(Option<(u64, usize)>),
|
||||
UnterminatedLiteral(
|
||||
Option<(u64, usize)>,
|
||||
#[label("here")] Option<miette::SourceSpan>,
|
||||
),
|
||||
/// Missing `]`
|
||||
UnterminatedBracket(Option<(u64, usize)>),
|
||||
UnterminatedBracket(
|
||||
Option<(u64, usize)>,
|
||||
#[label("here")] Option<miette::SourceSpan>,
|
||||
),
|
||||
/// Missing `*/`
|
||||
UnterminatedBlockComment(Option<(u64, usize)>),
|
||||
UnterminatedBlockComment(
|
||||
Option<(u64, usize)>,
|
||||
#[label("here")] Option<miette::SourceSpan>,
|
||||
),
|
||||
/// Invalid parameter name
|
||||
BadVariableName(Option<(u64, usize)>),
|
||||
BadVariableName(
|
||||
Option<(u64, usize)>,
|
||||
#[label("here")] Option<miette::SourceSpan>,
|
||||
),
|
||||
/// Invalid number format
|
||||
BadNumber(Option<(u64, usize)>),
|
||||
BadNumber(
|
||||
Option<(u64, usize)>,
|
||||
#[label("here")] Option<miette::SourceSpan>,
|
||||
),
|
||||
/// Invalid or missing sign after `!`
|
||||
ExpectedEqualsSign(Option<(u64, usize)>),
|
||||
ExpectedEqualsSign(
|
||||
Option<(u64, usize)>,
|
||||
#[label("here")] Option<miette::SourceSpan>,
|
||||
),
|
||||
/// BLOB literals are string literals containing hexadecimal data and preceded by a single "x" or "X" character.
|
||||
MalformedBlobLiteral(Option<(u64, usize)>),
|
||||
MalformedBlobLiteral(
|
||||
Option<(u64, usize)>,
|
||||
#[label("here")] Option<miette::SourceSpan>,
|
||||
),
|
||||
/// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
|
||||
MalformedHexInteger(Option<(u64, usize)>),
|
||||
MalformedHexInteger(
|
||||
Option<(u64, usize)>,
|
||||
#[label("here")] Option<miette::SourceSpan>,
|
||||
),
|
||||
/// Grammar error
|
||||
ParserError(ParserError, Option<(u64, usize)>),
|
||||
ParserError(
|
||||
ParserError,
|
||||
Option<(u64, usize)>,
|
||||
#[label("syntax error")] Option<miette::SourceSpan>,
|
||||
),
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match *self {
|
||||
Self::Io(ref err) => err.fmt(f),
|
||||
Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos.unwrap()),
|
||||
Self::UnterminatedLiteral(pos) => {
|
||||
Self::UnrecognizedToken(pos, _) => {
|
||||
write!(f, "unrecognized token at {:?}", pos.unwrap())
|
||||
}
|
||||
Self::UnterminatedLiteral(pos, _) => {
|
||||
write!(f, "non-terminated literal at {:?}", pos.unwrap())
|
||||
}
|
||||
Self::UnterminatedBracket(pos) => {
|
||||
Self::UnterminatedBracket(pos, _) => {
|
||||
write!(f, "non-terminated bracket at {:?}", pos.unwrap())
|
||||
}
|
||||
Self::UnterminatedBlockComment(pos) => {
|
||||
Self::UnterminatedBlockComment(pos, _) => {
|
||||
write!(f, "non-terminated block comment at {:?}", pos.unwrap())
|
||||
}
|
||||
Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos.unwrap()),
|
||||
Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos.unwrap()),
|
||||
Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos.unwrap()),
|
||||
Self::MalformedBlobLiteral(pos) => {
|
||||
Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos.unwrap()),
|
||||
Self::BadNumber(pos, _) => write!(f, "bad number at {:?}", pos.unwrap()),
|
||||
Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos.unwrap()),
|
||||
Self::MalformedBlobLiteral(pos, _) => {
|
||||
write!(f, "malformed blob literal at {:?}", pos.unwrap())
|
||||
}
|
||||
Self::MalformedHexInteger(pos) => {
|
||||
Self::MalformedHexInteger(pos, _) => {
|
||||
write!(f, "malformed hex integer at {:?}", pos.unwrap())
|
||||
}
|
||||
Self::ParserError(ref msg, Some(pos)) => write!(f, "{msg} at {pos:?}"),
|
||||
Self::ParserError(ref msg, _) => write!(f, "{msg}"),
|
||||
Self::ParserError(ref msg, Some(pos), _) => write!(f, "{msg} at {pos:?}"),
|
||||
Self::ParserError(ref msg, _, _) => write!(f, "{msg}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -72,7 +106,7 @@ impl From<io::Error> for Error {
|
||||
|
||||
impl From<ParserError> for Error {
|
||||
fn from(err: ParserError) -> Self {
|
||||
Self::ParserError(err, None)
|
||||
Self::ParserError(err, None, None)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,16 +114,16 @@ impl ScanError for Error {
|
||||
fn position(&mut self, line: u64, column: usize) {
|
||||
match *self {
|
||||
Self::Io(_) => {}
|
||||
Self::UnrecognizedToken(ref mut pos) => *pos = Some((line, column)),
|
||||
Self::UnterminatedLiteral(ref mut pos) => *pos = Some((line, column)),
|
||||
Self::UnterminatedBracket(ref mut pos) => *pos = Some((line, column)),
|
||||
Self::UnterminatedBlockComment(ref mut pos) => *pos = Some((line, column)),
|
||||
Self::BadVariableName(ref mut pos) => *pos = Some((line, column)),
|
||||
Self::BadNumber(ref mut pos) => *pos = Some((line, column)),
|
||||
Self::ExpectedEqualsSign(ref mut pos) => *pos = Some((line, column)),
|
||||
Self::MalformedBlobLiteral(ref mut pos) => *pos = Some((line, column)),
|
||||
Self::MalformedHexInteger(ref mut pos) => *pos = Some((line, column)),
|
||||
Self::ParserError(_, ref mut pos) => *pos = Some((line, column)),
|
||||
Self::UnrecognizedToken(ref mut pos, _) => *pos = Some((line, column)),
|
||||
Self::UnterminatedLiteral(ref mut pos, _) => *pos = Some((line, column)),
|
||||
Self::UnterminatedBracket(ref mut pos, _) => *pos = Some((line, column)),
|
||||
Self::UnterminatedBlockComment(ref mut pos, _) => *pos = Some((line, column)),
|
||||
Self::BadVariableName(ref mut pos, _) => *pos = Some((line, column)),
|
||||
Self::BadNumber(ref mut pos, _) => *pos = Some((line, column)),
|
||||
Self::ExpectedEqualsSign(ref mut pos, _) => *pos = Some((line, column)),
|
||||
Self::MalformedBlobLiteral(ref mut pos, _) => *pos = Some((line, column)),
|
||||
Self::MalformedHexInteger(ref mut pos, _) => *pos = Some((line, column)),
|
||||
Self::ParserError(_, ref mut pos, _) => *pos = Some((line, column)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,6 +57,11 @@ impl<'input> Parser<'input> {
|
||||
pub fn column(&self) -> usize {
|
||||
self.scanner.column()
|
||||
}
|
||||
|
||||
/// Current byte offset in input
|
||||
pub fn offset(&self) -> usize {
|
||||
self.scanner.offset()
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -230,13 +235,21 @@ impl FallibleIterator for Parser<'_> {
|
||||
}
|
||||
self.parser.sqlite3ParserFinalize();
|
||||
if let Some(e) = self.parser.ctx.error() {
|
||||
let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column())));
|
||||
let err = Error::ParserError(
|
||||
e,
|
||||
Some((self.scanner.line(), self.scanner.column())),
|
||||
Some((self.offset() - 1).into()),
|
||||
);
|
||||
return Err(err);
|
||||
}
|
||||
let cmd = self.parser.ctx.cmd();
|
||||
if let Some(ref cmd) = cmd {
|
||||
if let Err(e) = cmd.check() {
|
||||
let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column())));
|
||||
let err = Error::ParserError(
|
||||
e,
|
||||
Some((self.scanner.line(), self.scanner.column())),
|
||||
Some((self.offset() - 1).into()),
|
||||
);
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
@@ -332,7 +345,7 @@ impl Splitter for Tokenizer {
|
||||
if let Some(i) = end {
|
||||
Ok((None, i + 1))
|
||||
} else {
|
||||
Err(Error::UnterminatedBlockComment(None))
|
||||
Err(Error::UnterminatedBlockComment(None, None))
|
||||
}
|
||||
} else {
|
||||
Ok((Some((&data[..1], TK_SLASH)), 1))
|
||||
@@ -381,10 +394,10 @@ impl Splitter for Tokenizer {
|
||||
if *b == b'=' {
|
||||
Ok((Some((&data[..2], TK_NE)), 2))
|
||||
} else {
|
||||
Err(Error::ExpectedEqualsSign(None))
|
||||
Err(Error::ExpectedEqualsSign(None, None))
|
||||
}
|
||||
} else {
|
||||
Err(Error::ExpectedEqualsSign(None))
|
||||
Err(Error::ExpectedEqualsSign(None, None))
|
||||
}
|
||||
}
|
||||
b'|' => {
|
||||
@@ -419,7 +432,7 @@ impl Splitter for Tokenizer {
|
||||
// Keep original quotes / '[' ... ’]'
|
||||
Ok((Some((&data[0..=i], TK_ID)), i + 1))
|
||||
} else {
|
||||
Err(Error::UnterminatedBracket(None))
|
||||
Err(Error::UnterminatedBracket(None, None))
|
||||
}
|
||||
}
|
||||
b'?' => {
|
||||
@@ -437,14 +450,14 @@ impl Splitter for Tokenizer {
|
||||
.skip(1)
|
||||
.position(|&b| !is_identifier_continue(b))
|
||||
{
|
||||
Some(0) => Err(Error::BadVariableName(None)),
|
||||
Some(0) => Err(Error::BadVariableName(None, None)),
|
||||
Some(i) => {
|
||||
// '$' is included as part of the name
|
||||
Ok((Some((&data[..=i], TK_VARIABLE)), i + 1))
|
||||
}
|
||||
None => {
|
||||
if data.len() == 1 {
|
||||
return Err(Error::BadVariableName(None));
|
||||
return Err(Error::BadVariableName(None, None));
|
||||
}
|
||||
Ok((Some((data, TK_VARIABLE)), data.len()))
|
||||
}
|
||||
@@ -461,7 +474,7 @@ impl Splitter for Tokenizer {
|
||||
Ok(self.identifierish(data))
|
||||
}
|
||||
}
|
||||
_ => Err(Error::UnrecognizedToken(None)),
|
||||
_ => Err(Error::UnrecognizedToken(None, None)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -493,7 +506,7 @@ fn literal(data: &[u8], quote: u8) -> Result<(Option<Token<'_>>, usize), Error>
|
||||
// keep original quotes in the token
|
||||
Ok((Some((&data[0..i], tt)), i))
|
||||
} else {
|
||||
Err(Error::UnterminatedLiteral(None))
|
||||
Err(Error::UnterminatedLiteral(None, None))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -507,11 +520,11 @@ fn blob_literal(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
|
||||
.find(|&(_, &b)| !b.is_ascii_hexdigit())
|
||||
{
|
||||
if *b != b'\'' || i % 2 != 0 {
|
||||
return Err(Error::MalformedBlobLiteral(None));
|
||||
return Err(Error::MalformedBlobLiteral(None, None));
|
||||
}
|
||||
Ok((Some((&data[2..i], TK_BLOB)), i + 1))
|
||||
} else {
|
||||
Err(Error::MalformedBlobLiteral(None))
|
||||
Err(Error::MalformedBlobLiteral(None, None))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -532,7 +545,7 @@ fn number(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
|
||||
} else if b == b'e' || b == b'E' {
|
||||
return exponential_part(data, i);
|
||||
} else if is_identifier_start(b) {
|
||||
return Err(Error::BadNumber(None));
|
||||
return Err(Error::BadNumber(None, None));
|
||||
}
|
||||
Ok((Some((&data[..i], TK_INTEGER)), i))
|
||||
} else {
|
||||
@@ -546,13 +559,13 @@ fn hex_integer(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
|
||||
if let Some((i, b)) = find_end_of_number(data, 2, u8::is_ascii_hexdigit)? {
|
||||
// Must not be empty (Ox is invalid)
|
||||
if i == 2 || is_identifier_start(b) {
|
||||
return Err(Error::MalformedHexInteger(None));
|
||||
return Err(Error::MalformedHexInteger(None, None));
|
||||
}
|
||||
Ok((Some((&data[..i], TK_INTEGER)), i))
|
||||
} else {
|
||||
// Must not be empty (Ox is invalid)
|
||||
if data.len() == 2 {
|
||||
return Err(Error::MalformedHexInteger(None));
|
||||
return Err(Error::MalformedHexInteger(None, None));
|
||||
}
|
||||
Ok((Some((data, TK_INTEGER)), data.len()))
|
||||
}
|
||||
@@ -564,7 +577,7 @@ fn fractional_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize),
|
||||
if b == b'e' || b == b'E' {
|
||||
return exponential_part(data, i);
|
||||
} else if is_identifier_start(b) {
|
||||
return Err(Error::BadNumber(None));
|
||||
return Err(Error::BadNumber(None, None));
|
||||
}
|
||||
Ok((Some((&data[..i], TK_FLOAT)), i))
|
||||
} else {
|
||||
@@ -579,17 +592,17 @@ fn exponential_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize),
|
||||
let i = if *b == b'+' || *b == b'-' { i + 1 } else { i };
|
||||
if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
|
||||
if j == i + 1 || is_identifier_start(b) {
|
||||
return Err(Error::BadNumber(None));
|
||||
return Err(Error::BadNumber(None, None));
|
||||
}
|
||||
Ok((Some((&data[..j], TK_FLOAT)), j))
|
||||
} else {
|
||||
if data.len() == i + 1 {
|
||||
return Err(Error::BadNumber(None));
|
||||
return Err(Error::BadNumber(None, None));
|
||||
}
|
||||
Ok((Some((data, TK_FLOAT)), data.len()))
|
||||
}
|
||||
} else {
|
||||
Err(Error::BadNumber(None))
|
||||
Err(Error::BadNumber(None, None))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -606,7 +619,7 @@ fn find_end_of_number(
|
||||
{
|
||||
continue;
|
||||
}
|
||||
return Err(Error::BadNumber(None));
|
||||
return Err(Error::BadNumber(None, None));
|
||||
} else {
|
||||
return Ok(Some((j, b)));
|
||||
}
|
||||
@@ -660,7 +673,7 @@ mod tests {
|
||||
let mut s = Scanner::new(tokenizer);
|
||||
expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?;
|
||||
let err = s.scan(input).unwrap_err();
|
||||
assert!(matches!(err, Error::BadNumber(_)));
|
||||
assert!(matches!(err, Error::BadNumber(_, _)));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -361,7 +361,7 @@ fn expect_parser_err_msg(input: &[u8], error_msg: &str) {
|
||||
}
|
||||
fn expect_parser_err(input: &[u8], err: ParserError) {
|
||||
let r = parse(input);
|
||||
if let Error::ParserError(e, _) = r.unwrap_err() {
|
||||
if let Error::ParserError(e, _, _) = r.unwrap_err() {
|
||||
assert_eq!(e, err);
|
||||
} else {
|
||||
panic!("unexpected error type")
|
||||
|
||||
Reference in New Issue
Block a user