mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-21 17:14:19 +01:00
Merge 'Return better syntax error messages' from Diego Reis
Current error messages are too "low level", e.g. returning raw tokens in
messages. This PR improves this a bit.
Before:
```text
turso> with t as (select * from pragma_schema_version); select c.schema_version from t as c;
× unexpected token at SourceSpan { offset: SourceOffset(47), length: 1 }
╭────
1 │ with t as (select * from pragma_schema_version); select c.schema_version from t as c;
· ┬
· ╰── here
╰────
help: expected [TK_SELECT, TK_VALUES, TK_UPDATE, TK_DELETE, TK_INSERT, TK_REPLACE] but found TK_SEMI
```
Now:
```text
turso> with t as (select * from pragma_schema_version); select c.schema_version from t as c;
× unexpected token ';' at offset 47
╭────
1 │ with t as (select * from pragma_schema_version); select c.schema_version from t as c;
· ┬
· ╰── here
╰────
help: expected SELECT, VALUES, UPDATE, DELETE, INSERT, or REPLACE but found ';'
```
@TcMits WDYT?
Closes #3190
This commit is contained in:
@@ -6,45 +6,101 @@ use crate::token::TokenType;
|
||||
#[diagnostic()]
|
||||
pub enum Error {
|
||||
/// Lexer error
|
||||
#[error("unrecognized token at {0:?}")]
|
||||
UnrecognizedToken(#[label("here")] miette::SourceSpan),
|
||||
#[error("unrecognized token '{token_text}' at offset {offset}")]
|
||||
UnrecognizedToken {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
/// Missing quote or double-quote or backtick
|
||||
#[error("non-terminated literal at {0:?}")]
|
||||
UnterminatedLiteral(#[label("here")] miette::SourceSpan),
|
||||
#[error("non-terminated literal '{token_text}' at offset {offset}")]
|
||||
UnterminatedLiteral {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
/// Missing `]`
|
||||
#[error("non-terminated bracket at {0:?}")]
|
||||
UnterminatedBracket(#[label("here")] miette::SourceSpan),
|
||||
#[error("non-terminated bracket '{token_text}' at offset {offset}")]
|
||||
UnterminatedBracket {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
/// Missing `*/`
|
||||
#[error("non-terminated block comment '{token_text}' at offset {offset}")]
|
||||
UnterminatedBlockComment {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
/// Invalid parameter name
|
||||
#[error("bad variable name at {0:?}")]
|
||||
BadVariableName(#[label("here")] miette::SourceSpan),
|
||||
#[error("bad variable name '{token_text}' at offset {offset}")]
|
||||
BadVariableName {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
/// Invalid number format
|
||||
#[error("bad number at {0:?}")]
|
||||
BadNumber(#[label("here")] miette::SourceSpan),
|
||||
#[error("bad number '{token_text}' at offset {offset}")]
|
||||
BadNumber {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
// Bad fractional part of a number
|
||||
#[error("bad fractional part at {0:?}")]
|
||||
BadFractionalPart(#[label("here")] miette::SourceSpan),
|
||||
#[error("bad fractional part '{token_text}' at offset {offset}")]
|
||||
BadFractionalPart {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
// Bad exponent part of a number
|
||||
#[error("bad exponent part at {0:?}")]
|
||||
BadExponentPart(#[label("here")] miette::SourceSpan),
|
||||
#[error("bad exponent part '{token_text}' at offset {offset}")]
|
||||
BadExponentPart {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
/// Invalid or missing sign after `!`
|
||||
#[error("expected = sign at {0:?}")]
|
||||
ExpectedEqualsSign(#[label("here")] miette::SourceSpan),
|
||||
#[error("expected = sign '{token_text}' at offset {offset}")]
|
||||
ExpectedEqualsSign {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
/// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
|
||||
#[error("malformed hex integer at {0:?}")]
|
||||
MalformedHexInteger(#[label("here")] miette::SourceSpan),
|
||||
#[error("malformed hex integer '{token_text}' at offset {offset}")]
|
||||
MalformedHexInteger {
|
||||
#[label("here")]
|
||||
span: miette::SourceSpan,
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
},
|
||||
// parse errors
|
||||
// Unexpected end of file
|
||||
#[error("unexpected end of file")]
|
||||
ParseUnexpectedEOF,
|
||||
// Unexpected token
|
||||
#[error("unexpected token at {parsed_offset:?}")]
|
||||
#[diagnostic(help("expected {expected:?} but found {got:?}"))]
|
||||
#[error("unexpected token '{token_text}' at offset {offset}")]
|
||||
#[diagnostic(help("expected {expected_display} but found '{token_text}'"))]
|
||||
ParseUnexpectedToken {
|
||||
#[label("here")]
|
||||
parsed_offset: miette::SourceSpan,
|
||||
|
||||
got: TokenType,
|
||||
expected: &'static [TokenType],
|
||||
token_text: String,
|
||||
offset: usize,
|
||||
expected_display: String,
|
||||
},
|
||||
// Custom error message
|
||||
#[error("{0}")]
|
||||
|
||||
@@ -297,14 +297,27 @@ impl<'a> Lexer<'a> {
|
||||
|
||||
if start == self.offset {
|
||||
// before the underscore, there was no digit
|
||||
return Err(Error::BadNumber((start, self.offset - start).into()));
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
return Err(Error::BadNumber {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
|
||||
match self.peek() {
|
||||
Some(b) if b.is_ascii_digit() => continue, // Continue if next is a digit
|
||||
_ => {
|
||||
// after the underscore, there is no digit
|
||||
return Err(Error::BadNumber((start, self.offset - start).into()));
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset])
|
||||
.to_string();
|
||||
return Err(Error::BadNumber {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -321,7 +334,13 @@ impl<'a> Lexer<'a> {
|
||||
Some(b'_') => {
|
||||
if start == self.offset {
|
||||
// before the underscore, there was no digit
|
||||
return Err(Error::BadNumber((start, self.offset - start).into()));
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
return Err(Error::BadNumber {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
|
||||
self.eat_and_assert(|b| b == b'_');
|
||||
@@ -329,7 +348,14 @@ impl<'a> Lexer<'a> {
|
||||
Some(b) if b.is_ascii_hexdigit() => continue, // Continue if next is a digit
|
||||
_ => {
|
||||
// after the underscore, there is no digit
|
||||
return Err(Error::BadNumber((start, self.offset - start).into()));
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset])
|
||||
.to_string();
|
||||
return Err(Error::BadNumber {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -514,9 +540,13 @@ impl<'a> Lexer<'a> {
|
||||
self.eat_and_assert(|b| b == b'=');
|
||||
}
|
||||
_ => {
|
||||
return Err(Error::ExpectedEqualsSign(
|
||||
(start, self.offset - start).into(),
|
||||
))
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
return Err(Error::ExpectedEqualsSign {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -567,9 +597,13 @@ impl<'a> Lexer<'a> {
|
||||
}
|
||||
}
|
||||
None => {
|
||||
return Err(Error::UnterminatedLiteral(
|
||||
(start, self.offset - start).into(),
|
||||
))
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
return Err(Error::UnterminatedLiteral {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
@@ -598,9 +632,15 @@ impl<'a> Lexer<'a> {
|
||||
token_type: Some(TokenType::TK_FLOAT),
|
||||
})
|
||||
}
|
||||
Some(b) if is_identifier_start(b) => Err(Error::BadFractionalPart(
|
||||
(start, self.offset - start).into(),
|
||||
)),
|
||||
Some(b) if is_identifier_start(b) => {
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
Err(Error::BadFractionalPart {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
})
|
||||
}
|
||||
_ => Ok(Token {
|
||||
value: &self.input[start..self.offset],
|
||||
token_type: Some(TokenType::TK_FLOAT),
|
||||
@@ -627,11 +667,21 @@ impl<'a> Lexer<'a> {
|
||||
let start_num = self.offset;
|
||||
self.eat_while_number_digit()?;
|
||||
if start_num == self.offset {
|
||||
return Err(Error::BadExponentPart((start, self.offset - start).into()));
|
||||
let token_text = String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
return Err(Error::BadExponentPart {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
|
||||
if self.peek().is_some() && is_identifier_start(self.peek().unwrap()) {
|
||||
return Err(Error::BadExponentPart((start, self.offset - start).into()));
|
||||
let token_text = String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
return Err(Error::BadExponentPart {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Token {
|
||||
@@ -654,13 +704,23 @@ impl<'a> Lexer<'a> {
|
||||
self.eat_while_number_hexdigit()?;
|
||||
|
||||
if start_hex == self.offset {
|
||||
return Err(Error::MalformedHexInteger(
|
||||
(start, self.offset - start).into(),
|
||||
));
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
return Err(Error::MalformedHexInteger {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
|
||||
if self.peek().is_some() && is_identifier_start(self.peek().unwrap()) {
|
||||
return Err(Error::BadNumber((start, self.offset - start).into()));
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
return Err(Error::BadNumber {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
|
||||
return Ok(Token {
|
||||
@@ -689,7 +749,13 @@ impl<'a> Lexer<'a> {
|
||||
})
|
||||
}
|
||||
Some(b) if is_identifier_start(b) => {
|
||||
Err(Error::BadNumber((start, self.offset - start).into()))
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
Err(Error::BadNumber {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
})
|
||||
}
|
||||
_ => Ok(Token {
|
||||
value: &self.input[start..self.offset],
|
||||
@@ -710,9 +776,15 @@ impl<'a> Lexer<'a> {
|
||||
token_type: Some(TokenType::TK_ID),
|
||||
})
|
||||
}
|
||||
None => Err(Error::UnterminatedBracket(
|
||||
(start, self.offset - start).into(),
|
||||
)),
|
||||
None => {
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
Err(Error::UnterminatedBracket {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
})
|
||||
}
|
||||
_ => unreachable!(), // We should not reach here
|
||||
}
|
||||
}
|
||||
@@ -737,7 +809,13 @@ impl<'a> Lexer<'a> {
|
||||
|
||||
// empty variable name
|
||||
if start_id == self.offset {
|
||||
return Err(Error::BadVariableName((start, self.offset - start).into()));
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
return Err(Error::BadVariableName {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Token {
|
||||
@@ -767,9 +845,14 @@ impl<'a> Lexer<'a> {
|
||||
self.eat_and_assert(|b| b == b'\'');
|
||||
|
||||
if (end_hex - start_hex) % 2 != 0 {
|
||||
return Err(Error::UnrecognizedToken(
|
||||
(start, self.offset - start).into(),
|
||||
));
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset])
|
||||
.to_string();
|
||||
return Err(Error::UnrecognizedToken {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Token {
|
||||
@@ -777,9 +860,15 @@ impl<'a> Lexer<'a> {
|
||||
token_type: Some(TokenType::TK_BLOB),
|
||||
})
|
||||
}
|
||||
_ => Err(Error::UnterminatedLiteral(
|
||||
(start, self.offset - start).into(),
|
||||
)),
|
||||
_ => {
|
||||
let token_text =
|
||||
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
Err(Error::UnterminatedLiteral {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
@@ -796,9 +885,12 @@ impl<'a> Lexer<'a> {
|
||||
fn eat_unrecognized(&mut self) -> Result<Token<'a>> {
|
||||
let start = self.offset;
|
||||
self.eat_while(|b| b.is_some() && !b.unwrap().is_ascii_whitespace());
|
||||
Err(Error::UnrecognizedToken(
|
||||
(start, self.offset - start).into(),
|
||||
))
|
||||
let token_text = String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
|
||||
Err(Error::UnrecognizedToken {
|
||||
span: (start, self.offset - start).into(),
|
||||
token_text,
|
||||
offset: start,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,12 +28,17 @@ macro_rules! peek_expect {
|
||||
match (TK_ID, tt.fallback_id_if_ok()) {
|
||||
$(($x, TK_ID) => token,)*
|
||||
_ => {
|
||||
let token_text = String::from_utf8_lossy(token.value).to_string();
|
||||
let offset = $parser.offset();
|
||||
return Err(Error::ParseUnexpectedToken {
|
||||
parsed_offset: ($parser.offset(), token_len).into(),
|
||||
expected: &[
|
||||
$($x,)*
|
||||
],
|
||||
got: tt,
|
||||
token_text: token_text.clone(),
|
||||
offset,
|
||||
expected_display: crate::token::TokenType::format_expected_tokens(&[$($x,)*]),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -242,10 +247,17 @@ impl<'a> Parser<'a> {
|
||||
Some(token) => {
|
||||
if !found_semi {
|
||||
let tt = token.token_type.unwrap();
|
||||
let token_text = String::from_utf8_lossy(token.value).to_string();
|
||||
let offset = self.offset();
|
||||
return Err(Error::ParseUnexpectedToken {
|
||||
parsed_offset: (self.offset(), 1).into(),
|
||||
parsed_offset: (offset, 1).into(),
|
||||
expected: &[TK_SEMI],
|
||||
got: tt,
|
||||
token_text: token_text.clone(),
|
||||
offset,
|
||||
expected_display: crate::token::TokenType::format_expected_tokens(&[
|
||||
TK_SEMI,
|
||||
]),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1495,10 +1507,18 @@ impl<'a> Parser<'a> {
|
||||
Some(self.parse_nm()?)
|
||||
} else if tok.token_type == Some(TK_LP) {
|
||||
if can_be_lit_str {
|
||||
let token = self.peek_no_eof()?;
|
||||
let token_text = String::from_utf8_lossy(token.value).to_string();
|
||||
let offset = self.offset();
|
||||
return Err(Error::ParseUnexpectedToken {
|
||||
parsed_offset: (self.offset() - name.len(), name.len()).into(),
|
||||
got: TK_STRING,
|
||||
expected: &[TK_ID, TK_INDEXED, TK_JOIN_KW],
|
||||
token_text: token_text.clone(),
|
||||
offset,
|
||||
expected_display: crate::token::TokenType::format_expected_tokens(
|
||||
&[TK_ID, TK_INDEXED, TK_JOIN_KW],
|
||||
),
|
||||
});
|
||||
} // can not be literal string in function name
|
||||
|
||||
|
||||
@@ -548,4 +548,47 @@ impl TokenType {
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get user-friendly display name for error messages
|
||||
pub fn user_friendly_name(&self) -> &'static str {
|
||||
match self.as_str() {
|
||||
Some(s) => s,
|
||||
None => match self {
|
||||
TokenType::TK_ID => "identifier",
|
||||
TokenType::TK_STRING => "string",
|
||||
TokenType::TK_INTEGER => "integer",
|
||||
TokenType::TK_FLOAT => "float",
|
||||
TokenType::TK_BLOB => "blob",
|
||||
TokenType::TK_VARIABLE => "variable",
|
||||
TokenType::TK_ILLEGAL => "illegal token",
|
||||
TokenType::TK_EOF => "end of file",
|
||||
TokenType::TK_LIKE_KW => "LIKE",
|
||||
TokenType::TK_JOIN_KW => "JOIN",
|
||||
TokenType::TK_CTIME_KW => "datetime function",
|
||||
TokenType::TK_ISNOT => "IS NOT",
|
||||
TokenType::TK_ISNULL => "ISNULL",
|
||||
TokenType::TK_NOTNULL => "NOTNULL",
|
||||
TokenType::TK_PTR => "->",
|
||||
_ => "unknown token",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Format multiple tokens for error messages
|
||||
pub fn format_expected_tokens(tokens: &[TokenType]) -> String {
|
||||
if tokens.is_empty() {
|
||||
return "nothing".to_string();
|
||||
}
|
||||
if tokens.len() == 1 {
|
||||
return tokens[0].user_friendly_name().to_string();
|
||||
}
|
||||
|
||||
let names: Vec<&str> = tokens.iter().map(|t| t.user_friendly_name()).collect();
|
||||
if names.len() == 2 {
|
||||
format!("{} or {}", names[0], names[1])
|
||||
} else {
|
||||
let (last, rest) = names.split_last().unwrap();
|
||||
format!("{}, or {}", rest.join(", "), last)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user