more tests

This commit is contained in:
TcMits
2025-08-06 18:29:14 +07:00
parent b7962e25f7
commit e93e50ad67
3 changed files with 1127 additions and 107 deletions

View File

@@ -9,86 +9,79 @@ use crate::token::TokenType;
#[diagnostic()]
pub enum Error {
/// Lexer error
UnrecognizedToken(usize, #[label("here")] Option<miette::SourceSpan>),
UnrecognizedToken(#[label("here")] miette::SourceSpan),
/// Missing quote or double-quote or backtick
UnterminatedLiteral(usize, #[label("here")] Option<miette::SourceSpan>),
UnterminatedLiteral(#[label("here")] miette::SourceSpan),
/// Missing `]`
UnterminatedBracket(usize, #[label("here")] Option<miette::SourceSpan>),
UnterminatedBracket(#[label("here")] miette::SourceSpan),
/// Missing `*/`
UnterminatedBlockComment(usize, #[label("here")] Option<miette::SourceSpan>),
UnterminatedBlockComment(#[label("here")] miette::SourceSpan),
/// Invalid parameter name
BadVariableName(usize, #[label("here")] Option<miette::SourceSpan>),
BadVariableName(#[label("here")] miette::SourceSpan),
/// Invalid number format
#[diagnostic(help("Invalid digit at `{0}`"))]
BadNumber(
usize,
#[label("here")] Option<miette::SourceSpan>,
String, // Holds the offending number as a string
),
#[diagnostic(help("Invalid digit at `{0}`"))]
BadFractionalPart(
usize,
#[label("here")] Option<miette::SourceSpan>,
String, // Holds the offending number as a string
),
#[diagnostic(help("Invalid digit at `{0}`"))]
BadExponentPart(
usize,
#[label("here")] Option<miette::SourceSpan>,
String, // Holds the offending number as a string
),
BadNumber(#[label("here")] miette::SourceSpan),
// Bad fractional part of a number
BadFractionalPart(#[label("here")] miette::SourceSpan),
// Bad exponent part of a number
BadExponentPart(#[label("here")] miette::SourceSpan),
/// Invalid or missing sign after `!`
ExpectedEqualsSign(usize, #[label("here")] Option<miette::SourceSpan>),
ExpectedEqualsSign(#[label("here")] miette::SourceSpan),
/// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
MalformedHexInteger(
usize,
#[label("here")] Option<miette::SourceSpan>,
#[help] Option<&'static str>,
),
MalformedHexInteger(#[label("here")] miette::SourceSpan),
// parse errors
// Unexpected end of file
ParseUnexpectedEOF,
// Unexpected token
ParseUnexpectedToken {
#[label("parsed to here")]
parsed_offset: miette::SourceSpan,
got: TokenType,
expected: &'static [TokenType],
},
// Custom error message
Custom(String),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Self::UnrecognizedToken(pos, _) => {
Self::UnrecognizedToken(pos) => {
write!(f, "unrecognized token at {:?}", pos)
}
Self::UnterminatedLiteral(pos, _) => {
Self::UnterminatedLiteral(pos) => {
write!(f, "non-terminated literal at {:?}", pos)
}
Self::UnterminatedBracket(pos, _) => {
Self::UnterminatedBracket(pos) => {
write!(f, "non-terminated bracket at {:?}", pos)
}
Self::UnterminatedBlockComment(pos, _) => {
Self::UnterminatedBlockComment(pos) => {
write!(f, "non-terminated block comment at {:?}", pos)
}
Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos),
Self::BadNumber(pos, _, _) => write!(f, "bad number at {:?}", pos),
Self::BadFractionalPart(pos, _, _) => {
Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos),
Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos),
Self::BadFractionalPart(pos) => {
write!(f, "bad fractional part at {:?}", pos)
}
Self::BadExponentPart(pos, _, _) => {
Self::BadExponentPart(pos) => {
write!(f, "bad exponent part at {:?}", pos)
}
Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos),
Self::MalformedHexInteger(pos, _, _) => {
Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos),
Self::MalformedHexInteger(pos) => {
write!(f, "malformed hex integer at {:?}", pos)
}
Self::ParseUnexpectedEOF => {
write!(f, "unexpected end of file")
}
Self::ParseUnexpectedToken { got, expected } => {
Self::ParseUnexpectedToken {
parsed_offset,
got,
expected,
} => {
write!(
f,
"got unexpected token: expected {:?}, found {}",
expected, got
"got unexpected token after parsing to offset {:?}: expected {:?}, found {}",
parsed_offset, expected, got
)
}
Self::Custom(ref s) => {

View File

@@ -142,9 +142,7 @@ impl<'a> Lexer<'a> {
Some(b'_') => {
if start == self.offset {
// before the underscore, there was no digit
return Err(Error::BadNumber(self.offset, None, unsafe {
String::from_utf8_unchecked(self.input[start..self.offset].to_vec())
}));
return Err(Error::BadNumber((self.offset, 1).into()));
}
self.eat_and_assert(|b| b == b'_');
@@ -152,9 +150,7 @@ impl<'a> Lexer<'a> {
Some(b) if b.is_ascii_digit() => continue, // Continue if next is a digit
_ => {
// after the underscore, there is no digit
return Err(Error::BadNumber(self.offset, None, unsafe {
String::from_utf8_unchecked(self.input[start..self.offset].to_vec())
}));
return Err(Error::BadNumber((self.offset, 1).into()));
}
}
}
@@ -171,9 +167,7 @@ impl<'a> Lexer<'a> {
Some(b'_') => {
if start == self.offset {
// before the underscore, there was no digit
return Err(Error::BadNumber(self.offset, None, unsafe {
String::from_utf8_unchecked(self.input[start..self.offset].to_vec())
}));
return Err(Error::BadNumber((self.offset, 1).into()));
}
self.eat_and_assert(|b| b == b'_');
@@ -181,9 +175,7 @@ impl<'a> Lexer<'a> {
Some(b) if b.is_ascii_hexdigit() => continue, // Continue if next is a digit
_ => {
// after the underscore, there is no digit
return Err(Error::BadNumber(self.offset, None, unsafe {
String::from_utf8_unchecked(self.input[start..self.offset].to_vec())
}));
return Err(Error::BadNumber((self.offset, 1).into()));
}
}
}
@@ -266,12 +258,12 @@ impl<'a> Lexer<'a> {
break; // End of block comment
}
None => {
return Err(Error::UnterminatedBlockComment(self.offset, None))
return Err(Error::UnterminatedBlockComment((self.offset, 1).into()))
}
_ => {}
}
}
None => return Err(Error::UnterminatedBlockComment(self.offset, None)),
None => return Err(Error::UnterminatedBlockComment((self.offset, 1).into())),
_ => unreachable!(), // We should not reach here
}
}
@@ -365,7 +357,7 @@ impl<'a> Lexer<'a> {
Some(b'=') => {
self.eat_and_assert(|b| b == b'=');
}
_ => return Err(Error::ExpectedEqualsSign(self.offset, None)),
_ => return Err(Error::ExpectedEqualsSign((self.offset, 1).into())),
}
Ok(Token {
@@ -414,7 +406,7 @@ impl<'a> Lexer<'a> {
_ => break,
}
}
None => return Err(Error::UnterminatedLiteral(self.offset, None)),
None => return Err(Error::UnterminatedLiteral((self.offset, 1).into())),
_ => unreachable!(),
};
}
@@ -441,9 +433,7 @@ impl<'a> Lexer<'a> {
})
}
Some(b) if is_identifier_start(b) => {
Err(Error::BadFractionalPart(self.offset, None, unsafe {
String::from_utf8_unchecked(self.input[start..self.offset + 1].to_vec())
}))
Err(Error::BadFractionalPart((self.offset, 1).into()))
}
_ => Ok(Token {
value: &self.input[start..self.offset],
@@ -471,15 +461,11 @@ impl<'a> Lexer<'a> {
let start_num = self.offset;
self.eat_while_number_digit()?;
if start_num == self.offset {
return Err(Error::BadExponentPart(self.offset, None, unsafe {
String::from_utf8_unchecked(self.input[start..self.offset].to_vec())
}));
return Err(Error::BadExponentPart((self.offset, 1).into()));
}
if self.peek().is_some() && is_identifier_start(self.peek().unwrap()) {
return Err(Error::BadExponentPart(self.offset, None, unsafe {
String::from_utf8_unchecked(self.input[start..self.offset + 1].to_vec())
}));
return Err(Error::BadExponentPart((self.offset, 1).into()));
}
Ok(Token {
@@ -502,17 +488,11 @@ impl<'a> Lexer<'a> {
self.eat_while_number_hexdigit()?;
if start_hex == self.offset {
return Err(Error::MalformedHexInteger(
self.offset,
None,
Some("Did you forget to add digits after '0x' or '0X'?"), // Help Message
));
return Err(Error::MalformedHexInteger((self.offset, 1).into()));
}
if self.peek().is_some() && is_identifier_start(self.peek().unwrap()) {
return Err(Error::BadNumber(self.offset, None, unsafe {
String::from_utf8_unchecked(self.input[start..self.offset + 1].to_vec())
}));
return Err(Error::BadNumber((self.offset, 1).into()));
}
return Ok(Token {
@@ -540,9 +520,7 @@ impl<'a> Lexer<'a> {
token_type: Some(TokenType::TK_FLOAT),
})
}
Some(b) if is_identifier_start(b) => Err(Error::BadNumber(self.offset, None, unsafe {
String::from_utf8_unchecked(self.input[start..self.offset + 1].to_vec())
})),
Some(b) if is_identifier_start(b) => Err(Error::BadNumber((self.offset, 1).into())),
_ => Ok(Token {
value: &self.input[start..self.offset],
token_type: Some(TokenType::TK_INTEGER),
@@ -562,7 +540,7 @@ impl<'a> Lexer<'a> {
token_type: Some(TokenType::TK_ID),
})
}
None => Err(Error::UnterminatedBracket(self.offset, None)),
None => Err(Error::UnterminatedBracket((self.offset, 1).into())),
_ => unreachable!(), // We should not reach here
}
}
@@ -579,7 +557,7 @@ impl<'a> Lexer<'a> {
// empty variable name
if start_digit == self.offset {
return Err(Error::BadVariableName(self.offset, None));
return Err(Error::BadVariableName((self.offset, 1).into()));
}
Ok(Token {
@@ -593,7 +571,7 @@ impl<'a> Lexer<'a> {
// empty variable name
if start_id == self.offset {
return Err(Error::BadVariableName(self.offset, None));
return Err(Error::BadVariableName((self.offset, 1).into()));
}
Ok(Token {
@@ -622,7 +600,7 @@ impl<'a> Lexer<'a> {
self.eat_and_assert(|b| b == b'\'');
if (end_hex - start_hex) % 2 != 0 {
return Err(Error::UnrecognizedToken(self.offset, None));
return Err(Error::UnrecognizedToken((self.offset, 1).into()));
}
Ok(Token {
@@ -630,7 +608,7 @@ impl<'a> Lexer<'a> {
token_type: Some(TokenType::TK_BLOB),
})
}
_ => Err(Error::UnterminatedLiteral(self.offset, None)),
_ => Err(Error::UnterminatedLiteral((self.offset, 1).into())),
}
}
_ => {

File diff suppressed because it is too large Load Diff