Files
turso/vendored/sqlite3-parser/src/dialect/mod.rs
2025-07-16 12:16:31 -03:00

410 lines
15 KiB
Rust

//! SQLite dialect
use std::fmt::Formatter;
use std::str;
mod token;
pub use token::TokenType;
/// Token value (lexeme)
///
/// Tuple fields, in order: `start`, the raw lexeme bytes borrowed from the
/// input for lifetime `'i`, and `end` (this is the argument order used by
/// `TokenType::to_token`).
// NOTE(review): `start`/`end` are presumably byte offsets into the scanned
// input — confirm against the lexer.
#[derive(Clone, Copy)]
pub struct Token<'i>(pub usize, pub &'i [u8], pub usize);
/// Build a placeholder token located at `start`.
///
/// The lexeme is the empty byte string and both position fields are set to
/// `start`.
pub(crate) fn sentinel(start: usize) -> Token<'static> {
    const EMPTY: &[u8] = b"";
    Token(start, EMPTY, start)
}
impl Token<'_> {
/// Access token value
pub fn unwrap(self) -> String {
from_bytes(self.1)
}
}
impl std::fmt::Debug for Token<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("Token").field(&self.1).finish()
}
}
impl TokenType {
    // TODO: experiment with `Cow<'static, str>` — `Borrowed(&'static str)`
    // for keywords and `Owned(String)` for everything else:
    //   => syntax errors on keywords would read better
    //   => `from_token` would no longer be needed

    /// Package a lexeme and its two positions into a [`Token`].
    ///
    /// The token type itself (`self`) is not stored in the result; the
    /// returned token borrows `value`.
    pub(crate) fn to_token(self, start: usize, value: &[u8], end: usize) -> Token<'_> {
        Token(start, value, end)
    }
}
/// Copy raw lexeme bytes into an owned `String`.
///
/// Well-formed UTF-8 is copied verbatim. Ill-formed sequences are replaced
/// with U+FFFD (the Unicode replacement character) instead of being trusted
/// blindly: `Token` has public fields and `Token::unwrap` is public, so safe
/// code can hand this function arbitrary bytes, and a `String` holding
/// invalid UTF-8 is undefined behavior.
pub(crate) fn from_bytes(bytes: &[u8]) -> String {
    match str::from_utf8(bytes) {
        // Common case: the lexer sliced the lexeme out of valid UTF-8 input.
        Ok(text) => text.to_owned(),
        // Never build an invalid `String`; degrade to a lossy conversion.
        Err(_) => String::from_utf8_lossy(bytes).into_owned(),
    }
}
// Textually includes `keywords.rs` from the directory named by the
// compile-time `OUT_DIR` environment variable.
// NOTE(review): presumably generated by the crate's build script and the
// source of `keyword_token` exercised by the tests below — confirm.
include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
pub(crate) fn is_identifier(name: &str) -> bool {
if name.is_empty() {
return false;
}
let bytes = name.as_bytes();
is_identifier_start(bytes[0])
&& (bytes.len() == 1 || bytes[1..].iter().all(|b| is_identifier_continue(*b)))
}
/// Report whether byte `b` may begin an identifier.
///
/// Accepted: ASCII letters, `_`, and any byte above `0x7F` (i.e. any
/// non-ASCII byte).
pub(crate) fn is_identifier_start(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'_' | 0x80..=0xFF)
}
/// Report whether byte `b` may appear after the first byte of an identifier.
///
/// Accepted: `$`, `_`, ASCII digits, ASCII letters, and any byte above
/// `0x7F` (i.e. any non-ASCII byte).
pub(crate) fn is_identifier_continue(b: u8) -> bool {
    matches!(
        b,
        b'$' | b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | 0x80..=0xFF
    )
}
// keyword may become an identifier
// see %fallback in parse.y
pub(crate) fn from_token(_ty: u16, value: Token) -> String {
from_bytes(value.1)
}
impl TokenType {
    /// Return the associated string (mainly for testing)
    ///
    /// Yields the canonical spelling of a keyword or operator token, or
    /// `None` for every token type that has no arm in the table below.
    pub const fn as_str(&self) -> Option<&'static str> {
        use TokenType::*;
        let text = match self {
            // Keywords
            TK_ABORT => "ABORT",
            TK_ACTION => "ACTION",
            TK_ADD => "ADD",
            TK_AFTER => "AFTER",
            TK_ALL => "ALL",
            TK_ALTER => "ALTER",
            TK_ANALYZE => "ANALYZE",
            TK_ALWAYS => "ALWAYS",
            TK_AND => "AND",
            TK_AS => "AS",
            TK_ASC => "ASC",
            TK_ATTACH => "ATTACH",
            TK_AUTOINCR => "AUTOINCREMENT",
            TK_BEFORE => "BEFORE",
            TK_BEGIN => "BEGIN",
            TK_BETWEEN => "BETWEEN",
            TK_BY => "BY",
            TK_CASCADE => "CASCADE",
            TK_CASE => "CASE",
            TK_CAST => "CAST",
            TK_CHECK => "CHECK",
            TK_COLLATE => "COLLATE",
            TK_COLUMNKW => "COLUMN",
            TK_COMMIT => "COMMIT",
            TK_CONFLICT => "CONFLICT",
            TK_CONSTRAINT => "CONSTRAINT",
            TK_CREATE => "CREATE",
            TK_CURRENT => "CURRENT",
            TK_DATABASE => "DATABASE",
            TK_DEFAULT => "DEFAULT",
            TK_DEFERRABLE => "DEFERRABLE",
            TK_DEFERRED => "DEFERRED",
            TK_DELETE => "DELETE",
            TK_DESC => "DESC",
            TK_DETACH => "DETACH",
            TK_DISTINCT => "DISTINCT",
            TK_DO => "DO",
            TK_DROP => "DROP",
            TK_EACH => "EACH",
            TK_ELSE => "ELSE",
            TK_END => "END",
            TK_ESCAPE => "ESCAPE",
            TK_EXCEPT => "EXCEPT",
            TK_EXCLUDE => "EXCLUDE",
            TK_EXCLUSIVE => "EXCLUSIVE",
            TK_EXISTS => "EXISTS",
            TK_EXPLAIN => "EXPLAIN",
            TK_FAIL => "FAIL",
            TK_FILTER => "FILTER",
            TK_FIRST => "FIRST",
            TK_FOLLOWING => "FOLLOWING",
            TK_FOR => "FOR",
            TK_FOREIGN => "FOREIGN",
            TK_FROM => "FROM",
            TK_GENERATED => "GENERATED",
            TK_GROUP => "GROUP",
            TK_GROUPS => "GROUPS",
            TK_HAVING => "HAVING",
            TK_IF => "IF",
            TK_IGNORE => "IGNORE",
            TK_IMMEDIATE => "IMMEDIATE",
            TK_IN => "IN",
            TK_INDEX => "INDEX",
            TK_INDEXED => "INDEXED",
            TK_INITIALLY => "INITIALLY",
            TK_INSERT => "INSERT",
            TK_INSTEAD => "INSTEAD",
            TK_INTERSECT => "INTERSECT",
            TK_INTO => "INTO",
            TK_IS => "IS",
            TK_ISNULL => "ISNULL",
            TK_JOIN => "JOIN",
            TK_KEY => "KEY",
            TK_LAST => "LAST",
            TK_LIMIT => "LIMIT",
            TK_MATCH => "MATCH",
            TK_MATERIALIZED => "MATERIALIZED",
            TK_NO => "NO",
            TK_NOT => "NOT",
            TK_NOTHING => "NOTHING",
            TK_NOTNULL => "NOTNULL",
            TK_NULL => "NULL",
            TK_NULLS => "NULLS",
            TK_OF => "OF",
            TK_OFFSET => "OFFSET",
            TK_ON => "ON",
            TK_OR => "OR",
            TK_ORDER => "ORDER",
            TK_OTHERS => "OTHERS",
            TK_OVER => "OVER",
            TK_PARTITION => "PARTITION",
            TK_PLAN => "PLAN",
            TK_PRAGMA => "PRAGMA",
            TK_PRECEDING => "PRECEDING",
            TK_PRIMARY => "PRIMARY",
            TK_QUERY => "QUERY",
            TK_RAISE => "RAISE",
            TK_RANGE => "RANGE",
            TK_RECURSIVE => "RECURSIVE",
            TK_REFERENCES => "REFERENCES",
            TK_REINDEX => "REINDEX",
            TK_RELEASE => "RELEASE",
            TK_RENAME => "RENAME",
            TK_REPLACE => "REPLACE",
            TK_RETURNING => "RETURNING",
            TK_RESTRICT => "RESTRICT",
            TK_ROLLBACK => "ROLLBACK",
            TK_ROW => "ROW",
            TK_ROWS => "ROWS",
            TK_SAVEPOINT => "SAVEPOINT",
            TK_SELECT => "SELECT",
            TK_SET => "SET",
            TK_TABLE => "TABLE",
            TK_TEMP => "TEMP", // or TEMPORARY
            TK_TIES => "TIES",
            TK_THEN => "THEN",
            TK_TO => "TO",
            TK_TRANSACTION => "TRANSACTION",
            TK_TRIGGER => "TRIGGER",
            TK_UNBOUNDED => "UNBOUNDED",
            TK_UNION => "UNION",
            TK_UNIQUE => "UNIQUE",
            TK_UPDATE => "UPDATE",
            TK_USING => "USING",
            TK_VACUUM => "VACUUM",
            TK_VALUES => "VALUES",
            TK_VIEW => "VIEW",
            TK_VIRTUAL => "VIRTUAL",
            TK_WHEN => "WHEN",
            TK_WHERE => "WHERE",
            TK_WINDOW => "WINDOW",
            TK_WITH => "WITH",
            TK_WITHOUT => "WITHOUT",
            // Operators and punctuation
            TK_BITAND => "&",
            TK_BITNOT => "~",
            TK_BITOR => "|",
            TK_COMMA => ",",
            TK_CONCAT => "||",
            TK_DOT => ".",
            TK_EQ => "=", // or ==
            TK_GT => ">",
            TK_GE => ">=",
            TK_LP => "(",
            TK_LSHIFT => "<<",
            TK_LE => "<=",
            TK_LT => "<",
            TK_MINUS => "-",
            TK_NE => "!=", // or <>
            TK_PLUS => "+",
            TK_REM => "%",
            TK_RP => ")",
            TK_RSHIFT => ">>",
            TK_SEMI => ";",
            TK_SLASH => "/",
            TK_STAR => "*",
            // Everything else has no fixed spelling in this table.
            _ => return None,
        };
        Some(text)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every listed keyword must resolve to its token type in both its
    /// upper-case and lower-case spelling, and non-keywords must be rejected.
    #[test]
    fn test_keyword_token() {
        let cases: &[(&str, TokenType)] = &[
            ("ABORT", TokenType::TK_ABORT),
            ("ACTION", TokenType::TK_ACTION),
            ("ADD", TokenType::TK_ADD),
            ("AFTER", TokenType::TK_AFTER),
            ("ALL", TokenType::TK_ALL),
            ("ALTER", TokenType::TK_ALTER),
            ("ALWAYS", TokenType::TK_ALWAYS),
            ("ANALYZE", TokenType::TK_ANALYZE),
            ("AND", TokenType::TK_AND),
            ("AS", TokenType::TK_AS),
            ("ASC", TokenType::TK_ASC),
            ("ATTACH", TokenType::TK_ATTACH),
            ("AUTOINCREMENT", TokenType::TK_AUTOINCR),
            ("BEFORE", TokenType::TK_BEFORE),
            ("BEGIN", TokenType::TK_BEGIN),
            ("BETWEEN", TokenType::TK_BETWEEN),
            ("BY", TokenType::TK_BY),
            ("CASCADE", TokenType::TK_CASCADE),
            ("CASE", TokenType::TK_CASE),
            ("CAST", TokenType::TK_CAST),
            ("CHECK", TokenType::TK_CHECK),
            ("COLLATE", TokenType::TK_COLLATE),
            ("COLUMN", TokenType::TK_COLUMNKW),
            ("COMMIT", TokenType::TK_COMMIT),
            ("CONFLICT", TokenType::TK_CONFLICT),
            ("CONSTRAINT", TokenType::TK_CONSTRAINT),
            ("CREATE", TokenType::TK_CREATE),
            ("CROSS", TokenType::TK_JOIN_KW),
            ("CURRENT", TokenType::TK_CURRENT),
            ("CURRENT_DATE", TokenType::TK_CTIME_KW),
            ("CURRENT_TIME", TokenType::TK_CTIME_KW),
            ("CURRENT_TIMESTAMP", TokenType::TK_CTIME_KW),
            ("DATABASE", TokenType::TK_DATABASE),
            ("DEFAULT", TokenType::TK_DEFAULT),
            ("DEFERRABLE", TokenType::TK_DEFERRABLE),
            ("DEFERRED", TokenType::TK_DEFERRED),
            ("DELETE", TokenType::TK_DELETE),
            ("DESC", TokenType::TK_DESC),
            ("DETACH", TokenType::TK_DETACH),
            ("DISTINCT", TokenType::TK_DISTINCT),
            ("DO", TokenType::TK_DO),
            ("DROP", TokenType::TK_DROP),
            ("EACH", TokenType::TK_EACH),
            ("ELSE", TokenType::TK_ELSE),
            ("END", TokenType::TK_END),
            ("ESCAPE", TokenType::TK_ESCAPE),
            ("EXCEPT", TokenType::TK_EXCEPT),
            ("EXCLUDE", TokenType::TK_EXCLUDE),
            ("EXCLUSIVE", TokenType::TK_EXCLUSIVE),
            ("EXISTS", TokenType::TK_EXISTS),
            ("EXPLAIN", TokenType::TK_EXPLAIN),
            ("FAIL", TokenType::TK_FAIL),
            ("FILTER", TokenType::TK_FILTER),
            ("FIRST", TokenType::TK_FIRST),
            ("FOLLOWING", TokenType::TK_FOLLOWING),
            ("FOR", TokenType::TK_FOR),
            ("FOREIGN", TokenType::TK_FOREIGN),
            ("FROM", TokenType::TK_FROM),
            ("FULL", TokenType::TK_JOIN_KW),
            ("GENERATED", TokenType::TK_GENERATED),
            ("GLOB", TokenType::TK_LIKE_KW),
            ("GROUP", TokenType::TK_GROUP),
            ("GROUPS", TokenType::TK_GROUPS),
            ("HAVING", TokenType::TK_HAVING),
            ("IF", TokenType::TK_IF),
            ("IGNORE", TokenType::TK_IGNORE),
            ("IMMEDIATE", TokenType::TK_IMMEDIATE),
            ("IN", TokenType::TK_IN),
            ("INDEX", TokenType::TK_INDEX),
            ("INDEXED", TokenType::TK_INDEXED),
            ("INITIALLY", TokenType::TK_INITIALLY),
            ("INNER", TokenType::TK_JOIN_KW),
            ("INSERT", TokenType::TK_INSERT),
            ("INSTEAD", TokenType::TK_INSTEAD),
            ("INTERSECT", TokenType::TK_INTERSECT),
            ("INTO", TokenType::TK_INTO),
            ("IS", TokenType::TK_IS),
            ("ISNULL", TokenType::TK_ISNULL),
            ("JOIN", TokenType::TK_JOIN),
            ("KEY", TokenType::TK_KEY),
            ("LAST", TokenType::TK_LAST),
            ("LEFT", TokenType::TK_JOIN_KW),
            ("LIKE", TokenType::TK_LIKE_KW),
            ("LIMIT", TokenType::TK_LIMIT),
            ("MATCH", TokenType::TK_MATCH),
            ("MATERIALIZED", TokenType::TK_MATERIALIZED),
            ("NATURAL", TokenType::TK_JOIN_KW),
            ("NO", TokenType::TK_NO),
            ("NOT", TokenType::TK_NOT),
            ("NOTHING", TokenType::TK_NOTHING),
            ("NOTNULL", TokenType::TK_NOTNULL),
            ("NULL", TokenType::TK_NULL),
            ("NULLS", TokenType::TK_NULLS),
            ("OF", TokenType::TK_OF),
            ("OFFSET", TokenType::TK_OFFSET),
            ("ON", TokenType::TK_ON),
            ("OR", TokenType::TK_OR),
            ("ORDER", TokenType::TK_ORDER),
            ("OTHERS", TokenType::TK_OTHERS),
            ("OUTER", TokenType::TK_JOIN_KW),
            ("OVER", TokenType::TK_OVER),
            ("PARTITION", TokenType::TK_PARTITION),
            ("PLAN", TokenType::TK_PLAN),
            ("PRAGMA", TokenType::TK_PRAGMA),
            ("PRECEDING", TokenType::TK_PRECEDING),
            ("PRIMARY", TokenType::TK_PRIMARY),
            ("QUERY", TokenType::TK_QUERY),
            ("RAISE", TokenType::TK_RAISE),
            ("RANGE", TokenType::TK_RANGE),
            ("RECURSIVE", TokenType::TK_RECURSIVE),
            ("REFERENCES", TokenType::TK_REFERENCES),
            ("REGEXP", TokenType::TK_LIKE_KW),
            ("REINDEX", TokenType::TK_REINDEX),
            ("RELEASE", TokenType::TK_RELEASE),
            ("RENAME", TokenType::TK_RENAME),
            ("REPLACE", TokenType::TK_REPLACE),
            ("RETURNING", TokenType::TK_RETURNING),
            ("RESTRICT", TokenType::TK_RESTRICT),
            ("RIGHT", TokenType::TK_JOIN_KW),
            ("ROLLBACK", TokenType::TK_ROLLBACK),
            ("ROW", TokenType::TK_ROW),
            ("ROWS", TokenType::TK_ROWS),
            ("SAVEPOINT", TokenType::TK_SAVEPOINT),
            ("SELECT", TokenType::TK_SELECT),
            ("SET", TokenType::TK_SET),
            ("TABLE", TokenType::TK_TABLE),
            ("TEMP", TokenType::TK_TEMP),
            ("TEMPORARY", TokenType::TK_TEMP),
            ("THEN", TokenType::TK_THEN),
            ("TIES", TokenType::TK_TIES),
            ("TO", TokenType::TK_TO),
            ("TRANSACTION", TokenType::TK_TRANSACTION),
            ("TRIGGER", TokenType::TK_TRIGGER),
            ("UNBOUNDED", TokenType::TK_UNBOUNDED),
            ("UNION", TokenType::TK_UNION),
            ("UNIQUE", TokenType::TK_UNIQUE),
            ("UPDATE", TokenType::TK_UPDATE),
            ("USING", TokenType::TK_USING),
            ("VACUUM", TokenType::TK_VACUUM),
            ("VALUES", TokenType::TK_VALUES),
            ("VIEW", TokenType::TK_VIEW),
            ("VIRTUAL", TokenType::TK_VIRTUAL),
            ("WHEN", TokenType::TK_WHEN),
            ("WHERE", TokenType::TK_WHERE),
            ("WINDOW", TokenType::TK_WINDOW),
            ("WITH", TokenType::TK_WITH),
            ("WITHOUT", TokenType::TK_WITHOUT),
        ];

        for (keyword, expected) in cases {
            // Spelling exactly as listed (upper case).
            assert!(keyword_token(keyword.as_bytes()).unwrap() == *expected);
            // Same keyword, lower-cased.
            let lowered = keyword.to_ascii_lowercase();
            assert!(keyword_token(lowered.as_bytes()).unwrap() == *expected);
        }

        // Inputs that are not keywords must not resolve to any token type.
        let rejected = |text: &[u8]| keyword_token(text).is_none();
        assert!(rejected(b""));
        assert!(rejected(b"wrong"));
        assert!(rejected(b"super wrong"));
        assert!(rejected(b"super_wrong"));
        assert!(rejected(b"aae26e78-3ba7-4627-8f8f-02623302495a"));
        assert!(rejected("Crème Brulée".as_bytes()));
        assert!(rejected("fróm".as_bytes()));
    }
}