mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-27 13:04:20 +01:00
410 lines
15 KiB
Rust
410 lines
15 KiB
Rust
//! SQLite dialect
|
|
|
|
use std::fmt::Formatter;
|
|
use std::str;
|
|
|
|
mod token;
|
|
pub use token::TokenType;
|
|
|
|
/// A lexeme produced by the tokenizer, together with its position.
///
/// The struct is a cheap `Copy` view: it borrows the lexeme bytes for the
/// lifetime `'i` and never owns them.
#[derive(Clone, Copy)]
pub struct Token<'i>(
    /// Start position of the lexeme.
    pub usize,
    /// Raw bytes of the lexeme.
    pub &'i [u8],
    /// End position of the lexeme.
    pub usize,
);
|
|
|
|
pub(crate) fn sentinel(start: usize) -> Token<'static> {
|
|
Token(start, b"", start)
|
|
}
|
|
|
|
impl Token<'_> {
|
|
/// Access token value
|
|
pub fn unwrap(self) -> String {
|
|
from_bytes(self.1)
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Debug for Token<'_> {
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_tuple("Token").field(&self.1).finish()
|
|
}
|
|
}
|
|
|
|
impl TokenType {
|
|
// TODO try Cow<&'static, str> (Borrowed<&'static str> for keyword and Owned<String> for below),
|
|
// => Syntax error on keyword will be better
|
|
// => `from_token` will become unnecessary
|
|
pub(crate) fn to_token(self, start: usize, value: &[u8], end: usize) -> Token<'_> {
|
|
Token(start, value, end)
|
|
}
|
|
}
|
|
|
|
/// Converts raw lexeme bytes into an owned `String`.
///
/// Valid UTF-8 input is copied as-is. Input that is not valid UTF-8 has each
/// invalid sequence replaced by U+FFFD (the replacement character).
///
/// The previous implementation used `str::from_utf8_unchecked`, which is
/// undefined behaviour whenever `bytes` is not valid UTF-8. Nothing in this
/// function can guarantee that for an arbitrary `&[u8]`, so the bytes are
/// validated here instead.
pub(crate) fn from_bytes(bytes: &[u8]) -> String {
    match str::from_utf8(bytes) {
        // Common case: one validation pass, one allocation.
        Ok(text) => text.to_owned(),
        // Malformed input: degrade gracefully instead of invoking UB.
        Err(_) => String::from_utf8_lossy(bytes).into_owned(),
    }
}
|
|
|
|
include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
|
|
|
|
pub(crate) fn is_identifier(name: &str) -> bool {
|
|
if name.is_empty() {
|
|
return false;
|
|
}
|
|
let bytes = name.as_bytes();
|
|
is_identifier_start(bytes[0])
|
|
&& (bytes.len() == 1 || bytes[1..].iter().all(|b| is_identifier_continue(*b)))
|
|
}
|
|
|
|
/// Returns `true` if `b` may begin an identifier: an ASCII letter, an
/// underscore, or any byte above `0x7F`.
pub(crate) fn is_identifier_start(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'_' | b'a'..=b'z' | 0x80..=0xFF)
}
|
|
|
|
/// Returns `true` if `b` may appear after the first byte of an identifier:
/// `$`, an ASCII digit, an ASCII letter, an underscore, or any byte above
/// `0x7F`.
pub(crate) fn is_identifier_continue(b: u8) -> bool {
    matches!(
        b,
        b'$' | b'0'..=b'9' | b'A'..=b'Z' | b'_' | b'a'..=b'z' | 0x80..=0xFF
    )
}
|
|
|
|
// keyword may become an identifier
|
|
// see %fallback in parse.y
|
|
pub(crate) fn from_token(_ty: u16, value: Token) -> String {
|
|
from_bytes(value.1)
|
|
}
|
|
|
|
impl TokenType {
|
|
/// Return the associated string (mainly for testing)
|
|
pub const fn as_str(&self) -> Option<&'static str> {
|
|
use TokenType::*;
|
|
match self {
|
|
TK_ABORT => Some("ABORT"),
|
|
TK_ACTION => Some("ACTION"),
|
|
TK_ADD => Some("ADD"),
|
|
TK_AFTER => Some("AFTER"),
|
|
TK_ALL => Some("ALL"),
|
|
TK_ALTER => Some("ALTER"),
|
|
TK_ANALYZE => Some("ANALYZE"),
|
|
TK_ALWAYS => Some("ALWAYS"),
|
|
TK_AND => Some("AND"),
|
|
TK_AS => Some("AS"),
|
|
TK_ASC => Some("ASC"),
|
|
TK_ATTACH => Some("ATTACH"),
|
|
TK_AUTOINCR => Some("AUTOINCREMENT"),
|
|
TK_BEFORE => Some("BEFORE"),
|
|
TK_BEGIN => Some("BEGIN"),
|
|
TK_BETWEEN => Some("BETWEEN"),
|
|
TK_BY => Some("BY"),
|
|
TK_CASCADE => Some("CASCADE"),
|
|
TK_CASE => Some("CASE"),
|
|
TK_CAST => Some("CAST"),
|
|
TK_CHECK => Some("CHECK"),
|
|
TK_COLLATE => Some("COLLATE"),
|
|
TK_COLUMNKW => Some("COLUMN"),
|
|
TK_COMMIT => Some("COMMIT"),
|
|
TK_CONFLICT => Some("CONFLICT"),
|
|
TK_CONSTRAINT => Some("CONSTRAINT"),
|
|
TK_CREATE => Some("CREATE"),
|
|
TK_CURRENT => Some("CURRENT"),
|
|
TK_DATABASE => Some("DATABASE"),
|
|
TK_DEFAULT => Some("DEFAULT"),
|
|
TK_DEFERRABLE => Some("DEFERRABLE"),
|
|
TK_DEFERRED => Some("DEFERRED"),
|
|
TK_DELETE => Some("DELETE"),
|
|
TK_DESC => Some("DESC"),
|
|
TK_DETACH => Some("DETACH"),
|
|
TK_DISTINCT => Some("DISTINCT"),
|
|
TK_DO => Some("DO"),
|
|
TK_DROP => Some("DROP"),
|
|
TK_EACH => Some("EACH"),
|
|
TK_ELSE => Some("ELSE"),
|
|
TK_END => Some("END"),
|
|
TK_ESCAPE => Some("ESCAPE"),
|
|
TK_EXCEPT => Some("EXCEPT"),
|
|
TK_EXCLUDE => Some("EXCLUDE"),
|
|
TK_EXCLUSIVE => Some("EXCLUSIVE"),
|
|
TK_EXISTS => Some("EXISTS"),
|
|
TK_EXPLAIN => Some("EXPLAIN"),
|
|
TK_FAIL => Some("FAIL"),
|
|
TK_FILTER => Some("FILTER"),
|
|
TK_FIRST => Some("FIRST"),
|
|
TK_FOLLOWING => Some("FOLLOWING"),
|
|
TK_FOR => Some("FOR"),
|
|
TK_FOREIGN => Some("FOREIGN"),
|
|
TK_FROM => Some("FROM"),
|
|
TK_GENERATED => Some("GENERATED"),
|
|
TK_GROUP => Some("GROUP"),
|
|
TK_GROUPS => Some("GROUPS"),
|
|
TK_HAVING => Some("HAVING"),
|
|
TK_IF => Some("IF"),
|
|
TK_IGNORE => Some("IGNORE"),
|
|
TK_IMMEDIATE => Some("IMMEDIATE"),
|
|
TK_IN => Some("IN"),
|
|
TK_INDEX => Some("INDEX"),
|
|
TK_INDEXED => Some("INDEXED"),
|
|
TK_INITIALLY => Some("INITIALLY"),
|
|
TK_INSERT => Some("INSERT"),
|
|
TK_INSTEAD => Some("INSTEAD"),
|
|
TK_INTERSECT => Some("INTERSECT"),
|
|
TK_INTO => Some("INTO"),
|
|
TK_IS => Some("IS"),
|
|
TK_ISNULL => Some("ISNULL"),
|
|
TK_JOIN => Some("JOIN"),
|
|
TK_KEY => Some("KEY"),
|
|
TK_LAST => Some("LAST"),
|
|
TK_LIMIT => Some("LIMIT"),
|
|
TK_MATCH => Some("MATCH"),
|
|
TK_MATERIALIZED => Some("MATERIALIZED"),
|
|
TK_NO => Some("NO"),
|
|
TK_NOT => Some("NOT"),
|
|
TK_NOTHING => Some("NOTHING"),
|
|
TK_NOTNULL => Some("NOTNULL"),
|
|
TK_NULL => Some("NULL"),
|
|
TK_NULLS => Some("NULLS"),
|
|
TK_OF => Some("OF"),
|
|
TK_OFFSET => Some("OFFSET"),
|
|
TK_ON => Some("ON"),
|
|
TK_OR => Some("OR"),
|
|
TK_ORDER => Some("ORDER"),
|
|
TK_OTHERS => Some("OTHERS"),
|
|
TK_OVER => Some("OVER"),
|
|
TK_PARTITION => Some("PARTITION"),
|
|
TK_PLAN => Some("PLAN"),
|
|
TK_PRAGMA => Some("PRAGMA"),
|
|
TK_PRECEDING => Some("PRECEDING"),
|
|
TK_PRIMARY => Some("PRIMARY"),
|
|
TK_QUERY => Some("QUERY"),
|
|
TK_RAISE => Some("RAISE"),
|
|
TK_RANGE => Some("RANGE"),
|
|
TK_RECURSIVE => Some("RECURSIVE"),
|
|
TK_REFERENCES => Some("REFERENCES"),
|
|
TK_REINDEX => Some("REINDEX"),
|
|
TK_RELEASE => Some("RELEASE"),
|
|
TK_RENAME => Some("RENAME"),
|
|
TK_REPLACE => Some("REPLACE"),
|
|
TK_RETURNING => Some("RETURNING"),
|
|
TK_RESTRICT => Some("RESTRICT"),
|
|
TK_ROLLBACK => Some("ROLLBACK"),
|
|
TK_ROW => Some("ROW"),
|
|
TK_ROWS => Some("ROWS"),
|
|
TK_SAVEPOINT => Some("SAVEPOINT"),
|
|
TK_SELECT => Some("SELECT"),
|
|
TK_SET => Some("SET"),
|
|
TK_TABLE => Some("TABLE"),
|
|
TK_TEMP => Some("TEMP"), // or TEMPORARY
|
|
TK_TIES => Some("TIES"),
|
|
TK_THEN => Some("THEN"),
|
|
TK_TO => Some("TO"),
|
|
TK_TRANSACTION => Some("TRANSACTION"),
|
|
TK_TRIGGER => Some("TRIGGER"),
|
|
TK_UNBOUNDED => Some("UNBOUNDED"),
|
|
TK_UNION => Some("UNION"),
|
|
TK_UNIQUE => Some("UNIQUE"),
|
|
TK_UPDATE => Some("UPDATE"),
|
|
TK_USING => Some("USING"),
|
|
TK_VACUUM => Some("VACUUM"),
|
|
TK_VALUES => Some("VALUES"),
|
|
TK_VIEW => Some("VIEW"),
|
|
TK_VIRTUAL => Some("VIRTUAL"),
|
|
TK_WHEN => Some("WHEN"),
|
|
TK_WHERE => Some("WHERE"),
|
|
TK_WINDOW => Some("WINDOW"),
|
|
TK_WITH => Some("WITH"),
|
|
TK_WITHOUT => Some("WITHOUT"),
|
|
TK_BITAND => Some("&"),
|
|
TK_BITNOT => Some("~"),
|
|
TK_BITOR => Some("|"),
|
|
TK_COMMA => Some(","),
|
|
TK_CONCAT => Some("||"),
|
|
TK_DOT => Some("."),
|
|
TK_EQ => Some("="), // or ==
|
|
TK_GT => Some(">"),
|
|
TK_GE => Some(">="),
|
|
TK_LP => Some("("),
|
|
TK_LSHIFT => Some("<<"),
|
|
TK_LE => Some("<="),
|
|
TK_LT => Some("<"),
|
|
TK_MINUS => Some("-"),
|
|
TK_NE => Some("!="), // or <>
|
|
TK_PLUS => Some("+"),
|
|
TK_REM => Some("%"),
|
|
TK_RP => Some(")"),
|
|
TK_RSHIFT => Some(">>"),
|
|
TK_SEMI => Some(";"),
|
|
TK_SLASH => Some("/"),
|
|
TK_STAR => Some("*"),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `keyword_token` recognises every keyword in both upper and
    /// lower case, and rejects inputs that are not keywords.
    #[test]
    fn test_keyword_token() {
        use TokenType::*;

        // (keyword spelling, expected token type)
        let expectations = [
            ("ABORT", TK_ABORT),
            ("ACTION", TK_ACTION),
            ("ADD", TK_ADD),
            ("AFTER", TK_AFTER),
            ("ALL", TK_ALL),
            ("ALTER", TK_ALTER),
            ("ALWAYS", TK_ALWAYS),
            ("ANALYZE", TK_ANALYZE),
            ("AND", TK_AND),
            ("AS", TK_AS),
            ("ASC", TK_ASC),
            ("ATTACH", TK_ATTACH),
            ("AUTOINCREMENT", TK_AUTOINCR),
            ("BEFORE", TK_BEFORE),
            ("BEGIN", TK_BEGIN),
            ("BETWEEN", TK_BETWEEN),
            ("BY", TK_BY),
            ("CASCADE", TK_CASCADE),
            ("CASE", TK_CASE),
            ("CAST", TK_CAST),
            ("CHECK", TK_CHECK),
            ("COLLATE", TK_COLLATE),
            ("COLUMN", TK_COLUMNKW),
            ("COMMIT", TK_COMMIT),
            ("CONFLICT", TK_CONFLICT),
            ("CONSTRAINT", TK_CONSTRAINT),
            ("CREATE", TK_CREATE),
            ("CROSS", TK_JOIN_KW),
            ("CURRENT", TK_CURRENT),
            ("CURRENT_DATE", TK_CTIME_KW),
            ("CURRENT_TIME", TK_CTIME_KW),
            ("CURRENT_TIMESTAMP", TK_CTIME_KW),
            ("DATABASE", TK_DATABASE),
            ("DEFAULT", TK_DEFAULT),
            ("DEFERRABLE", TK_DEFERRABLE),
            ("DEFERRED", TK_DEFERRED),
            ("DELETE", TK_DELETE),
            ("DESC", TK_DESC),
            ("DETACH", TK_DETACH),
            ("DISTINCT", TK_DISTINCT),
            ("DO", TK_DO),
            ("DROP", TK_DROP),
            ("EACH", TK_EACH),
            ("ELSE", TK_ELSE),
            ("END", TK_END),
            ("ESCAPE", TK_ESCAPE),
            ("EXCEPT", TK_EXCEPT),
            ("EXCLUDE", TK_EXCLUDE),
            ("EXCLUSIVE", TK_EXCLUSIVE),
            ("EXISTS", TK_EXISTS),
            ("EXPLAIN", TK_EXPLAIN),
            ("FAIL", TK_FAIL),
            ("FILTER", TK_FILTER),
            ("FIRST", TK_FIRST),
            ("FOLLOWING", TK_FOLLOWING),
            ("FOR", TK_FOR),
            ("FOREIGN", TK_FOREIGN),
            ("FROM", TK_FROM),
            ("FULL", TK_JOIN_KW),
            ("GENERATED", TK_GENERATED),
            ("GLOB", TK_LIKE_KW),
            ("GROUP", TK_GROUP),
            ("GROUPS", TK_GROUPS),
            ("HAVING", TK_HAVING),
            ("IF", TK_IF),
            ("IGNORE", TK_IGNORE),
            ("IMMEDIATE", TK_IMMEDIATE),
            ("IN", TK_IN),
            ("INDEX", TK_INDEX),
            ("INDEXED", TK_INDEXED),
            ("INITIALLY", TK_INITIALLY),
            ("INNER", TK_JOIN_KW),
            ("INSERT", TK_INSERT),
            ("INSTEAD", TK_INSTEAD),
            ("INTERSECT", TK_INTERSECT),
            ("INTO", TK_INTO),
            ("IS", TK_IS),
            ("ISNULL", TK_ISNULL),
            ("JOIN", TK_JOIN),
            ("KEY", TK_KEY),
            ("LAST", TK_LAST),
            ("LEFT", TK_JOIN_KW),
            ("LIKE", TK_LIKE_KW),
            ("LIMIT", TK_LIMIT),
            ("MATCH", TK_MATCH),
            ("MATERIALIZED", TK_MATERIALIZED),
            ("NATURAL", TK_JOIN_KW),
            ("NO", TK_NO),
            ("NOT", TK_NOT),
            ("NOTHING", TK_NOTHING),
            ("NOTNULL", TK_NOTNULL),
            ("NULL", TK_NULL),
            ("NULLS", TK_NULLS),
            ("OF", TK_OF),
            ("OFFSET", TK_OFFSET),
            ("ON", TK_ON),
            ("OR", TK_OR),
            ("ORDER", TK_ORDER),
            ("OTHERS", TK_OTHERS),
            ("OUTER", TK_JOIN_KW),
            ("OVER", TK_OVER),
            ("PARTITION", TK_PARTITION),
            ("PLAN", TK_PLAN),
            ("PRAGMA", TK_PRAGMA),
            ("PRECEDING", TK_PRECEDING),
            ("PRIMARY", TK_PRIMARY),
            ("QUERY", TK_QUERY),
            ("RAISE", TK_RAISE),
            ("RANGE", TK_RANGE),
            ("RECURSIVE", TK_RECURSIVE),
            ("REFERENCES", TK_REFERENCES),
            ("REGEXP", TK_LIKE_KW),
            ("REINDEX", TK_REINDEX),
            ("RELEASE", TK_RELEASE),
            ("RENAME", TK_RENAME),
            ("REPLACE", TK_REPLACE),
            ("RETURNING", TK_RETURNING),
            ("RESTRICT", TK_RESTRICT),
            ("RIGHT", TK_JOIN_KW),
            ("ROLLBACK", TK_ROLLBACK),
            ("ROW", TK_ROW),
            ("ROWS", TK_ROWS),
            ("SAVEPOINT", TK_SAVEPOINT),
            ("SELECT", TK_SELECT),
            ("SET", TK_SET),
            ("TABLE", TK_TABLE),
            ("TEMP", TK_TEMP),
            ("TEMPORARY", TK_TEMP),
            ("THEN", TK_THEN),
            ("TIES", TK_TIES),
            ("TO", TK_TO),
            ("TRANSACTION", TK_TRANSACTION),
            ("TRIGGER", TK_TRIGGER),
            ("UNBOUNDED", TK_UNBOUNDED),
            ("UNION", TK_UNION),
            ("UNIQUE", TK_UNIQUE),
            ("UPDATE", TK_UPDATE),
            ("USING", TK_USING),
            ("VACUUM", TK_VACUUM),
            ("VALUES", TK_VALUES),
            ("VIEW", TK_VIEW),
            ("VIRTUAL", TK_VIRTUAL),
            ("WHEN", TK_WHEN),
            ("WHERE", TK_WHERE),
            ("WINDOW", TK_WINDOW),
            ("WITH", TK_WITH),
            ("WITHOUT", TK_WITHOUT),
        ];

        // Keywords must be recognised regardless of ASCII case.
        for (spelling, expected) in expectations.iter() {
            let upper = spelling.as_bytes();
            assert!(keyword_token(upper).unwrap() == *expected);

            let lower = spelling.to_ascii_lowercase();
            assert!(keyword_token(lower.as_bytes()).unwrap() == *expected);
        }

        // Inputs that must not be classified as keywords.
        let rejected: [&[u8]; 7] = [
            b"",
            b"wrong",
            b"super wrong",
            b"super_wrong",
            b"aae26e78-3ba7-4627-8f8f-02623302495a",
            "Crème Brulée".as_bytes(),
            "fróm".as_bytes(),
        ];
        for candidate in rejected.iter() {
            assert!(keyword_token(candidate).is_none());
        }
    }
}
|