diff --git a/Cargo.lock b/Cargo.lock index 5330e3d75..f931406d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4172,6 +4172,7 @@ dependencies = [ "strum", "strum_macros", "thiserror 1.0.69", + "turso_macros", "turso_sqlite3_parser", ] diff --git a/macros/Cargo.toml b/macros/Cargo.toml index f899747ed..1ca81c545 100644 --- a/macros/Cargo.toml +++ b/macros/Cargo.toml @@ -17,4 +17,4 @@ proc-macro = true [dependencies] quote = "1.0.38" proc-macro2 = "1.0.93" -syn = { version = "2.0.96", features = ["full"] } +syn = { version = "2.0.96", features = ["full", "clone-impls"] } diff --git a/macros/src/ext/match_ignore_ascii_case.rs b/macros/src/ext/match_ignore_ascii_case.rs new file mode 100644 index 000000000..85e62c0a2 --- /dev/null +++ b/macros/src/ext/match_ignore_ascii_case.rs @@ -0,0 +1,148 @@ +use quote::quote; +use std::collections::HashMap; + +use proc_macro::TokenStream; +use syn::{parse_macro_input, spanned::Spanned, Arm, ExprMatch, Lit, Pat}; + +pub fn match_ignore_ascci_case(input: TokenStream) -> TokenStream { + let match_block = parse_macro_input!(input as ExprMatch); + if match_block.arms.is_empty() { + return syn::Error::new( + match_block.span(), + "expected at least one arm with literal string/byte/bytes/char", + ) + .to_compile_error() + .into(); + } + let mut arms: Vec<(Vec, Arm)> = Vec::with_capacity(match_block.arms.len()); + let mut fallback_arm: Option = None; + for arm in &match_block.arms { + match &arm.pat { + Pat::Lit(lit) => match &lit.lit { + Lit::ByteStr(bs) => { + arms.push((bs.value().to_ascii_uppercase(), arm.clone())); + } + _ => { + return syn::Error::new( + arm.pat.span().span(), + "expected literal string/byte/bytes/char", + ) + .to_compile_error() + .into(); + } + }, + Pat::Wild(_) => { + fallback_arm = Some(arm.clone()); + } + Pat::Or(or) => { + for case in &or.cases { + match case { + Pat::Lit(lit) => match &lit.lit { + Lit::ByteStr(bs) => { + arms.push((bs.value().to_ascii_uppercase(), arm.clone())); + } + _ => { + return syn::Error::new( + arm.pat.span().span(), + "expected literal string/byte/bytes/char", + ) + .to_compile_error() + .into(); + } + }, + _ => { + return syn::Error::new( + arm.pat.span().span(), + "expected literal string/byte/bytes/char", + ) + .to_compile_error() + .into(); + } + } + } + } + _b => { + return syn::Error::new( + arm.pat.span().span(), + "expected literal string/byte/bytes/char", + ) + .to_compile_error() + .into(); + } + } + } + + struct PathEntry { + result: Option, + sub_entries: HashMap>, + } + + let mut paths = Box::new(PathEntry { + result: None, + sub_entries: HashMap::new(), + }); + + for (keyword_b, arm) in arms.drain(..) { + let mut current = &mut paths; + + for b in keyword_b { + match current.sub_entries.get(&b) { + Some(_) => { + current = current.sub_entries.get_mut(&b).unwrap(); + } + None => { + let new_entry = Box::new(PathEntry { + result: None, + sub_entries: HashMap::new(), + }); + current.sub_entries.insert(b, new_entry); + current = current.sub_entries.get_mut(&b).unwrap(); + } + } + } + + assert!(current.result.is_none()); + current.result = Some(arm); + } + + fn write_entry( + idx: usize, + var_name: proc_macro2::TokenStream, + fallback_arm: Option, + entry: &PathEntry, + ) -> proc_macro2::TokenStream { + let eof_handle = if let Some(ref result) = entry.result { + let body = &result.body; + quote! { None => { #body } } + } else { + quote! {} + }; + + let fallback_handle = if let Some(ref result) = fallback_arm { + let body = &result.body; + quote! { _ => { #body } } + } else { + quote! {} + }; + + let mut arms = Vec::with_capacity(entry.sub_entries.len()); + for (&b, sub_entry) in &entry.sub_entries { + let sub_match = write_entry(idx + 1, var_name.clone(), fallback_arm.clone(), sub_entry); + if b.is_ascii_alphabetic() { + let b_lower = b.to_ascii_lowercase(); + arms.push(quote! { Some(#b) | Some(#b_lower) => #sub_match }); + } else { + arms.push(quote! { Some(#b) => #sub_match }); + } + } + + quote! { match #var_name.get(#idx) { + #eof_handle + #(#arms)* + #fallback_handle + } } + } + + let expr = match_block.expr; + TokenStream::from(write_entry(0, quote! { #expr }, fallback_arm, &paths)) +} diff --git a/macros/src/ext/mod.rs b/macros/src/ext/mod.rs index 54182ddc6..c5c6740e1 100644 --- a/macros/src/ext/mod.rs +++ b/macros/src/ext/mod.rs @@ -5,10 +5,12 @@ use syn::punctuated::Punctuated; use syn::token::Eq; use syn::{parse_macro_input, Ident, LitStr, Token}; mod agg_derive; +mod match_ignore_ascii_case; mod scalars; mod vfs_derive; mod vtab_derive; pub use agg_derive::derive_agg_func; +pub use match_ignore_ascii_case::match_ignore_ascci_case; pub use scalars::scalar; pub use vfs_derive::derive_vfs_module; pub use vtab_derive::derive_vtab_module; diff --git a/macros/src/lib.rs b/macros/src/lib.rs index b6d85f294..ffc39282a 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -439,3 +439,8 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { pub fn derive_vfs_module(input: TokenStream) -> TokenStream { ext::derive_vfs_module(input) } + +#[proc_macro] +pub fn match_ignore_ascii_case(input: TokenStream) -> TokenStream { + ext::match_ignore_ascci_case(input) +} diff --git a/parser/Cargo.toml b/parser/Cargo.toml index c2b674a4d..d3a81cb6a 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -21,6 +21,7 @@ strum = { workspace = true } strum_macros = {workspace = true } serde = { workspace = true , optional = true, features = ["derive"] } thiserror = "1.0.61" +turso_macros = { workspace = true } [dev-dependencies] fallible-iterator = "0.3" diff --git a/parser/build.rs b/parser/build.rs deleted file mode 100644 index a6b8db946..000000000 --- a/parser/build.rs +++ /dev/null @@ -1,290 +0,0 @@ -use std::collections::HashMap; -use std::fs::File; -use std::io::{BufWriter, Write}; -use std::path::PathBuf; - -/// generates a trie-like function with nested match expressions for parsing SQL keywords -/// example: input: [["ABORT", "TK_ABORT"], ["ACTION", "TK_ACTION"], ["ADD", "TK_ADD"],] -/// A -/// ├─ B -/// │ ├─ O -/// │ │ ├─ R -/// │ │ │ ├─ T -> TK_ABORT -/// ├─ C -/// │ ├─ T -/// │ │ ├─ I -/// │ │ │ ├─ O -/// │ │ │ │ ├─ N -> TK_ACTION -/// ├─ D -/// │ ├─ D -> TK_ADD -fn build_keyword_map( - writer: &mut impl Write, - func_name: &str, - keywords: &[[&'static str; 2]], -) -> Result<(), std::io::Error> { - assert!(!keywords.is_empty()); - let mut min_len = keywords[0][0].len(); - let mut max_len = keywords[0][0].len(); - - struct PathEntry { - result: Option<&'static str>, - sub_entries: HashMap>, - } - - let mut paths = Box::new(PathEntry { - result: None, - sub_entries: HashMap::new(), - }); - - for keyword in keywords { - let keyword_b = keyword[0].as_bytes(); - - if keyword_b.len() < min_len { - min_len = keyword_b.len(); - } - - if keyword_b.len() > max_len { - max_len = keyword_b.len(); - } - - let mut current = &mut paths; - - for &b in keyword_b { - let upper_b = b.to_ascii_uppercase(); - - match current.sub_entries.get(&upper_b) { - Some(_) => { - current = current.sub_entries.get_mut(&upper_b).unwrap(); - } - None => { - let new_entry = Box::new(PathEntry { - result: None, - sub_entries: HashMap::new(), - }); - current.sub_entries.insert(upper_b, new_entry); - current = current.sub_entries.get_mut(&upper_b).unwrap(); - } - } - } - - assert!(current.result.is_none()); - current.result = Some(keyword[1]); - } - - fn write_entry(writer: &mut impl Write, entry: &PathEntry) -> Result<(), std::io::Error> { - if let Some(result) = entry.result { - writeln!(writer, "if idx == buf.len() {{")?; - writeln!(writer, "return Some(TokenType::{result});")?; - writeln!(writer, "}}")?; - } - - if entry.sub_entries.is_empty() { - writeln!(writer, "None")?; - return Ok(()); - } - - writeln!(writer, "if idx >= buf.len() {{")?; - writeln!(writer, "return None;")?; - writeln!(writer, "}}")?; - - writeln!(writer, "match buf[idx] {{")?; - for (&b, sub_entry) in &entry.sub_entries { - if b.is_ascii_alphabetic() { - writeln!(writer, "{} | {} => {{", b, b.to_ascii_lowercase())?; - } else { - writeln!(writer, "{b} => {{")?; - } - writeln!(writer, "idx += 1;")?; - write_entry(writer, sub_entry)?; - writeln!(writer, "}}")?; - } - - writeln!(writer, "_ => None")?; - writeln!(writer, "}}")?; - Ok(()) - } - - writeln!( - writer, - "pub(crate) const MAX_KEYWORD_LEN: usize = {max_len};" - )?; - writeln!( - writer, - "pub(crate) const MIN_KEYWORD_LEN: usize = {min_len};" - )?; - writeln!(writer, "/// Check if `word` is a keyword")?; - writeln!( - writer, - "pub fn {func_name}(buf: &[u8]) -> Option {{" - )?; - writeln!( - writer, - "if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {{" - )?; - writeln!(writer, "return None;")?; - writeln!(writer, "}}")?; - writeln!(writer, "let mut idx = 0;")?; - write_entry(writer, &paths)?; - writeln!(writer, "}}")?; - Ok(()) -} - -fn main() { - let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); - let keywords = out_dir.join("keywords.rs"); - let mut keywords = BufWriter::new(File::create(keywords).unwrap()); - build_keyword_map( - &mut keywords, - "keyword_token", - &[ - ["ABORT", "TK_ABORT"], - ["ACTION", "TK_ACTION"], - ["ADD", "TK_ADD"], - ["AFTER", "TK_AFTER"], - ["ALL", "TK_ALL"], - ["ALTER", "TK_ALTER"], - ["ALWAYS", "TK_ALWAYS"], - ["ANALYZE", "TK_ANALYZE"], - ["AND", "TK_AND"], - ["AS", "TK_AS"], - ["ASC", "TK_ASC"], - ["ATTACH", "TK_ATTACH"], - ["AUTOINCREMENT", "TK_AUTOINCR"], - ["BEFORE", "TK_BEFORE"], - ["BEGIN", "TK_BEGIN"], - ["BETWEEN", "TK_BETWEEN"], - ["BY", "TK_BY"], - ["CASCADE", "TK_CASCADE"], - ["CASE", "TK_CASE"], - ["CAST", "TK_CAST"], - ["CHECK", "TK_CHECK"], - ["COLLATE", "TK_COLLATE"], - ["COLUMN", "TK_COLUMNKW"], - ["COMMIT", "TK_COMMIT"], - ["CONFLICT", "TK_CONFLICT"], - ["CONSTRAINT", "TK_CONSTRAINT"], - ["CREATE", "TK_CREATE"], - ["CROSS", "TK_JOIN_KW"], - ["CURRENT", "TK_CURRENT"], - ["CURRENT_DATE", "TK_CTIME_KW"], - ["CURRENT_TIME", "TK_CTIME_KW"], - ["CURRENT_TIMESTAMP", "TK_CTIME_KW"], - ["DATABASE", "TK_DATABASE"], - ["DEFAULT", "TK_DEFAULT"], - ["DEFERRABLE", "TK_DEFERRABLE"], - ["DEFERRED", "TK_DEFERRED"], - ["DELETE", "TK_DELETE"], - ["DESC", "TK_DESC"], - ["DETACH", "TK_DETACH"], - ["DISTINCT", "TK_DISTINCT"], - ["DO", "TK_DO"], - ["DROP", "TK_DROP"], - ["EACH", "TK_EACH"], - ["ELSE", "TK_ELSE"], - ["END", "TK_END"], - ["ESCAPE", "TK_ESCAPE"], - ["EXCEPT", "TK_EXCEPT"], - ["EXCLUDE", "TK_EXCLUDE"], - ["EXCLUSIVE", "TK_EXCLUSIVE"], - ["EXISTS", "TK_EXISTS"], - ["EXPLAIN", "TK_EXPLAIN"], - ["FAIL", "TK_FAIL"], - ["FILTER", "TK_FILTER"], - ["FIRST", "TK_FIRST"], - ["FOLLOWING", "TK_FOLLOWING"], - ["FOR", "TK_FOR"], - ["FOREIGN", "TK_FOREIGN"], - ["FROM", "TK_FROM"], - ["FULL", "TK_JOIN_KW"], - ["GENERATED", "TK_GENERATED"], - ["GLOB", "TK_LIKE_KW"], - ["GROUP", "TK_GROUP"], - ["GROUPS", "TK_GROUPS"], - ["HAVING", "TK_HAVING"], - ["IF", "TK_IF"], - ["IGNORE", "TK_IGNORE"], - ["IMMEDIATE", "TK_IMMEDIATE"], - ["IN", "TK_IN"], - ["INDEX", "TK_INDEX"], - ["INDEXED", "TK_INDEXED"], - ["INITIALLY", "TK_INITIALLY"], - ["INNER", "TK_JOIN_KW"], - ["INSERT", "TK_INSERT"], - ["INSTEAD", "TK_INSTEAD"], - ["INTERSECT", "TK_INTERSECT"], - ["INTO", "TK_INTO"], - ["IS", "TK_IS"], - ["ISNULL", "TK_ISNULL"], - ["JOIN", "TK_JOIN"], - ["KEY", "TK_KEY"], - ["LAST", "TK_LAST"], - ["LEFT", "TK_JOIN_KW"], - ["LIKE", "TK_LIKE_KW"], - ["LIMIT", "TK_LIMIT"], - ["MATCH", "TK_MATCH"], - ["MATERIALIZED", "TK_MATERIALIZED"], - ["NATURAL", "TK_JOIN_KW"], - ["NO", "TK_NO"], - ["NOT", "TK_NOT"], - ["NOTHING", "TK_NOTHING"], - ["NOTNULL", "TK_NOTNULL"], - ["NULL", "TK_NULL"], - ["NULLS", "TK_NULLS"], - ["OF", "TK_OF"], - ["OFFSET", "TK_OFFSET"], - ["ON", "TK_ON"], - ["OR", "TK_OR"], - ["ORDER", "TK_ORDER"], - ["OTHERS", "TK_OTHERS"], - ["OUTER", "TK_JOIN_KW"], - ["OVER", "TK_OVER"], - ["PARTITION", "TK_PARTITION"], - ["PLAN", "TK_PLAN"], - ["PRAGMA", "TK_PRAGMA"], - ["PRECEDING", "TK_PRECEDING"], - ["PRIMARY", "TK_PRIMARY"], - ["QUERY", "TK_QUERY"], - ["RAISE", "TK_RAISE"], - ["RANGE", "TK_RANGE"], - ["RECURSIVE", "TK_RECURSIVE"], - ["REFERENCES", "TK_REFERENCES"], - ["REGEXP", "TK_LIKE_KW"], - ["REINDEX", "TK_REINDEX"], - ["RELEASE", "TK_RELEASE"], - ["RENAME", "TK_RENAME"], - ["REPLACE", "TK_REPLACE"], - ["RETURNING", "TK_RETURNING"], - ["RESTRICT", "TK_RESTRICT"], - ["RIGHT", "TK_JOIN_KW"], - ["ROLLBACK", "TK_ROLLBACK"], - ["ROW", "TK_ROW"], - ["ROWS", "TK_ROWS"], - ["SAVEPOINT", "TK_SAVEPOINT"], - ["SELECT", "TK_SELECT"], - ["SET", "TK_SET"], - ["TABLE", "TK_TABLE"], - ["TEMP", "TK_TEMP"], - ["TEMPORARY", "TK_TEMP"], - ["THEN", "TK_THEN"], - ["TIES", "TK_TIES"], - ["TO", "TK_TO"], - ["TRANSACTION", "TK_TRANSACTION"], - ["TRIGGER", "TK_TRIGGER"], - ["UNBOUNDED", "TK_UNBOUNDED"], - ["UNION", "TK_UNION"], - ["UNIQUE", "TK_UNIQUE"], - ["UPDATE", "TK_UPDATE"], - ["USING", "TK_USING"], - ["VACUUM", "TK_VACUUM"], - ["VALUES", "TK_VALUES"], - ["VIEW", "TK_VIEW"], - ["VIRTUAL", "TK_VIRTUAL"], - ["WHEN", "TK_WHEN"], - ["WHERE", "TK_WHERE"], - ["WINDOW", "TK_WINDOW"], - ["WITH", "TK_WITH"], - ["WITHOUT", "TK_WITHOUT"], - ], - ) - .unwrap(); -} diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 1ea608e14..ad9bf7151 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -1,6 +1,158 @@ use crate::{error::Error, token::TokenType, Result}; +use turso_macros::match_ignore_ascii_case; -include!(concat!(env!("OUT_DIR"), "/keywords.rs")); +fn keyword_or_id_token(input: &[u8]) -> TokenType { + match_ignore_ascii_case!(match input { + b"ABORT" => TokenType::TK_ABORT, + b"ACTION" => TokenType::TK_ACTION, + b"ADD" => TokenType::TK_ADD, + b"AFTER" => TokenType::TK_AFTER, + b"ALL" => TokenType::TK_ALL, + b"ALTER" => TokenType::TK_ALTER, + b"ALWAYS" => TokenType::TK_ALWAYS, + b"ANALYZE" => TokenType::TK_ANALYZE, + b"AND" => TokenType::TK_AND, + b"AS" => TokenType::TK_AS, + b"ASC" => TokenType::TK_ASC, + b"ATTACH" => TokenType::TK_ATTACH, + b"AUTOINCREMENT" => TokenType::TK_AUTOINCR, + b"BEFORE" => TokenType::TK_BEFORE, + b"BEGIN" => TokenType::TK_BEGIN, + b"BETWEEN" => TokenType::TK_BETWEEN, + b"BY" => TokenType::TK_BY, + b"CASCADE" => TokenType::TK_CASCADE, + b"CASE" => TokenType::TK_CASE, + b"CAST" => TokenType::TK_CAST, + b"CHECK" => TokenType::TK_CHECK, + b"COLLATE" => TokenType::TK_COLLATE, + b"COLUMN" => TokenType::TK_COLUMNKW, + b"COMMIT" => TokenType::TK_COMMIT, + b"CONFLICT" => TokenType::TK_CONFLICT, + b"CONSTRAINT" => TokenType::TK_CONSTRAINT, + b"CREATE" => TokenType::TK_CREATE, + b"CROSS" => TokenType::TK_JOIN_KW, + b"CURRENT" => TokenType::TK_CURRENT, + b"CURRENT_DATE" => TokenType::TK_CTIME_KW, + b"CURRENT_TIME" => TokenType::TK_CTIME_KW, + b"CURRENT_TIMESTAMP" => TokenType::TK_CTIME_KW, + b"DATABASE" => TokenType::TK_DATABASE, + b"DEFAULT" => TokenType::TK_DEFAULT, + b"DEFERRABLE" => TokenType::TK_DEFERRABLE, + b"DEFERRED" => TokenType::TK_DEFERRED, + b"DELETE" => TokenType::TK_DELETE, + b"DESC" => TokenType::TK_DESC, + b"DETACH" => TokenType::TK_DETACH, + b"DISTINCT" => TokenType::TK_DISTINCT, + b"DO" => TokenType::TK_DO, + b"DROP" => TokenType::TK_DROP, + b"EACH" => TokenType::TK_EACH, + b"ELSE" => TokenType::TK_ELSE, + b"END" => TokenType::TK_END, + b"ESCAPE" => TokenType::TK_ESCAPE, + b"EXCEPT" => TokenType::TK_EXCEPT, + b"EXCLUDE" => TokenType::TK_EXCLUDE, + b"EXCLUSIVE" => TokenType::TK_EXCLUSIVE, + b"EXISTS" => TokenType::TK_EXISTS, + b"EXPLAIN" => TokenType::TK_EXPLAIN, + b"FAIL" => TokenType::TK_FAIL, + b"FILTER" => TokenType::TK_FILTER, + b"FIRST" => TokenType::TK_FIRST, + b"FOLLOWING" => TokenType::TK_FOLLOWING, + b"FOR" => TokenType::TK_FOR, + b"FOREIGN" => TokenType::TK_FOREIGN, + b"FROM" => TokenType::TK_FROM, + b"FULL" => TokenType::TK_JOIN_KW, + b"GENERATED" => TokenType::TK_GENERATED, + b"GLOB" => TokenType::TK_LIKE_KW, + b"GROUP" => TokenType::TK_GROUP, + b"GROUPS" => TokenType::TK_GROUPS, + b"HAVING" => TokenType::TK_HAVING, + b"IF" => TokenType::TK_IF, + b"IGNORE" => TokenType::TK_IGNORE, + b"IMMEDIATE" => TokenType::TK_IMMEDIATE, + b"IN" => TokenType::TK_IN, + b"INDEX" => TokenType::TK_INDEX, + b"INDEXED" => TokenType::TK_INDEXED, + b"INITIALLY" => TokenType::TK_INITIALLY, + b"INNER" => TokenType::TK_JOIN_KW, + b"INSERT" => TokenType::TK_INSERT, + b"INSTEAD" => TokenType::TK_INSTEAD, + b"INTERSECT" => TokenType::TK_INTERSECT, + b"INTO" => TokenType::TK_INTO, + b"IS" => TokenType::TK_IS, + b"ISNULL" => TokenType::TK_ISNULL, + b"JOIN" => TokenType::TK_JOIN, + b"KEY" => TokenType::TK_KEY, + b"LAST" => TokenType::TK_LAST, + b"LEFT" => TokenType::TK_JOIN_KW, + b"LIKE" => TokenType::TK_LIKE_KW, + b"LIMIT" => TokenType::TK_LIMIT, + b"MATCH" => TokenType::TK_MATCH, + b"MATERIALIZED" => TokenType::TK_MATERIALIZED, + b"NATURAL" => TokenType::TK_JOIN_KW, + b"NO" => TokenType::TK_NO, + b"NOT" => TokenType::TK_NOT, + b"NOTHING" => TokenType::TK_NOTHING, + b"NOTNULL" => TokenType::TK_NOTNULL, + b"NULL" => TokenType::TK_NULL, + b"NULLS" => TokenType::TK_NULLS, + b"OF" => TokenType::TK_OF, + b"OFFSET" => TokenType::TK_OFFSET, + b"ON" => TokenType::TK_ON, + b"OR" => TokenType::TK_OR, + b"ORDER" => TokenType::TK_ORDER, + b"OTHERS" => TokenType::TK_OTHERS, + b"OUTER" => TokenType::TK_JOIN_KW, + b"OVER" => TokenType::TK_OVER, + b"PARTITION" => TokenType::TK_PARTITION, + b"PLAN" => TokenType::TK_PLAN, + b"PRAGMA" => TokenType::TK_PRAGMA, + b"PRECEDING" => TokenType::TK_PRECEDING, + b"PRIMARY" => TokenType::TK_PRIMARY, + b"QUERY" => TokenType::TK_QUERY, + b"RAISE" => TokenType::TK_RAISE, + b"RANGE" => TokenType::TK_RANGE, + b"RECURSIVE" => TokenType::TK_RECURSIVE, + b"REFERENCES" => TokenType::TK_REFERENCES, + b"REGEXP" => TokenType::TK_LIKE_KW, + b"REINDEX" => TokenType::TK_REINDEX, + b"RELEASE" => TokenType::TK_RELEASE, + b"RENAME" => TokenType::TK_RENAME, + b"REPLACE" => TokenType::TK_REPLACE, + b"RETURNING" => TokenType::TK_RETURNING, + b"RESTRICT" => TokenType::TK_RESTRICT, + b"RIGHT" => TokenType::TK_JOIN_KW, + b"ROLLBACK" => TokenType::TK_ROLLBACK, + b"ROW" => TokenType::TK_ROW, + b"ROWS" => TokenType::TK_ROWS, + b"SAVEPOINT" => TokenType::TK_SAVEPOINT, + b"SELECT" => TokenType::TK_SELECT, + b"SET" => TokenType::TK_SET, + b"TABLE" => TokenType::TK_TABLE, + b"TEMP" => TokenType::TK_TEMP, + b"TEMPORARY" => TokenType::TK_TEMP, + b"THEN" => TokenType::TK_THEN, + b"TIES" => TokenType::TK_TIES, + b"TO" => TokenType::TK_TO, + b"TRANSACTION" => TokenType::TK_TRANSACTION, + b"TRIGGER" => TokenType::TK_TRIGGER, + b"UNBOUNDED" => TokenType::TK_UNBOUNDED, + b"UNION" => TokenType::TK_UNION, + b"UNIQUE" => TokenType::TK_UNIQUE, + b"UPDATE" => TokenType::TK_UPDATE, + b"USING" => TokenType::TK_USING, + b"VACUUM" => TokenType::TK_VACUUM, + b"VALUES" => TokenType::TK_VALUES, + b"VIEW" => TokenType::TK_VIEW, + b"VIRTUAL" => TokenType::TK_VIRTUAL, + b"WHEN" => TokenType::TK_WHEN, + b"WHERE" => TokenType::TK_WHERE, + b"WINDOW" => TokenType::TK_WINDOW, + b"WITH" => TokenType::TK_WITH, + b"WITHOUT" => TokenType::TK_WITHOUT, + _ => TokenType::TK_ID, + }) +} #[inline(always)] pub fn is_identifier_start(b: u8) -> bool { @@ -637,7 +789,7 @@ impl<'a> Lexer<'a> { let result = &self.input[start..self.offset]; Ok(Token { value: result, - token_type: Some(keyword_token(result).unwrap_or(TokenType::TK_ID)), + token_type: Some(keyword_or_id_token(result)), }) } } @@ -1253,19 +1405,23 @@ mod tests { ]); for (key, value) in &values { - assert!(keyword_token(key.as_bytes()).unwrap() == *value); - assert!( - keyword_token(key.as_bytes().to_ascii_lowercase().as_slice()).unwrap() == *value - ); + assert!(keyword_or_id_token(key.as_bytes()) == *value); + assert!(keyword_or_id_token(key.as_bytes().to_ascii_lowercase().as_slice()) == *value); } - assert!(keyword_token(b"").is_none()); - assert!(keyword_token(b"wrong").is_none()); - assert!(keyword_token(b"super wrong").is_none()); - assert!(keyword_token(b"super_wrong").is_none()); - assert!(keyword_token(b"aae26e78-3ba7-4627-8f8f-02623302495a").is_none()); - assert!(keyword_token("Crème Brulée".as_bytes()).is_none()); - assert!(keyword_token("fróm".as_bytes()).is_none()); + assert_eq!(keyword_or_id_token(b""), TokenType::TK_ID); + assert_eq!(keyword_or_id_token(b"wrong"), TokenType::TK_ID); + assert_eq!(keyword_or_id_token(b"super wrong"), TokenType::TK_ID); + assert_eq!(keyword_or_id_token(b"super_wrong"), TokenType::TK_ID); + assert_eq!( + keyword_or_id_token(b"aae26e78-3ba7-4627-8f8f-02623302495a"), + TokenType::TK_ID + ); + assert_eq!( + keyword_or_id_token("Crème Brulée".as_bytes()), + TokenType::TK_ID + ); + assert_eq!(keyword_or_id_token("fróm".as_bytes()), TokenType::TK_ID); } #[test] diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 124058fc9..047146a3f 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -14,6 +14,7 @@ use crate::error::Error; use crate::lexer::{Lexer, Token}; use crate::token::TokenType::{self, *}; use crate::Result; +use turso_macros::match_ignore_ascii_case; macro_rules! peek_expect { ( $parser:expr, $( $x:ident ),* $(,)?) => { @@ -91,26 +92,19 @@ fn from_bytes(bytes: &[u8]) -> String { #[inline] fn join_type_from_bytes(s: &[u8]) -> Result { - if b"CROSS".eq_ignore_ascii_case(s) { - Ok(JoinType::INNER | JoinType::CROSS) - } else if b"FULL".eq_ignore_ascii_case(s) { - Ok(JoinType::LEFT | JoinType::RIGHT | JoinType::OUTER) - } else if b"INNER".eq_ignore_ascii_case(s) { - Ok(JoinType::INNER) - } else if b"LEFT".eq_ignore_ascii_case(s) { - Ok(JoinType::LEFT | JoinType::OUTER) - } else if b"NATURAL".eq_ignore_ascii_case(s) { - Ok(JoinType::NATURAL) - } else if b"RIGHT".eq_ignore_ascii_case(s) { - Ok(JoinType::RIGHT | JoinType::OUTER) - } else if b"OUTER".eq_ignore_ascii_case(s) { - Ok(JoinType::OUTER) - } else { - Err(Error::Custom(format!( + match_ignore_ascii_case!(match s { + b"CROSS" => Ok(JoinType::INNER | JoinType::CROSS), + b"FULL" => Ok(JoinType::LEFT | JoinType::RIGHT | JoinType::OUTER), + b"INNER" => Ok(JoinType::INNER), + b"LEFT" => Ok(JoinType::LEFT | JoinType::OUTER), + b"NATURAL" => Ok(JoinType::NATURAL), + b"RIGHT" => Ok(JoinType::RIGHT | JoinType::OUTER), + b"OUTER" => Ok(JoinType::OUTER), + _ => Err(Error::Custom(format!( "unsupported JOIN type: {:?}", str::from_utf8(s) - ))) - } + ))), + }) } #[inline] @@ -1365,15 +1359,12 @@ impl<'a> Parser<'a> { } TK_CTIME_KW => { let tok = eat_assert!(self, TK_CTIME_KW); - if b"CURRENT_DATE".eq_ignore_ascii_case(tok.value) { - Ok(Box::new(Expr::Literal(Literal::CurrentDate))) - } else if b"CURRENT_TIME".eq_ignore_ascii_case(tok.value) { - Ok(Box::new(Expr::Literal(Literal::CurrentTime))) - } else if b"CURRENT_TIMESTAMP".eq_ignore_ascii_case(tok.value) { - Ok(Box::new(Expr::Literal(Literal::CurrentTimestamp))) - } else { - unreachable!() - } + match_ignore_ascii_case!(match tok.value { + b"CURRENT_DATE" => Ok(Box::new(Expr::Literal(Literal::CurrentDate))), + b"CURRENT_TIME" => Ok(Box::new(Expr::Literal(Literal::CurrentTime))), + b"CURRENT_TIMESTAMP" => Ok(Box::new(Expr::Literal(Literal::CurrentTimestamp))), + _ => unreachable!(), + }) } TK_NOT => { eat_assert!(self, TK_NOT); @@ -1729,17 +1720,12 @@ impl<'a> Parser<'a> { let tok = eat_assert!(self, TK_MATCH, TK_LIKE_KW); let op = match tok.token_type.unwrap() { TK_MATCH => LikeOperator::Match, - TK_LIKE_KW => { - if b"LIKE".eq_ignore_ascii_case(tok.value) { - LikeOperator::Like - } else if b"GLOB".eq_ignore_ascii_case(tok.value) { - LikeOperator::Glob - } else if b"REGEXP".eq_ignore_ascii_case(tok.value) { - LikeOperator::Regexp - } else { - unreachable!() - } - } + TK_LIKE_KW => match_ignore_ascii_case!(match tok.value { + b"LIKE" => LikeOperator::Like, + b"GLOB" => LikeOperator::Glob, + b"REGEXP" => LikeOperator::Regexp, + _ => unreachable!(), + }), _ => unreachable!(), }; @@ -2743,25 +2729,23 @@ impl<'a> Parser<'a> { TK_WITHOUT => { eat_assert!(self, TK_WITHOUT); let tok = eat_expect!(self, TK_ID); - if b"ROWID".eq_ignore_ascii_case(tok.value) { - Ok(TableOptions::WITHOUT_ROWID) - } else { - Err(Error::Custom(format!( + match_ignore_ascii_case!(match tok.value { + b"ROWID" => Ok(TableOptions::WITHOUT_ROWID), + _ => Err(Error::Custom(format!( "unknown table option: {}", from_bytes(tok.value) - ))) - } + ))), + }) } TK_ID => { let tok = eat_assert!(self, TK_ID); - if b"STRICT".eq_ignore_ascii_case(tok.value) { - Ok(TableOptions::STRICT) - } else { - Err(Error::Custom(format!( + match_ignore_ascii_case!(match tok.value { + b"STRICT" => Ok(TableOptions::STRICT), + _ => Err(Error::Custom(format!( "unknown table option: {}", from_bytes(tok.value) - ))) - } + ))), + }) } _ => Ok(TableOptions::NONE), }, @@ -2855,18 +2839,16 @@ impl<'a> Parser<'a> { match &c.col_type { Some(Type { name, .. }) => { // The datatype must be one of following: INT INTEGER REAL TEXT BLOB ANY - if !(name.eq_ignore_ascii_case("INT") - || name.eq_ignore_ascii_case("INTEGER") - || name.eq_ignore_ascii_case("REAL") - || name.eq_ignore_ascii_case("TEXT") - || name.eq_ignore_ascii_case("BLOB") - || name.eq_ignore_ascii_case("ANY")) - { - return Err(Error::Custom(format!( - "unknown datatype for {}.{}: \"{}\"", - tbl_name, c.col_name, name - ))); - } + let bytes_name = name.as_bytes(); + match_ignore_ascii_case!(match bytes_name { + b"INT" | b"INTEGER" | b"REAL" | b"TEXT" | b"BLOB" | b"ANY" => {} + _ => { + return Err(Error::Custom(format!( + "unknown datatype for {}.{}: \"{}\"", + tbl_name, c.col_name, name + ))); + } + }) } _ => { // Every column definition must specify a datatype for that column. The freedom to specify a column without a datatype is removed.