add match_ignore_ascii_case macro

TcMits
2025-08-31 14:35:03 +07:00
parent e1b5f2d948
commit 190e9bcc95
9 changed files with 371 additions and 366 deletions

Cargo.lock (generated)

@@ -4172,6 +4172,7 @@ dependencies = [
"strum",
"strum_macros",
"thiserror 1.0.69",
"turso_macros",
"turso_sqlite3_parser",
]


@@ -17,4 +17,4 @@ proc-macro = true
[dependencies]
quote = "1.0.38"
proc-macro2 = "1.0.93"
syn = { version = "2.0.96", features = ["full"] }
syn = { version = "2.0.96", features = ["full", "clone-impls"] }


@@ -0,0 +1,148 @@
use proc_macro::TokenStream;
use quote::quote;
use std::collections::HashMap;
use syn::{parse_macro_input, spanned::Spanned, Arm, ExprMatch, Lit, Pat};

/// Expands a `match` over byte-string literals into a trie of nested
/// byte-wise matches that compare ASCII case-insensitively.
pub fn match_ignore_ascii_case(input: TokenStream) -> TokenStream {
    let match_block = parse_macro_input!(input as ExprMatch);
    if match_block.arms.is_empty() {
        return syn::Error::new(
            match_block.span(),
            "expected at least one arm with literal string/byte/bytes/char",
        )
        .to_compile_error()
        .into();
    }
    let mut arms: Vec<(Vec<u8>, Arm)> = Vec::with_capacity(match_block.arms.len());
    let mut fallback_arm: Option<Arm> = None;
    for arm in &match_block.arms {
        match &arm.pat {
            Pat::Lit(lit) => match &lit.lit {
                Lit::ByteStr(bs) => {
                    arms.push((bs.value().to_ascii_uppercase(), arm.clone()));
                }
                _ => {
                    return syn::Error::new(
                        arm.pat.span(),
                        "expected literal string/byte/bytes/char",
                    )
                    .to_compile_error()
                    .into();
                }
            },
            Pat::Wild(_) => {
                fallback_arm = Some(arm.clone());
            }
            Pat::Or(or) => {
                for case in &or.cases {
                    match case {
                        Pat::Lit(lit) => match &lit.lit {
                            Lit::ByteStr(bs) => {
                                arms.push((bs.value().to_ascii_uppercase(), arm.clone()));
                            }
                            _ => {
                                return syn::Error::new(
                                    arm.pat.span(),
                                    "expected literal string/byte/bytes/char",
                                )
                                .to_compile_error()
                                .into();
                            }
                        },
                        _ => {
                            return syn::Error::new(
                                arm.pat.span(),
                                "expected literal string/byte/bytes/char",
                            )
                            .to_compile_error()
                            .into();
                        }
                    }
                }
            }
            _ => {
                return syn::Error::new(
                    arm.pat.span(),
                    "expected literal string/byte/bytes/char",
                )
                .to_compile_error()
                .into();
            }
        }
    }
    // Trie node: `result` is the arm matched when the input ends here,
    // `sub_entries` maps the next (uppercased) byte to the child node.
    struct PathEntry {
        result: Option<Arm>,
        sub_entries: HashMap<u8, Box<PathEntry>>,
    }
    let mut paths = Box::new(PathEntry {
        result: None,
        sub_entries: HashMap::new(),
    });
    for (keyword_b, arm) in arms.drain(..) {
        let mut current = &mut paths;
        for b in keyword_b {
            current = current.sub_entries.entry(b).or_insert_with(|| {
                Box::new(PathEntry {
                    result: None,
                    sub_entries: HashMap::new(),
                })
            });
        }
        assert!(current.result.is_none(), "duplicate literal pattern");
        current.result = Some(arm);
    }
    fn write_entry(
        idx: usize,
        var_name: proc_macro2::TokenStream,
        fallback_arm: Option<Arm>,
        entry: &PathEntry,
    ) -> proc_macro2::TokenStream {
        let eof_handle = if let Some(ref result) = entry.result {
            let body = &result.body;
            quote! { None => { #body } }
        } else {
            quote! {}
        };
        let fallback_handle = if let Some(ref result) = fallback_arm {
            let body = &result.body;
            quote! { _ => { #body } }
        } else {
            quote! {}
        };
        let mut arms = Vec::with_capacity(entry.sub_entries.len());
        for (&b, sub_entry) in &entry.sub_entries {
            let sub_match = write_entry(idx + 1, var_name.clone(), fallback_arm.clone(), sub_entry);
            // `get` yields Option<&u8>, so match on a reference to the byte.
            if b.is_ascii_alphabetic() {
                let b_lower = b.to_ascii_lowercase();
                arms.push(quote! { Some(&#b) | Some(&#b_lower) => #sub_match });
            } else {
                arms.push(quote! { Some(&#b) => #sub_match });
            }
        }
        quote! {
            match #var_name.get(#idx) {
                #eof_handle
                #(#arms)*
                #fallback_handle
            }
        }
    }
    // Parenthesize the scrutinee so complex expressions keep their precedence.
    let expr = match_block.expr;
    TokenStream::from(write_entry(0, quote! { (#expr) }, fallback_arm, &paths))
}
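
To make the generated shape concrete, here is a hand-expanded sketch of what an invocation of the macro above with two keywords lowers to. The `Tok` enum and `classify` wrapper are illustrative stand-ins, not the macro's literal output:

// Hand expansion of:
//   match_ignore_ascii_case!(match input {
//       b"AS" => Tok::As,
//       b"ASC" => Tok::Asc,
//       _ => Tok::Id,
//   })
#[derive(Debug, PartialEq)]
enum Tok { As, Asc, Id }

fn classify(input: &[u8]) -> Tok {
    // One nested match per trie depth; `None` means the input ended
    // exactly here, and each byte is tested in both ASCII cases.
    match input.get(0) {
        Some(&b'A') | Some(&b'a') => match input.get(1) {
            Some(&b'S') | Some(&b's') => match input.get(2) {
                None => Tok::As,
                Some(&b'C') | Some(&b'c') => match input.get(3) {
                    None => Tok::Asc,
                    _ => Tok::Id,
                },
                _ => Tok::Id,
            },
            _ => Tok::Id,
        },
        _ => Tok::Id,
    }
}

fn main() {
    assert_eq!(classify(b"asc"), Tok::Asc);
    assert_eq!(classify(b"aS"), Tok::As);
    assert_eq!(classify(b"ask"), Tok::Id);
}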


@@ -5,10 +5,12 @@ use syn::punctuated::Punctuated;
use syn::token::Eq;
use syn::{parse_macro_input, Ident, LitStr, Token};
mod agg_derive;
mod match_ignore_ascii_case;
mod scalars;
mod vfs_derive;
mod vtab_derive;
pub use agg_derive::derive_agg_func;
pub use match_ignore_ascii_case::match_ignore_ascii_case;
pub use scalars::scalar;
pub use vfs_derive::derive_vfs_module;
pub use vtab_derive::derive_vtab_module;


@@ -439,3 +439,8 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream {
pub fn derive_vfs_module(input: TokenStream) -> TokenStream {
ext::derive_vfs_module(input)
}
#[proc_macro]
pub fn match_ignore_ascii_case(input: TokenStream) -> TokenStream {
    ext::match_ignore_ascii_case(input)
}


@@ -21,6 +21,7 @@ strum = { workspace = true }
strum_macros = {workspace = true }
serde = { workspace = true , optional = true, features = ["derive"] }
thiserror = "1.0.61"
turso_macros = { workspace = true }
[dev-dependencies]
fallible-iterator = "0.3"


@@ -1,290 +0,0 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::PathBuf;
/// generates a trie-like function with nested match expressions for parsing SQL keywords
/// example: input: [["ABORT", "TK_ABORT"], ["ACTION", "TK_ACTION"], ["ADD", "TK_ADD"],]
/// A
/// ├─ B
/// │ ├─ O
/// │ │ ├─ R
/// │ │ │ ├─ T -> TK_ABORT
/// ├─ C
/// │ ├─ T
/// │ │ ├─ I
/// │ │ │ ├─ O
/// │ │ │ │ ├─ N -> TK_ACTION
/// ├─ D
/// │ ├─ D -> TK_ADD
fn build_keyword_map(
writer: &mut impl Write,
func_name: &str,
keywords: &[[&'static str; 2]],
) -> Result<(), std::io::Error> {
assert!(!keywords.is_empty());
let mut min_len = keywords[0][0].len();
let mut max_len = keywords[0][0].len();
struct PathEntry {
result: Option<&'static str>,
sub_entries: HashMap<u8, Box<PathEntry>>,
}
let mut paths = Box::new(PathEntry {
result: None,
sub_entries: HashMap::new(),
});
for keyword in keywords {
let keyword_b = keyword[0].as_bytes();
if keyword_b.len() < min_len {
min_len = keyword_b.len();
}
if keyword_b.len() > max_len {
max_len = keyword_b.len();
}
let mut current = &mut paths;
for &b in keyword_b {
let upper_b = b.to_ascii_uppercase();
match current.sub_entries.get(&upper_b) {
Some(_) => {
current = current.sub_entries.get_mut(&upper_b).unwrap();
}
None => {
let new_entry = Box::new(PathEntry {
result: None,
sub_entries: HashMap::new(),
});
current.sub_entries.insert(upper_b, new_entry);
current = current.sub_entries.get_mut(&upper_b).unwrap();
}
}
}
assert!(current.result.is_none());
current.result = Some(keyword[1]);
}
fn write_entry(writer: &mut impl Write, entry: &PathEntry) -> Result<(), std::io::Error> {
if let Some(result) = entry.result {
writeln!(writer, "if idx == buf.len() {{")?;
writeln!(writer, "return Some(TokenType::{result});")?;
writeln!(writer, "}}")?;
}
if entry.sub_entries.is_empty() {
writeln!(writer, "None")?;
return Ok(());
}
writeln!(writer, "if idx >= buf.len() {{")?;
writeln!(writer, "return None;")?;
writeln!(writer, "}}")?;
writeln!(writer, "match buf[idx] {{")?;
for (&b, sub_entry) in &entry.sub_entries {
if b.is_ascii_alphabetic() {
writeln!(writer, "{} | {} => {{", b, b.to_ascii_lowercase())?;
} else {
writeln!(writer, "{b} => {{")?;
}
writeln!(writer, "idx += 1;")?;
write_entry(writer, sub_entry)?;
writeln!(writer, "}}")?;
}
writeln!(writer, "_ => None")?;
writeln!(writer, "}}")?;
Ok(())
}
writeln!(
writer,
"pub(crate) const MAX_KEYWORD_LEN: usize = {max_len};"
)?;
writeln!(
writer,
"pub(crate) const MIN_KEYWORD_LEN: usize = {min_len};"
)?;
writeln!(writer, "/// Check if `word` is a keyword")?;
writeln!(
writer,
"pub fn {func_name}(buf: &[u8]) -> Option<TokenType> {{"
)?;
writeln!(
writer,
"if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {{"
)?;
writeln!(writer, "return None;")?;
writeln!(writer, "}}")?;
writeln!(writer, "let mut idx = 0;")?;
write_entry(writer, &paths)?;
writeln!(writer, "}}")?;
Ok(())
}
fn main() {
let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
let keywords = out_dir.join("keywords.rs");
let mut keywords = BufWriter::new(File::create(keywords).unwrap());
build_keyword_map(
&mut keywords,
"keyword_token",
&[
["ABORT", "TK_ABORT"],
["ACTION", "TK_ACTION"],
["ADD", "TK_ADD"],
["AFTER", "TK_AFTER"],
["ALL", "TK_ALL"],
["ALTER", "TK_ALTER"],
["ALWAYS", "TK_ALWAYS"],
["ANALYZE", "TK_ANALYZE"],
["AND", "TK_AND"],
["AS", "TK_AS"],
["ASC", "TK_ASC"],
["ATTACH", "TK_ATTACH"],
["AUTOINCREMENT", "TK_AUTOINCR"],
["BEFORE", "TK_BEFORE"],
["BEGIN", "TK_BEGIN"],
["BETWEEN", "TK_BETWEEN"],
["BY", "TK_BY"],
["CASCADE", "TK_CASCADE"],
["CASE", "TK_CASE"],
["CAST", "TK_CAST"],
["CHECK", "TK_CHECK"],
["COLLATE", "TK_COLLATE"],
["COLUMN", "TK_COLUMNKW"],
["COMMIT", "TK_COMMIT"],
["CONFLICT", "TK_CONFLICT"],
["CONSTRAINT", "TK_CONSTRAINT"],
["CREATE", "TK_CREATE"],
["CROSS", "TK_JOIN_KW"],
["CURRENT", "TK_CURRENT"],
["CURRENT_DATE", "TK_CTIME_KW"],
["CURRENT_TIME", "TK_CTIME_KW"],
["CURRENT_TIMESTAMP", "TK_CTIME_KW"],
["DATABASE", "TK_DATABASE"],
["DEFAULT", "TK_DEFAULT"],
["DEFERRABLE", "TK_DEFERRABLE"],
["DEFERRED", "TK_DEFERRED"],
["DELETE", "TK_DELETE"],
["DESC", "TK_DESC"],
["DETACH", "TK_DETACH"],
["DISTINCT", "TK_DISTINCT"],
["DO", "TK_DO"],
["DROP", "TK_DROP"],
["EACH", "TK_EACH"],
["ELSE", "TK_ELSE"],
["END", "TK_END"],
["ESCAPE", "TK_ESCAPE"],
["EXCEPT", "TK_EXCEPT"],
["EXCLUDE", "TK_EXCLUDE"],
["EXCLUSIVE", "TK_EXCLUSIVE"],
["EXISTS", "TK_EXISTS"],
["EXPLAIN", "TK_EXPLAIN"],
["FAIL", "TK_FAIL"],
["FILTER", "TK_FILTER"],
["FIRST", "TK_FIRST"],
["FOLLOWING", "TK_FOLLOWING"],
["FOR", "TK_FOR"],
["FOREIGN", "TK_FOREIGN"],
["FROM", "TK_FROM"],
["FULL", "TK_JOIN_KW"],
["GENERATED", "TK_GENERATED"],
["GLOB", "TK_LIKE_KW"],
["GROUP", "TK_GROUP"],
["GROUPS", "TK_GROUPS"],
["HAVING", "TK_HAVING"],
["IF", "TK_IF"],
["IGNORE", "TK_IGNORE"],
["IMMEDIATE", "TK_IMMEDIATE"],
["IN", "TK_IN"],
["INDEX", "TK_INDEX"],
["INDEXED", "TK_INDEXED"],
["INITIALLY", "TK_INITIALLY"],
["INNER", "TK_JOIN_KW"],
["INSERT", "TK_INSERT"],
["INSTEAD", "TK_INSTEAD"],
["INTERSECT", "TK_INTERSECT"],
["INTO", "TK_INTO"],
["IS", "TK_IS"],
["ISNULL", "TK_ISNULL"],
["JOIN", "TK_JOIN"],
["KEY", "TK_KEY"],
["LAST", "TK_LAST"],
["LEFT", "TK_JOIN_KW"],
["LIKE", "TK_LIKE_KW"],
["LIMIT", "TK_LIMIT"],
["MATCH", "TK_MATCH"],
["MATERIALIZED", "TK_MATERIALIZED"],
["NATURAL", "TK_JOIN_KW"],
["NO", "TK_NO"],
["NOT", "TK_NOT"],
["NOTHING", "TK_NOTHING"],
["NOTNULL", "TK_NOTNULL"],
["NULL", "TK_NULL"],
["NULLS", "TK_NULLS"],
["OF", "TK_OF"],
["OFFSET", "TK_OFFSET"],
["ON", "TK_ON"],
["OR", "TK_OR"],
["ORDER", "TK_ORDER"],
["OTHERS", "TK_OTHERS"],
["OUTER", "TK_JOIN_KW"],
["OVER", "TK_OVER"],
["PARTITION", "TK_PARTITION"],
["PLAN", "TK_PLAN"],
["PRAGMA", "TK_PRAGMA"],
["PRECEDING", "TK_PRECEDING"],
["PRIMARY", "TK_PRIMARY"],
["QUERY", "TK_QUERY"],
["RAISE", "TK_RAISE"],
["RANGE", "TK_RANGE"],
["RECURSIVE", "TK_RECURSIVE"],
["REFERENCES", "TK_REFERENCES"],
["REGEXP", "TK_LIKE_KW"],
["REINDEX", "TK_REINDEX"],
["RELEASE", "TK_RELEASE"],
["RENAME", "TK_RENAME"],
["REPLACE", "TK_REPLACE"],
["RETURNING", "TK_RETURNING"],
["RESTRICT", "TK_RESTRICT"],
["RIGHT", "TK_JOIN_KW"],
["ROLLBACK", "TK_ROLLBACK"],
["ROW", "TK_ROW"],
["ROWS", "TK_ROWS"],
["SAVEPOINT", "TK_SAVEPOINT"],
["SELECT", "TK_SELECT"],
["SET", "TK_SET"],
["TABLE", "TK_TABLE"],
["TEMP", "TK_TEMP"],
["TEMPORARY", "TK_TEMP"],
["THEN", "TK_THEN"],
["TIES", "TK_TIES"],
["TO", "TK_TO"],
["TRANSACTION", "TK_TRANSACTION"],
["TRIGGER", "TK_TRIGGER"],
["UNBOUNDED", "TK_UNBOUNDED"],
["UNION", "TK_UNION"],
["UNIQUE", "TK_UNIQUE"],
["UPDATE", "TK_UPDATE"],
["USING", "TK_USING"],
["VACUUM", "TK_VACUUM"],
["VALUES", "TK_VALUES"],
["VIEW", "TK_VIEW"],
["VIRTUAL", "TK_VIRTUAL"],
["WHEN", "TK_WHEN"],
["WHERE", "TK_WHERE"],
["WINDOW", "TK_WINDOW"],
["WITH", "TK_WITH"],
["WITHOUT", "TK_WITHOUT"],
],
)
.unwrap();
}
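
For comparison, the deleted build script above wrote source text of roughly this shape into keywords.rs. This is a trimmed, hand-written sketch covering only "ADD"; the real generated function covered the whole keyword table, and the enum and constants here are stand-ins for the crate's actual TokenType and table-derived values:

#[allow(non_camel_case_types)]
#[derive(Debug, PartialEq)]
enum TokenType { TK_ADD }

// Sketch values; the real constants came from the full keyword table.
const MIN_KEYWORD_LEN: usize = 3;
const MAX_KEYWORD_LEN: usize = 3;

pub fn keyword_token(buf: &[u8]) -> Option<TokenType> {
    if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {
        return None;
    }
    let mut idx = 0;
    if idx >= buf.len() {
        return None;
    }
    match buf[idx] {
        65 | 97 => {          // 'A' | 'a'
            idx += 1;
            if idx >= buf.len() {
                return None;
            }
            match buf[idx] {
                68 | 100 => { // 'D' | 'd'
                    idx += 1;
                    if idx >= buf.len() {
                        return None;
                    }
                    match buf[idx] {
                        68 | 100 => {
                            idx += 1;
                            if idx == buf.len() {
                                return Some(TokenType::TK_ADD);
                            }
                            None
                        }
                        _ => None,
                    }
                }
                _ => None,
            }
        }
        _ => None,
    }
}

fn main() {
    assert_eq!(keyword_token(b"add"), Some(TokenType::TK_ADD));
    assert_eq!(keyword_token(b"ad"), None);
}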


@@ -1,6 +1,158 @@
use crate::{error::Error, token::TokenType, Result};
use turso_macros::match_ignore_ascii_case;
include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
fn keyword_or_id_token(input: &[u8]) -> TokenType {
match_ignore_ascii_case!(match input {
b"ABORT" => TokenType::TK_ABORT,
b"ACTION" => TokenType::TK_ACTION,
b"ADD" => TokenType::TK_ADD,
b"AFTER" => TokenType::TK_AFTER,
b"ALL" => TokenType::TK_ALL,
b"ALTER" => TokenType::TK_ALTER,
b"ALWAYS" => TokenType::TK_ALWAYS,
b"ANALYZE" => TokenType::TK_ANALYZE,
b"AND" => TokenType::TK_AND,
b"AS" => TokenType::TK_AS,
b"ASC" => TokenType::TK_ASC,
b"ATTACH" => TokenType::TK_ATTACH,
b"AUTOINCREMENT" => TokenType::TK_AUTOINCR,
b"BEFORE" => TokenType::TK_BEFORE,
b"BEGIN" => TokenType::TK_BEGIN,
b"BETWEEN" => TokenType::TK_BETWEEN,
b"BY" => TokenType::TK_BY,
b"CASCADE" => TokenType::TK_CASCADE,
b"CASE" => TokenType::TK_CASE,
b"CAST" => TokenType::TK_CAST,
b"CHECK" => TokenType::TK_CHECK,
b"COLLATE" => TokenType::TK_COLLATE,
b"COLUMN" => TokenType::TK_COLUMNKW,
b"COMMIT" => TokenType::TK_COMMIT,
b"CONFLICT" => TokenType::TK_CONFLICT,
b"CONSTRAINT" => TokenType::TK_CONSTRAINT,
b"CREATE" => TokenType::TK_CREATE,
b"CROSS" => TokenType::TK_JOIN_KW,
b"CURRENT" => TokenType::TK_CURRENT,
b"CURRENT_DATE" => TokenType::TK_CTIME_KW,
b"CURRENT_TIME" => TokenType::TK_CTIME_KW,
b"CURRENT_TIMESTAMP" => TokenType::TK_CTIME_KW,
b"DATABASE" => TokenType::TK_DATABASE,
b"DEFAULT" => TokenType::TK_DEFAULT,
b"DEFERRABLE" => TokenType::TK_DEFERRABLE,
b"DEFERRED" => TokenType::TK_DEFERRED,
b"DELETE" => TokenType::TK_DELETE,
b"DESC" => TokenType::TK_DESC,
b"DETACH" => TokenType::TK_DETACH,
b"DISTINCT" => TokenType::TK_DISTINCT,
b"DO" => TokenType::TK_DO,
b"DROP" => TokenType::TK_DROP,
b"EACH" => TokenType::TK_EACH,
b"ELSE" => TokenType::TK_ELSE,
b"END" => TokenType::TK_END,
b"ESCAPE" => TokenType::TK_ESCAPE,
b"EXCEPT" => TokenType::TK_EXCEPT,
b"EXCLUDE" => TokenType::TK_EXCLUDE,
b"EXCLUSIVE" => TokenType::TK_EXCLUSIVE,
b"EXISTS" => TokenType::TK_EXISTS,
b"EXPLAIN" => TokenType::TK_EXPLAIN,
b"FAIL" => TokenType::TK_FAIL,
b"FILTER" => TokenType::TK_FILTER,
b"FIRST" => TokenType::TK_FIRST,
b"FOLLOWING" => TokenType::TK_FOLLOWING,
b"FOR" => TokenType::TK_FOR,
b"FOREIGN" => TokenType::TK_FOREIGN,
b"FROM" => TokenType::TK_FROM,
b"FULL" => TokenType::TK_JOIN_KW,
b"GENERATED" => TokenType::TK_GENERATED,
b"GLOB" => TokenType::TK_LIKE_KW,
b"GROUP" => TokenType::TK_GROUP,
b"GROUPS" => TokenType::TK_GROUPS,
b"HAVING" => TokenType::TK_HAVING,
b"IF" => TokenType::TK_IF,
b"IGNORE" => TokenType::TK_IGNORE,
b"IMMEDIATE" => TokenType::TK_IMMEDIATE,
b"IN" => TokenType::TK_IN,
b"INDEX" => TokenType::TK_INDEX,
b"INDEXED" => TokenType::TK_INDEXED,
b"INITIALLY" => TokenType::TK_INITIALLY,
b"INNER" => TokenType::TK_JOIN_KW,
b"INSERT" => TokenType::TK_INSERT,
b"INSTEAD" => TokenType::TK_INSTEAD,
b"INTERSECT" => TokenType::TK_INTERSECT,
b"INTO" => TokenType::TK_INTO,
b"IS" => TokenType::TK_IS,
b"ISNULL" => TokenType::TK_ISNULL,
b"JOIN" => TokenType::TK_JOIN,
b"KEY" => TokenType::TK_KEY,
b"LAST" => TokenType::TK_LAST,
b"LEFT" => TokenType::TK_JOIN_KW,
b"LIKE" => TokenType::TK_LIKE_KW,
b"LIMIT" => TokenType::TK_LIMIT,
b"MATCH" => TokenType::TK_MATCH,
b"MATERIALIZED" => TokenType::TK_MATERIALIZED,
b"NATURAL" => TokenType::TK_JOIN_KW,
b"NO" => TokenType::TK_NO,
b"NOT" => TokenType::TK_NOT,
b"NOTHING" => TokenType::TK_NOTHING,
b"NOTNULL" => TokenType::TK_NOTNULL,
b"NULL" => TokenType::TK_NULL,
b"NULLS" => TokenType::TK_NULLS,
b"OF" => TokenType::TK_OF,
b"OFFSET" => TokenType::TK_OFFSET,
b"ON" => TokenType::TK_ON,
b"OR" => TokenType::TK_OR,
b"ORDER" => TokenType::TK_ORDER,
b"OTHERS" => TokenType::TK_OTHERS,
b"OUTER" => TokenType::TK_JOIN_KW,
b"OVER" => TokenType::TK_OVER,
b"PARTITION" => TokenType::TK_PARTITION,
b"PLAN" => TokenType::TK_PLAN,
b"PRAGMA" => TokenType::TK_PRAGMA,
b"PRECEDING" => TokenType::TK_PRECEDING,
b"PRIMARY" => TokenType::TK_PRIMARY,
b"QUERY" => TokenType::TK_QUERY,
b"RAISE" => TokenType::TK_RAISE,
b"RANGE" => TokenType::TK_RANGE,
b"RECURSIVE" => TokenType::TK_RECURSIVE,
b"REFERENCES" => TokenType::TK_REFERENCES,
b"REGEXP" => TokenType::TK_LIKE_KW,
b"REINDEX" => TokenType::TK_REINDEX,
b"RELEASE" => TokenType::TK_RELEASE,
b"RENAME" => TokenType::TK_RENAME,
b"REPLACE" => TokenType::TK_REPLACE,
b"RETURNING" => TokenType::TK_RETURNING,
b"RESTRICT" => TokenType::TK_RESTRICT,
b"RIGHT" => TokenType::TK_JOIN_KW,
b"ROLLBACK" => TokenType::TK_ROLLBACK,
b"ROW" => TokenType::TK_ROW,
b"ROWS" => TokenType::TK_ROWS,
b"SAVEPOINT" => TokenType::TK_SAVEPOINT,
b"SELECT" => TokenType::TK_SELECT,
b"SET" => TokenType::TK_SET,
b"TABLE" => TokenType::TK_TABLE,
b"TEMP" => TokenType::TK_TEMP,
b"TEMPORARY" => TokenType::TK_TEMP,
b"THEN" => TokenType::TK_THEN,
b"TIES" => TokenType::TK_TIES,
b"TO" => TokenType::TK_TO,
b"TRANSACTION" => TokenType::TK_TRANSACTION,
b"TRIGGER" => TokenType::TK_TRIGGER,
b"UNBOUNDED" => TokenType::TK_UNBOUNDED,
b"UNION" => TokenType::TK_UNION,
b"UNIQUE" => TokenType::TK_UNIQUE,
b"UPDATE" => TokenType::TK_UPDATE,
b"USING" => TokenType::TK_USING,
b"VACUUM" => TokenType::TK_VACUUM,
b"VALUES" => TokenType::TK_VALUES,
b"VIEW" => TokenType::TK_VIEW,
b"VIRTUAL" => TokenType::TK_VIRTUAL,
b"WHEN" => TokenType::TK_WHEN,
b"WHERE" => TokenType::TK_WHERE,
b"WINDOW" => TokenType::TK_WINDOW,
b"WITH" => TokenType::TK_WITH,
b"WITHOUT" => TokenType::TK_WITHOUT,
_ => TokenType::TK_ID,
})
}
#[inline(always)]
pub fn is_identifier_start(b: u8) -> bool {
@@ -637,7 +789,7 @@ impl<'a> Lexer<'a> {
let result = &self.input[start..self.offset];
Ok(Token {
value: result,
token_type: Some(keyword_token(result).unwrap_or(TokenType::TK_ID)),
token_type: Some(keyword_or_id_token(result)),
})
}
}
@@ -1253,19 +1405,23 @@ mod tests {
]);
for (key, value) in &values {
assert!(keyword_token(key.as_bytes()).unwrap() == *value);
assert!(
keyword_token(key.as_bytes().to_ascii_lowercase().as_slice()).unwrap() == *value
);
assert!(keyword_or_id_token(key.as_bytes()) == *value);
assert!(keyword_or_id_token(key.as_bytes().to_ascii_lowercase().as_slice()) == *value);
}
assert!(keyword_token(b"").is_none());
assert!(keyword_token(b"wrong").is_none());
assert!(keyword_token(b"super wrong").is_none());
assert!(keyword_token(b"super_wrong").is_none());
assert!(keyword_token(b"aae26e78-3ba7-4627-8f8f-02623302495a").is_none());
assert!(keyword_token("Crème Brulée".as_bytes()).is_none());
assert!(keyword_token("fróm".as_bytes()).is_none());
assert_eq!(keyword_or_id_token(b""), TokenType::TK_ID);
assert_eq!(keyword_or_id_token(b"wrong"), TokenType::TK_ID);
assert_eq!(keyword_or_id_token(b"super wrong"), TokenType::TK_ID);
assert_eq!(keyword_or_id_token(b"super_wrong"), TokenType::TK_ID);
assert_eq!(
keyword_or_id_token(b"aae26e78-3ba7-4627-8f8f-02623302495a"),
TokenType::TK_ID
);
assert_eq!(
keyword_or_id_token("Crème Brulée".as_bytes()),
TokenType::TK_ID
);
assert_eq!(keyword_or_id_token("fróm".as_bytes()), TokenType::TK_ID);
}
#[test]


@@ -14,6 +14,7 @@ use crate::error::Error;
use crate::lexer::{Lexer, Token};
use crate::token::TokenType::{self, *};
use crate::Result;
use turso_macros::match_ignore_ascii_case;
macro_rules! peek_expect {
( $parser:expr, $( $x:ident ),* $(,)?) => {
@@ -91,26 +92,19 @@ fn from_bytes(bytes: &[u8]) -> String {
#[inline]
fn join_type_from_bytes(s: &[u8]) -> Result<JoinType> {
if b"CROSS".eq_ignore_ascii_case(s) {
Ok(JoinType::INNER | JoinType::CROSS)
} else if b"FULL".eq_ignore_ascii_case(s) {
Ok(JoinType::LEFT | JoinType::RIGHT | JoinType::OUTER)
} else if b"INNER".eq_ignore_ascii_case(s) {
Ok(JoinType::INNER)
} else if b"LEFT".eq_ignore_ascii_case(s) {
Ok(JoinType::LEFT | JoinType::OUTER)
} else if b"NATURAL".eq_ignore_ascii_case(s) {
Ok(JoinType::NATURAL)
} else if b"RIGHT".eq_ignore_ascii_case(s) {
Ok(JoinType::RIGHT | JoinType::OUTER)
} else if b"OUTER".eq_ignore_ascii_case(s) {
Ok(JoinType::OUTER)
} else {
Err(Error::Custom(format!(
match_ignore_ascii_case!(match s {
b"CROSS" => Ok(JoinType::INNER | JoinType::CROSS),
b"FULL" => Ok(JoinType::LEFT | JoinType::RIGHT | JoinType::OUTER),
b"INNER" => Ok(JoinType::INNER),
b"LEFT" => Ok(JoinType::LEFT | JoinType::OUTER),
b"NATURAL" => Ok(JoinType::NATURAL),
b"RIGHT" => Ok(JoinType::RIGHT | JoinType::OUTER),
b"OUTER" => Ok(JoinType::OUTER),
_ => Err(Error::Custom(format!(
"unsupported JOIN type: {:?}",
str::from_utf8(s)
)))
}
))),
})
}
#[inline]
@@ -1365,15 +1359,12 @@ impl<'a> Parser<'a> {
}
TK_CTIME_KW => {
let tok = eat_assert!(self, TK_CTIME_KW);
if b"CURRENT_DATE".eq_ignore_ascii_case(tok.value) {
Ok(Box::new(Expr::Literal(Literal::CurrentDate)))
} else if b"CURRENT_TIME".eq_ignore_ascii_case(tok.value) {
Ok(Box::new(Expr::Literal(Literal::CurrentTime)))
} else if b"CURRENT_TIMESTAMP".eq_ignore_ascii_case(tok.value) {
Ok(Box::new(Expr::Literal(Literal::CurrentTimestamp)))
} else {
unreachable!()
}
match_ignore_ascii_case!(match tok.value {
b"CURRENT_DATE" => Ok(Box::new(Expr::Literal(Literal::CurrentDate))),
b"CURRENT_TIME" => Ok(Box::new(Expr::Literal(Literal::CurrentTime))),
b"CURRENT_TIMESTAMP" => Ok(Box::new(Expr::Literal(Literal::CurrentTimestamp))),
_ => unreachable!(),
})
}
TK_NOT => {
eat_assert!(self, TK_NOT);
@@ -1729,17 +1720,12 @@ impl<'a> Parser<'a> {
let tok = eat_assert!(self, TK_MATCH, TK_LIKE_KW);
let op = match tok.token_type.unwrap() {
TK_MATCH => LikeOperator::Match,
TK_LIKE_KW => {
if b"LIKE".eq_ignore_ascii_case(tok.value) {
LikeOperator::Like
} else if b"GLOB".eq_ignore_ascii_case(tok.value) {
LikeOperator::Glob
} else if b"REGEXP".eq_ignore_ascii_case(tok.value) {
LikeOperator::Regexp
} else {
unreachable!()
}
}
TK_LIKE_KW => match_ignore_ascii_case!(match tok.value {
b"LIKE" => LikeOperator::Like,
b"GLOB" => LikeOperator::Glob,
b"REGEXP" => LikeOperator::Regexp,
_ => unreachable!(),
}),
_ => unreachable!(),
};
@@ -2743,25 +2729,23 @@ impl<'a> Parser<'a> {
TK_WITHOUT => {
eat_assert!(self, TK_WITHOUT);
let tok = eat_expect!(self, TK_ID);
if b"ROWID".eq_ignore_ascii_case(tok.value) {
Ok(TableOptions::WITHOUT_ROWID)
} else {
Err(Error::Custom(format!(
match_ignore_ascii_case!(match tok.value {
b"ROWID" => Ok(TableOptions::WITHOUT_ROWID),
_ => Err(Error::Custom(format!(
"unknown table option: {}",
from_bytes(tok.value)
)))
}
))),
})
}
TK_ID => {
let tok = eat_assert!(self, TK_ID);
if b"STRICT".eq_ignore_ascii_case(tok.value) {
Ok(TableOptions::STRICT)
} else {
Err(Error::Custom(format!(
match_ignore_ascii_case!(match tok.value {
b"STRICT" => Ok(TableOptions::STRICT),
_ => Err(Error::Custom(format!(
"unknown table option: {}",
from_bytes(tok.value)
)))
}
))),
})
}
_ => Ok(TableOptions::NONE),
},
@@ -2855,18 +2839,16 @@ impl<'a> Parser<'a> {
match &c.col_type {
Some(Type { name, .. }) => {
// The datatype must be one of following: INT INTEGER REAL TEXT BLOB ANY
if !(name.eq_ignore_ascii_case("INT")
|| name.eq_ignore_ascii_case("INTEGER")
|| name.eq_ignore_ascii_case("REAL")
|| name.eq_ignore_ascii_case("TEXT")
|| name.eq_ignore_ascii_case("BLOB")
|| name.eq_ignore_ascii_case("ANY"))
{
let bytes_name = name.as_bytes();
match_ignore_ascii_case!(match bytes_name {
b"INT" | b"INTEGER" | b"REAL" | b"TEXT" | b"BLOB" | b"ANY" => {}
_ => {
return Err(Error::Custom(format!(
"unknown datatype for {}.{}: \"{}\"",
tbl_name, c.col_name, name
)));
}
})
}
_ => {
// Every column definition must specify a datatype for that column. The freedom to specify a column without a datatype is removed.