mirror of https://github.com/aljazceru/turso.git
synced 2025-12-17 08:34:19 +01:00

add match_ignore_ascii_case macro
Cargo.lock (generated): 1 line added

@@ -4172,6 +4172,7 @@ dependencies = [
  "strum",
  "strum_macros",
  "thiserror 1.0.69",
+ "turso_macros",
  "turso_sqlite3_parser",
 ]
@@ -17,4 +17,4 @@ proc-macro = true

 [dependencies]
 quote = "1.0.38"
 proc-macro2 = "1.0.93"
-syn = { version = "2.0.96", features = ["full"] }
+syn = { version = "2.0.96", features = ["full", "clone-impls"] }
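The only manifest change is naming syn's clone-impls feature explicitly; the new macro clones parsed syn::Arm values (arm.clone() below), which presumably is why the commit spells that feature out rather than relying on it being enabled by default.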
macros/src/ext/match_ignore_ascii_case.rs (new file): 148 lines

@@ -0,0 +1,148 @@
use quote::quote;
use std::collections::HashMap;

use proc_macro::TokenStream;
use syn::{parse_macro_input, spanned::Spanned, Arm, ExprMatch, Lit, Pat};

pub fn match_ignore_ascci_case(input: TokenStream) -> TokenStream {
    let match_block = parse_macro_input!(input as ExprMatch);
    if match_block.arms.is_empty() {
        return syn::Error::new(
            match_block.span(),
            "expected at least one arm with literal string/byte/bytes/char",
        )
        .to_compile_error()
        .into();
    }

    // Collect every byte-string pattern (uppercased, so lookup becomes
    // case-insensitive) together with its arm; a wildcard becomes the fallback.
    let mut arms: Vec<(Vec<u8>, Arm)> = Vec::with_capacity(match_block.arms.len());
    let mut fallback_arm: Option<Arm> = None;
    for arm in &match_block.arms {
        match &arm.pat {
            Pat::Lit(lit) => match &lit.lit {
                Lit::ByteStr(bs) => {
                    arms.push((bs.value().to_ascii_uppercase(), arm.clone()));
                }
                _ => {
                    return syn::Error::new(
                        arm.pat.span(),
                        "expected literal string/byte/bytes/char",
                    )
                    .to_compile_error()
                    .into();
                }
            },
            Pat::Wild(_) => {
                fallback_arm = Some(arm.clone());
            }
            // `b"A" | b"B" => ...` registers the same arm under several keys.
            Pat::Or(or) => {
                for case in &or.cases {
                    match case {
                        Pat::Lit(lit) => match &lit.lit {
                            Lit::ByteStr(bs) => {
                                arms.push((bs.value().to_ascii_uppercase(), arm.clone()));
                            }
                            _ => {
                                return syn::Error::new(
                                    arm.pat.span(),
                                    "expected literal string/byte/bytes/char",
                                )
                                .to_compile_error()
                                .into();
                            }
                        },
                        _ => {
                            return syn::Error::new(
                                arm.pat.span(),
                                "expected literal string/byte/bytes/char",
                            )
                            .to_compile_error()
                            .into();
                        }
                    }
                }
            }
            _ => {
                return syn::Error::new(
                    arm.pat.span(),
                    "expected literal string/byte/bytes/char",
                )
                .to_compile_error()
                .into();
            }
        }
    }

    // Trie over the uppercased pattern bytes; `result` is set on the node
    // where a pattern ends.
    struct PathEntry {
        result: Option<Arm>,
        sub_entries: HashMap<u8, Box<PathEntry>>,
    }

    let mut paths = Box::new(PathEntry {
        result: None,
        sub_entries: HashMap::new(),
    });

    for (keyword_b, arm) in arms.drain(..) {
        let mut current = &mut paths;

        for b in keyword_b {
            match current.sub_entries.get(&b) {
                Some(_) => {
                    current = current.sub_entries.get_mut(&b).unwrap();
                }
                None => {
                    let new_entry = Box::new(PathEntry {
                        result: None,
                        sub_entries: HashMap::new(),
                    });
                    current.sub_entries.insert(b, new_entry);
                    current = current.sub_entries.get_mut(&b).unwrap();
                }
            }
        }

        // Two identical patterns would land on the same trie node.
        assert!(current.result.is_none());
        current.result = Some(arm);
    }

    // Emit one nested `match scrutinee.get(idx)` per trie level: `None`
    // (end of input) selects the arm that ends here, alphabetic bytes accept
    // both cases, and the wildcard arm is repeated as the fallback everywhere.
    fn write_entry(
        idx: usize,
        var_name: proc_macro2::TokenStream,
        fallback_arm: Option<Arm>,
        entry: &PathEntry,
    ) -> proc_macro2::TokenStream {
        let eof_handle = if let Some(ref result) = entry.result {
            let body = &result.body;
            quote! { None => { #body } }
        } else {
            quote! {}
        };

        let fallback_handle = if let Some(ref result) = fallback_arm {
            let body = &result.body;
            quote! { _ => { #body } }
        } else {
            quote! {}
        };

        let mut arms = Vec::with_capacity(entry.sub_entries.len());
        for (&b, sub_entry) in &entry.sub_entries {
            let sub_match = write_entry(idx + 1, var_name.clone(), fallback_arm.clone(), sub_entry);
            if b.is_ascii_alphabetic() {
                let b_lower = b.to_ascii_lowercase();
                arms.push(quote! { Some(#b) | Some(#b_lower) => #sub_match });
            } else {
                arms.push(quote! { Some(#b) => #sub_match });
            }
        }

        quote! { match #var_name.get(#idx) {
            #eof_handle
            #(#arms)*
            #fallback_handle
        } }
    }

    let expr = match_block.expr;
    TokenStream::from(write_entry(0, quote! { #expr }, fallback_arm, &paths))
}
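To see what the generated trie looks like, here is roughly how a two-keyword invocation expands. This is a hand-written sketch of the output shape (using byte literals for readability; the macro emits numeric u8 literals and braced arm bodies), not the literal token stream:

// match_ignore_ascii_case!(match s {
//     b"AS" => TokenType::TK_AS,
//     b"ASC" => TokenType::TK_ASC,
//     _ => TokenType::TK_ID,
// })
// expands to roughly:
match s.get(0) {
    Some(b'A') | Some(b'a') => match s.get(1) {
        Some(b'S') | Some(b's') => match s.get(2) {
            None => TokenType::TK_AS, // input ends exactly at "AS"
            Some(b'C') | Some(b'c') => match s.get(3) {
                None => TokenType::TK_ASC,
                _ => TokenType::TK_ID, // fallback arm repeated at every level
            },
            _ => TokenType::TK_ID,
        },
        _ => TokenType::TK_ID,
    },
    _ => TokenType::TK_ID,
}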
@@ -5,10 +5,12 @@ use syn::punctuated::Punctuated;
 use syn::token::Eq;
 use syn::{parse_macro_input, Ident, LitStr, Token};
 mod agg_derive;
+mod match_ignore_ascii_case;
 mod scalars;
 mod vfs_derive;
 mod vtab_derive;
 pub use agg_derive::derive_agg_func;
+pub use match_ignore_ascii_case::match_ignore_ascci_case;
 pub use scalars::scalar;
 pub use vfs_derive::derive_vfs_module;
 pub use vtab_derive::derive_vtab_module;

@@ -439,3 +439,8 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream {
 pub fn derive_vfs_module(input: TokenStream) -> TokenStream {
     ext::derive_vfs_module(input)
 }
+
+#[proc_macro]
+pub fn match_ignore_ascii_case(input: TokenStream) -> TokenStream {
+    ext::match_ignore_ascci_case(input)
+}
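With the module declared, re-exported, and wrapped in a #[proc_macro] at the crate root by the two hunks above, dependent crates can invoke the macro directly. A minimal sketch of a call site (the helper function and its arms are illustrative, not part of this commit):

use turso_macros::match_ignore_ascii_case;

// Hypothetical helper: classify a byte slice case-insensitively.
fn is_temp_keyword(s: &[u8]) -> bool {
    match_ignore_ascii_case!(match s {
        // An or-pattern registers both byte strings under the same arm.
        b"TEMP" | b"TEMPORARY" => true,
        _ => false,
    })
}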
@@ -21,6 +21,7 @@ strum = { workspace = true }
 strum_macros = { workspace = true }
 serde = { workspace = true, optional = true, features = ["derive"] }
 thiserror = "1.0.61"
+turso_macros = { workspace = true }

 [dev-dependencies]
 fallible-iterator = "0.3"
parser/build.rs (deleted): 290 lines

@@ -1,290 +0,0 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::PathBuf;

/// generates a trie-like function with nested match expressions for parsing SQL keywords
/// example: input: [["ABORT", "TK_ABORT"], ["ACTION", "TK_ACTION"], ["ADD", "TK_ADD"],]
/// A
/// ├─ B
/// │  ├─ O
/// │  │  ├─ R
/// │  │  │  ├─ T -> TK_ABORT
/// ├─ C
/// │  ├─ T
/// │  │  ├─ I
/// │  │  │  ├─ O
/// │  │  │  │  ├─ N -> TK_ACTION
/// ├─ D
/// │  ├─ D -> TK_ADD
fn build_keyword_map(
    writer: &mut impl Write,
    func_name: &str,
    keywords: &[[&'static str; 2]],
) -> Result<(), std::io::Error> {
    assert!(!keywords.is_empty());
    let mut min_len = keywords[0][0].len();
    let mut max_len = keywords[0][0].len();

    struct PathEntry {
        result: Option<&'static str>,
        sub_entries: HashMap<u8, Box<PathEntry>>,
    }

    let mut paths = Box::new(PathEntry {
        result: None,
        sub_entries: HashMap::new(),
    });

    for keyword in keywords {
        let keyword_b = keyword[0].as_bytes();

        if keyword_b.len() < min_len {
            min_len = keyword_b.len();
        }

        if keyword_b.len() > max_len {
            max_len = keyword_b.len();
        }

        let mut current = &mut paths;

        for &b in keyword_b {
            let upper_b = b.to_ascii_uppercase();

            match current.sub_entries.get(&upper_b) {
                Some(_) => {
                    current = current.sub_entries.get_mut(&upper_b).unwrap();
                }
                None => {
                    let new_entry = Box::new(PathEntry {
                        result: None,
                        sub_entries: HashMap::new(),
                    });
                    current.sub_entries.insert(upper_b, new_entry);
                    current = current.sub_entries.get_mut(&upper_b).unwrap();
                }
            }
        }

        assert!(current.result.is_none());
        current.result = Some(keyword[1]);
    }

    fn write_entry(writer: &mut impl Write, entry: &PathEntry) -> Result<(), std::io::Error> {
        if let Some(result) = entry.result {
            writeln!(writer, "if idx == buf.len() {{")?;
            writeln!(writer, "return Some(TokenType::{result});")?;
            writeln!(writer, "}}")?;
        }

        if entry.sub_entries.is_empty() {
            writeln!(writer, "None")?;
            return Ok(());
        }

        writeln!(writer, "if idx >= buf.len() {{")?;
        writeln!(writer, "return None;")?;
        writeln!(writer, "}}")?;

        writeln!(writer, "match buf[idx] {{")?;
        for (&b, sub_entry) in &entry.sub_entries {
            if b.is_ascii_alphabetic() {
                writeln!(writer, "{} | {} => {{", b, b.to_ascii_lowercase())?;
            } else {
                writeln!(writer, "{b} => {{")?;
            }
            writeln!(writer, "idx += 1;")?;
            write_entry(writer, sub_entry)?;
            writeln!(writer, "}}")?;
        }

        writeln!(writer, "_ => None")?;
        writeln!(writer, "}}")?;
        Ok(())
    }

    writeln!(
        writer,
        "pub(crate) const MAX_KEYWORD_LEN: usize = {max_len};"
    )?;
    writeln!(
        writer,
        "pub(crate) const MIN_KEYWORD_LEN: usize = {min_len};"
    )?;
    writeln!(writer, "/// Check if `word` is a keyword")?;
    writeln!(
        writer,
        "pub fn {func_name}(buf: &[u8]) -> Option<TokenType> {{"
    )?;
    writeln!(
        writer,
        "if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {{"
    )?;
    writeln!(writer, "return None;")?;
    writeln!(writer, "}}")?;
    writeln!(writer, "let mut idx = 0;")?;
    write_entry(writer, &paths)?;
    writeln!(writer, "}}")?;
    Ok(())
}

fn main() {
    let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
    let keywords = out_dir.join("keywords.rs");
    let mut keywords = BufWriter::new(File::create(keywords).unwrap());
    build_keyword_map(
        &mut keywords,
        "keyword_token",
        &[
            ["ABORT", "TK_ABORT"],
            ["ACTION", "TK_ACTION"],
            ["ADD", "TK_ADD"],
            ["AFTER", "TK_AFTER"],
            ["ALL", "TK_ALL"],
            ["ALTER", "TK_ALTER"],
            ["ALWAYS", "TK_ALWAYS"],
            ["ANALYZE", "TK_ANALYZE"],
            ["AND", "TK_AND"],
            ["AS", "TK_AS"],
            ["ASC", "TK_ASC"],
            ["ATTACH", "TK_ATTACH"],
            ["AUTOINCREMENT", "TK_AUTOINCR"],
            ["BEFORE", "TK_BEFORE"],
            ["BEGIN", "TK_BEGIN"],
            ["BETWEEN", "TK_BETWEEN"],
            ["BY", "TK_BY"],
            ["CASCADE", "TK_CASCADE"],
            ["CASE", "TK_CASE"],
            ["CAST", "TK_CAST"],
            ["CHECK", "TK_CHECK"],
            ["COLLATE", "TK_COLLATE"],
            ["COLUMN", "TK_COLUMNKW"],
            ["COMMIT", "TK_COMMIT"],
            ["CONFLICT", "TK_CONFLICT"],
            ["CONSTRAINT", "TK_CONSTRAINT"],
            ["CREATE", "TK_CREATE"],
            ["CROSS", "TK_JOIN_KW"],
            ["CURRENT", "TK_CURRENT"],
            ["CURRENT_DATE", "TK_CTIME_KW"],
            ["CURRENT_TIME", "TK_CTIME_KW"],
            ["CURRENT_TIMESTAMP", "TK_CTIME_KW"],
            ["DATABASE", "TK_DATABASE"],
            ["DEFAULT", "TK_DEFAULT"],
            ["DEFERRABLE", "TK_DEFERRABLE"],
            ["DEFERRED", "TK_DEFERRED"],
            ["DELETE", "TK_DELETE"],
            ["DESC", "TK_DESC"],
            ["DETACH", "TK_DETACH"],
            ["DISTINCT", "TK_DISTINCT"],
            ["DO", "TK_DO"],
            ["DROP", "TK_DROP"],
            ["EACH", "TK_EACH"],
            ["ELSE", "TK_ELSE"],
            ["END", "TK_END"],
            ["ESCAPE", "TK_ESCAPE"],
            ["EXCEPT", "TK_EXCEPT"],
            ["EXCLUDE", "TK_EXCLUDE"],
            ["EXCLUSIVE", "TK_EXCLUSIVE"],
            ["EXISTS", "TK_EXISTS"],
            ["EXPLAIN", "TK_EXPLAIN"],
            ["FAIL", "TK_FAIL"],
            ["FILTER", "TK_FILTER"],
            ["FIRST", "TK_FIRST"],
            ["FOLLOWING", "TK_FOLLOWING"],
            ["FOR", "TK_FOR"],
            ["FOREIGN", "TK_FOREIGN"],
            ["FROM", "TK_FROM"],
            ["FULL", "TK_JOIN_KW"],
            ["GENERATED", "TK_GENERATED"],
            ["GLOB", "TK_LIKE_KW"],
            ["GROUP", "TK_GROUP"],
            ["GROUPS", "TK_GROUPS"],
            ["HAVING", "TK_HAVING"],
            ["IF", "TK_IF"],
            ["IGNORE", "TK_IGNORE"],
            ["IMMEDIATE", "TK_IMMEDIATE"],
            ["IN", "TK_IN"],
            ["INDEX", "TK_INDEX"],
            ["INDEXED", "TK_INDEXED"],
            ["INITIALLY", "TK_INITIALLY"],
            ["INNER", "TK_JOIN_KW"],
            ["INSERT", "TK_INSERT"],
            ["INSTEAD", "TK_INSTEAD"],
            ["INTERSECT", "TK_INTERSECT"],
            ["INTO", "TK_INTO"],
            ["IS", "TK_IS"],
            ["ISNULL", "TK_ISNULL"],
            ["JOIN", "TK_JOIN"],
            ["KEY", "TK_KEY"],
            ["LAST", "TK_LAST"],
            ["LEFT", "TK_JOIN_KW"],
            ["LIKE", "TK_LIKE_KW"],
            ["LIMIT", "TK_LIMIT"],
            ["MATCH", "TK_MATCH"],
            ["MATERIALIZED", "TK_MATERIALIZED"],
            ["NATURAL", "TK_JOIN_KW"],
            ["NO", "TK_NO"],
            ["NOT", "TK_NOT"],
            ["NOTHING", "TK_NOTHING"],
            ["NOTNULL", "TK_NOTNULL"],
            ["NULL", "TK_NULL"],
            ["NULLS", "TK_NULLS"],
            ["OF", "TK_OF"],
            ["OFFSET", "TK_OFFSET"],
            ["ON", "TK_ON"],
            ["OR", "TK_OR"],
            ["ORDER", "TK_ORDER"],
            ["OTHERS", "TK_OTHERS"],
            ["OUTER", "TK_JOIN_KW"],
            ["OVER", "TK_OVER"],
            ["PARTITION", "TK_PARTITION"],
            ["PLAN", "TK_PLAN"],
            ["PRAGMA", "TK_PRAGMA"],
            ["PRECEDING", "TK_PRECEDING"],
            ["PRIMARY", "TK_PRIMARY"],
            ["QUERY", "TK_QUERY"],
            ["RAISE", "TK_RAISE"],
            ["RANGE", "TK_RANGE"],
            ["RECURSIVE", "TK_RECURSIVE"],
            ["REFERENCES", "TK_REFERENCES"],
            ["REGEXP", "TK_LIKE_KW"],
            ["REINDEX", "TK_REINDEX"],
            ["RELEASE", "TK_RELEASE"],
            ["RENAME", "TK_RENAME"],
            ["REPLACE", "TK_REPLACE"],
            ["RETURNING", "TK_RETURNING"],
            ["RESTRICT", "TK_RESTRICT"],
            ["RIGHT", "TK_JOIN_KW"],
            ["ROLLBACK", "TK_ROLLBACK"],
            ["ROW", "TK_ROW"],
            ["ROWS", "TK_ROWS"],
            ["SAVEPOINT", "TK_SAVEPOINT"],
            ["SELECT", "TK_SELECT"],
            ["SET", "TK_SET"],
            ["TABLE", "TK_TABLE"],
            ["TEMP", "TK_TEMP"],
            ["TEMPORARY", "TK_TEMP"],
            ["THEN", "TK_THEN"],
            ["TIES", "TK_TIES"],
            ["TO", "TK_TO"],
            ["TRANSACTION", "TK_TRANSACTION"],
            ["TRIGGER", "TK_TRIGGER"],
            ["UNBOUNDED", "TK_UNBOUNDED"],
            ["UNION", "TK_UNION"],
            ["UNIQUE", "TK_UNIQUE"],
            ["UPDATE", "TK_UPDATE"],
            ["USING", "TK_USING"],
            ["VACUUM", "TK_VACUUM"],
            ["VALUES", "TK_VALUES"],
            ["VIEW", "TK_VIEW"],
            ["VIRTUAL", "TK_VIRTUAL"],
            ["WHEN", "TK_WHEN"],
            ["WHERE", "TK_WHERE"],
            ["WINDOW", "TK_WINDOW"],
            ["WITH", "TK_WITH"],
            ["WITHOUT", "TK_WITHOUT"],
        ],
    )
    .unwrap();
}
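For comparison with the macro expansion sketched earlier, the deleted build script wrote a standalone function of roughly this shape into $OUT_DIR/keywords.rs. This sketch shows the generated output for just the keywords AS and ASC (constant values are for this two-keyword example only, not the real keyword set):

pub(crate) const MAX_KEYWORD_LEN: usize = 3;
pub(crate) const MIN_KEYWORD_LEN: usize = 2;
/// Check if `word` is a keyword
pub fn keyword_token(buf: &[u8]) -> Option<TokenType> {
    if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {
        return None;
    }
    let mut idx = 0;
    if idx >= buf.len() {
        return None;
    }
    match buf[idx] {
        65 | 97 => { // 'A' | 'a'
            idx += 1;
            if idx >= buf.len() {
                return None;
            }
            match buf[idx] {
                83 | 115 => { // 'S' | 's'
                    idx += 1;
                    if idx == buf.len() {
                        return Some(TokenType::TK_AS);
                    }
                    if idx >= buf.len() {
                        return None;
                    }
                    match buf[idx] {
                        67 | 99 => { // 'C' | 'c'
                            idx += 1;
                            if idx == buf.len() {
                                return Some(TokenType::TK_ASC);
                            }
                            None
                        }
                        _ => None,
                    }
                }
                _ => None,
            }
        }
        _ => None,
    }
}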
@@ -1,6 +1,158 @@
 use crate::{error::Error, token::TokenType, Result};
+use turso_macros::match_ignore_ascii_case;

-include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
+fn keyword_or_id_token(input: &[u8]) -> TokenType {
+    match_ignore_ascii_case!(match input {
+        b"ABORT" => TokenType::TK_ABORT,
+        b"ACTION" => TokenType::TK_ACTION,
+        b"ADD" => TokenType::TK_ADD,
+        b"AFTER" => TokenType::TK_AFTER,
+        b"ALL" => TokenType::TK_ALL,
+        b"ALTER" => TokenType::TK_ALTER,
+        b"ALWAYS" => TokenType::TK_ALWAYS,
+        b"ANALYZE" => TokenType::TK_ANALYZE,
+        b"AND" => TokenType::TK_AND,
+        b"AS" => TokenType::TK_AS,
+        b"ASC" => TokenType::TK_ASC,
+        b"ATTACH" => TokenType::TK_ATTACH,
+        b"AUTOINCREMENT" => TokenType::TK_AUTOINCR,
+        b"BEFORE" => TokenType::TK_BEFORE,
+        b"BEGIN" => TokenType::TK_BEGIN,
+        b"BETWEEN" => TokenType::TK_BETWEEN,
+        b"BY" => TokenType::TK_BY,
+        b"CASCADE" => TokenType::TK_CASCADE,
+        b"CASE" => TokenType::TK_CASE,
+        b"CAST" => TokenType::TK_CAST,
+        b"CHECK" => TokenType::TK_CHECK,
+        b"COLLATE" => TokenType::TK_COLLATE,
+        b"COLUMN" => TokenType::TK_COLUMNKW,
+        b"COMMIT" => TokenType::TK_COMMIT,
+        b"CONFLICT" => TokenType::TK_CONFLICT,
+        b"CONSTRAINT" => TokenType::TK_CONSTRAINT,
+        b"CREATE" => TokenType::TK_CREATE,
+        b"CROSS" => TokenType::TK_JOIN_KW,
+        b"CURRENT" => TokenType::TK_CURRENT,
+        b"CURRENT_DATE" => TokenType::TK_CTIME_KW,
+        b"CURRENT_TIME" => TokenType::TK_CTIME_KW,
+        b"CURRENT_TIMESTAMP" => TokenType::TK_CTIME_KW,
+        b"DATABASE" => TokenType::TK_DATABASE,
+        b"DEFAULT" => TokenType::TK_DEFAULT,
+        b"DEFERRABLE" => TokenType::TK_DEFERRABLE,
+        b"DEFERRED" => TokenType::TK_DEFERRED,
+        b"DELETE" => TokenType::TK_DELETE,
+        b"DESC" => TokenType::TK_DESC,
+        b"DETACH" => TokenType::TK_DETACH,
+        b"DISTINCT" => TokenType::TK_DISTINCT,
+        b"DO" => TokenType::TK_DO,
+        b"DROP" => TokenType::TK_DROP,
+        b"EACH" => TokenType::TK_EACH,
+        b"ELSE" => TokenType::TK_ELSE,
+        b"END" => TokenType::TK_END,
+        b"ESCAPE" => TokenType::TK_ESCAPE,
+        b"EXCEPT" => TokenType::TK_EXCEPT,
+        b"EXCLUDE" => TokenType::TK_EXCLUDE,
+        b"EXCLUSIVE" => TokenType::TK_EXCLUSIVE,
+        b"EXISTS" => TokenType::TK_EXISTS,
+        b"EXPLAIN" => TokenType::TK_EXPLAIN,
+        b"FAIL" => TokenType::TK_FAIL,
+        b"FILTER" => TokenType::TK_FILTER,
+        b"FIRST" => TokenType::TK_FIRST,
+        b"FOLLOWING" => TokenType::TK_FOLLOWING,
+        b"FOR" => TokenType::TK_FOR,
+        b"FOREIGN" => TokenType::TK_FOREIGN,
+        b"FROM" => TokenType::TK_FROM,
+        b"FULL" => TokenType::TK_JOIN_KW,
+        b"GENERATED" => TokenType::TK_GENERATED,
+        b"GLOB" => TokenType::TK_LIKE_KW,
+        b"GROUP" => TokenType::TK_GROUP,
+        b"GROUPS" => TokenType::TK_GROUPS,
+        b"HAVING" => TokenType::TK_HAVING,
+        b"IF" => TokenType::TK_IF,
+        b"IGNORE" => TokenType::TK_IGNORE,
+        b"IMMEDIATE" => TokenType::TK_IMMEDIATE,
+        b"IN" => TokenType::TK_IN,
+        b"INDEX" => TokenType::TK_INDEX,
+        b"INDEXED" => TokenType::TK_INDEXED,
+        b"INITIALLY" => TokenType::TK_INITIALLY,
+        b"INNER" => TokenType::TK_JOIN_KW,
+        b"INSERT" => TokenType::TK_INSERT,
+        b"INSTEAD" => TokenType::TK_INSTEAD,
+        b"INTERSECT" => TokenType::TK_INTERSECT,
+        b"INTO" => TokenType::TK_INTO,
+        b"IS" => TokenType::TK_IS,
+        b"ISNULL" => TokenType::TK_ISNULL,
+        b"JOIN" => TokenType::TK_JOIN,
+        b"KEY" => TokenType::TK_KEY,
+        b"LAST" => TokenType::TK_LAST,
+        b"LEFT" => TokenType::TK_JOIN_KW,
+        b"LIKE" => TokenType::TK_LIKE_KW,
+        b"LIMIT" => TokenType::TK_LIMIT,
+        b"MATCH" => TokenType::TK_MATCH,
+        b"MATERIALIZED" => TokenType::TK_MATERIALIZED,
+        b"NATURAL" => TokenType::TK_JOIN_KW,
+        b"NO" => TokenType::TK_NO,
+        b"NOT" => TokenType::TK_NOT,
+        b"NOTHING" => TokenType::TK_NOTHING,
+        b"NOTNULL" => TokenType::TK_NOTNULL,
+        b"NULL" => TokenType::TK_NULL,
+        b"NULLS" => TokenType::TK_NULLS,
+        b"OF" => TokenType::TK_OF,
+        b"OFFSET" => TokenType::TK_OFFSET,
+        b"ON" => TokenType::TK_ON,
+        b"OR" => TokenType::TK_OR,
+        b"ORDER" => TokenType::TK_ORDER,
+        b"OTHERS" => TokenType::TK_OTHERS,
+        b"OUTER" => TokenType::TK_JOIN_KW,
+        b"OVER" => TokenType::TK_OVER,
+        b"PARTITION" => TokenType::TK_PARTITION,
+        b"PLAN" => TokenType::TK_PLAN,
+        b"PRAGMA" => TokenType::TK_PRAGMA,
+        b"PRECEDING" => TokenType::TK_PRECEDING,
+        b"PRIMARY" => TokenType::TK_PRIMARY,
+        b"QUERY" => TokenType::TK_QUERY,
+        b"RAISE" => TokenType::TK_RAISE,
+        b"RANGE" => TokenType::TK_RANGE,
+        b"RECURSIVE" => TokenType::TK_RECURSIVE,
+        b"REFERENCES" => TokenType::TK_REFERENCES,
+        b"REGEXP" => TokenType::TK_LIKE_KW,
+        b"REINDEX" => TokenType::TK_REINDEX,
+        b"RELEASE" => TokenType::TK_RELEASE,
+        b"RENAME" => TokenType::TK_RENAME,
+        b"REPLACE" => TokenType::TK_REPLACE,
+        b"RETURNING" => TokenType::TK_RETURNING,
+        b"RESTRICT" => TokenType::TK_RESTRICT,
+        b"RIGHT" => TokenType::TK_JOIN_KW,
+        b"ROLLBACK" => TokenType::TK_ROLLBACK,
+        b"ROW" => TokenType::TK_ROW,
+        b"ROWS" => TokenType::TK_ROWS,
+        b"SAVEPOINT" => TokenType::TK_SAVEPOINT,
+        b"SELECT" => TokenType::TK_SELECT,
+        b"SET" => TokenType::TK_SET,
+        b"TABLE" => TokenType::TK_TABLE,
+        b"TEMP" => TokenType::TK_TEMP,
+        b"TEMPORARY" => TokenType::TK_TEMP,
+        b"THEN" => TokenType::TK_THEN,
+        b"TIES" => TokenType::TK_TIES,
+        b"TO" => TokenType::TK_TO,
+        b"TRANSACTION" => TokenType::TK_TRANSACTION,
+        b"TRIGGER" => TokenType::TK_TRIGGER,
+        b"UNBOUNDED" => TokenType::TK_UNBOUNDED,
+        b"UNION" => TokenType::TK_UNION,
+        b"UNIQUE" => TokenType::TK_UNIQUE,
+        b"UPDATE" => TokenType::TK_UPDATE,
+        b"USING" => TokenType::TK_USING,
+        b"VACUUM" => TokenType::TK_VACUUM,
+        b"VALUES" => TokenType::TK_VALUES,
+        b"VIEW" => TokenType::TK_VIEW,
+        b"VIRTUAL" => TokenType::TK_VIRTUAL,
+        b"WHEN" => TokenType::TK_WHEN,
+        b"WHERE" => TokenType::TK_WHERE,
+        b"WINDOW" => TokenType::TK_WINDOW,
+        b"WITH" => TokenType::TK_WITH,
+        b"WITHOUT" => TokenType::TK_WITHOUT,
+        _ => TokenType::TK_ID,
+    })
+}

 #[inline(always)]
 pub fn is_identifier_start(b: u8) -> bool {
@@ -637,7 +789,7 @@ impl<'a> Lexer<'a> {
         let result = &self.input[start..self.offset];
         Ok(Token {
             value: result,
-            token_type: Some(keyword_token(result).unwrap_or(TokenType::TK_ID)),
+            token_type: Some(keyword_or_id_token(result)),
         })
     }
 }
@@ -1253,19 +1405,23 @@ mod tests {
         ]);

         for (key, value) in &values {
-            assert!(keyword_token(key.as_bytes()).unwrap() == *value);
-            assert!(
-                keyword_token(key.as_bytes().to_ascii_lowercase().as_slice()).unwrap() == *value
-            );
+            assert!(keyword_or_id_token(key.as_bytes()) == *value);
+            assert!(keyword_or_id_token(key.as_bytes().to_ascii_lowercase().as_slice()) == *value);
         }

-        assert!(keyword_token(b"").is_none());
-        assert!(keyword_token(b"wrong").is_none());
-        assert!(keyword_token(b"super wrong").is_none());
-        assert!(keyword_token(b"super_wrong").is_none());
-        assert!(keyword_token(b"aae26e78-3ba7-4627-8f8f-02623302495a").is_none());
-        assert!(keyword_token("Crème Brulée".as_bytes()).is_none());
-        assert!(keyword_token("fróm".as_bytes()).is_none());
+        assert_eq!(keyword_or_id_token(b""), TokenType::TK_ID);
+        assert_eq!(keyword_or_id_token(b"wrong"), TokenType::TK_ID);
+        assert_eq!(keyword_or_id_token(b"super wrong"), TokenType::TK_ID);
+        assert_eq!(keyword_or_id_token(b"super_wrong"), TokenType::TK_ID);
+        assert_eq!(
+            keyword_or_id_token(b"aae26e78-3ba7-4627-8f8f-02623302495a"),
+            TokenType::TK_ID
+        );
+        assert_eq!(
+            keyword_or_id_token("Crème Brulée".as_bytes()),
+            TokenType::TK_ID
+        );
+        assert_eq!(keyword_or_id_token("fróm".as_bytes()), TokenType::TK_ID);
     }

     #[test]
@@ -14,6 +14,7 @@ use crate::error::Error;
 use crate::lexer::{Lexer, Token};
 use crate::token::TokenType::{self, *};
 use crate::Result;
+use turso_macros::match_ignore_ascii_case;

 macro_rules! peek_expect {
     ( $parser:expr, $( $x:ident ),* $(,)?) => {
@@ -91,26 +92,19 @@ fn from_bytes(bytes: &[u8]) -> String {

 #[inline]
 fn join_type_from_bytes(s: &[u8]) -> Result<JoinType> {
-    if b"CROSS".eq_ignore_ascii_case(s) {
-        Ok(JoinType::INNER | JoinType::CROSS)
-    } else if b"FULL".eq_ignore_ascii_case(s) {
-        Ok(JoinType::LEFT | JoinType::RIGHT | JoinType::OUTER)
-    } else if b"INNER".eq_ignore_ascii_case(s) {
-        Ok(JoinType::INNER)
-    } else if b"LEFT".eq_ignore_ascii_case(s) {
-        Ok(JoinType::LEFT | JoinType::OUTER)
-    } else if b"NATURAL".eq_ignore_ascii_case(s) {
-        Ok(JoinType::NATURAL)
-    } else if b"RIGHT".eq_ignore_ascii_case(s) {
-        Ok(JoinType::RIGHT | JoinType::OUTER)
-    } else if b"OUTER".eq_ignore_ascii_case(s) {
-        Ok(JoinType::OUTER)
-    } else {
-        Err(Error::Custom(format!(
-            "unsupported JOIN type: {:?}",
-            str::from_utf8(s)
-        )))
-    }
+    match_ignore_ascii_case!(match s {
+        b"CROSS" => Ok(JoinType::INNER | JoinType::CROSS),
+        b"FULL" => Ok(JoinType::LEFT | JoinType::RIGHT | JoinType::OUTER),
+        b"INNER" => Ok(JoinType::INNER),
+        b"LEFT" => Ok(JoinType::LEFT | JoinType::OUTER),
+        b"NATURAL" => Ok(JoinType::NATURAL),
+        b"RIGHT" => Ok(JoinType::RIGHT | JoinType::OUTER),
+        b"OUTER" => Ok(JoinType::OUTER),
+        _ => Err(Error::Custom(format!(
+            "unsupported JOIN type: {:?}",
+            str::from_utf8(s)
+        ))),
+    })
 }

 #[inline]
@@ -1365,15 +1359,12 @@ impl<'a> Parser<'a> {
             }
             TK_CTIME_KW => {
                 let tok = eat_assert!(self, TK_CTIME_KW);
-                if b"CURRENT_DATE".eq_ignore_ascii_case(tok.value) {
-                    Ok(Box::new(Expr::Literal(Literal::CurrentDate)))
-                } else if b"CURRENT_TIME".eq_ignore_ascii_case(tok.value) {
-                    Ok(Box::new(Expr::Literal(Literal::CurrentTime)))
-                } else if b"CURRENT_TIMESTAMP".eq_ignore_ascii_case(tok.value) {
-                    Ok(Box::new(Expr::Literal(Literal::CurrentTimestamp)))
-                } else {
-                    unreachable!()
-                }
+                match_ignore_ascii_case!(match tok.value {
+                    b"CURRENT_DATE" => Ok(Box::new(Expr::Literal(Literal::CurrentDate))),
+                    b"CURRENT_TIME" => Ok(Box::new(Expr::Literal(Literal::CurrentTime))),
+                    b"CURRENT_TIMESTAMP" => Ok(Box::new(Expr::Literal(Literal::CurrentTimestamp))),
+                    _ => unreachable!(),
+                })
             }
             TK_NOT => {
                 eat_assert!(self, TK_NOT);
@@ -1729,17 +1720,12 @@ impl<'a> Parser<'a> {
         let tok = eat_assert!(self, TK_MATCH, TK_LIKE_KW);
         let op = match tok.token_type.unwrap() {
             TK_MATCH => LikeOperator::Match,
-            TK_LIKE_KW => {
-                if b"LIKE".eq_ignore_ascii_case(tok.value) {
-                    LikeOperator::Like
-                } else if b"GLOB".eq_ignore_ascii_case(tok.value) {
-                    LikeOperator::Glob
-                } else if b"REGEXP".eq_ignore_ascii_case(tok.value) {
-                    LikeOperator::Regexp
-                } else {
-                    unreachable!()
-                }
-            }
+            TK_LIKE_KW => match_ignore_ascii_case!(match tok.value {
+                b"LIKE" => LikeOperator::Like,
+                b"GLOB" => LikeOperator::Glob,
+                b"REGEXP" => LikeOperator::Regexp,
+                _ => unreachable!(),
+            }),
             _ => unreachable!(),
         };
@@ -2743,25 +2729,23 @@ impl<'a> Parser<'a> {
             TK_WITHOUT => {
                 eat_assert!(self, TK_WITHOUT);
                 let tok = eat_expect!(self, TK_ID);
-                if b"ROWID".eq_ignore_ascii_case(tok.value) {
-                    Ok(TableOptions::WITHOUT_ROWID)
-                } else {
-                    Err(Error::Custom(format!(
-                        "unknown table option: {}",
-                        from_bytes(tok.value)
-                    )))
-                }
+                match_ignore_ascii_case!(match tok.value {
+                    b"ROWID" => Ok(TableOptions::WITHOUT_ROWID),
+                    _ => Err(Error::Custom(format!(
+                        "unknown table option: {}",
+                        from_bytes(tok.value)
+                    ))),
+                })
             }
             TK_ID => {
                 let tok = eat_assert!(self, TK_ID);
-                if b"STRICT".eq_ignore_ascii_case(tok.value) {
-                    Ok(TableOptions::STRICT)
-                } else {
-                    Err(Error::Custom(format!(
-                        "unknown table option: {}",
-                        from_bytes(tok.value)
-                    )))
-                }
+                match_ignore_ascii_case!(match tok.value {
+                    b"STRICT" => Ok(TableOptions::STRICT),
+                    _ => Err(Error::Custom(format!(
+                        "unknown table option: {}",
+                        from_bytes(tok.value)
+                    ))),
+                })
             }
             _ => Ok(TableOptions::NONE),
         },
@@ -2855,18 +2839,16 @@ impl<'a> Parser<'a> {
                 match &c.col_type {
                     Some(Type { name, .. }) => {
                         // The datatype must be one of following: INT INTEGER REAL TEXT BLOB ANY
-                        if !(name.eq_ignore_ascii_case("INT")
-                            || name.eq_ignore_ascii_case("INTEGER")
-                            || name.eq_ignore_ascii_case("REAL")
-                            || name.eq_ignore_ascii_case("TEXT")
-                            || name.eq_ignore_ascii_case("BLOB")
-                            || name.eq_ignore_ascii_case("ANY"))
-                        {
+                        let bytes_name = name.as_bytes();
+                        match_ignore_ascii_case!(match bytes_name {
+                            b"INT" | b"INTEGER" | b"REAL" | b"TEXT" | b"BLOB" | b"ANY" => {}
+                            _ => {
                                 return Err(Error::Custom(format!(
                                     "unknown datatype for {}.{}: \"{}\"",
                                     tbl_name, c.col_name, name
                                 )));
                             }
+                        })
                     }
                     _ => {
                         // Every column definition must specify a datatype for that column. The freedom to specify a column without a datatype is removed.