diff --git a/Cargo.lock b/Cargo.lock index 2e8b0bdfc..676ba1ed6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2464,45 +2464,6 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" -dependencies = [ - "phf_shared", - "rand 0.8.5", -] - -[[package]] -name = "phf_shared" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" -dependencies = [ - "siphasher", - "uncased", -] - [[package]] name = "pin-project-lite" version = "0.2.16" @@ -3285,12 +3246,6 @@ dependencies = [ "libc", ] -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - [[package]] name = "slab" version = "0.4.9" @@ -3961,13 +3916,9 @@ dependencies = [ "log", "memchr", "miette", - "phf", - "phf_codegen", - "phf_shared", "serde", "strum", "strum_macros", - "uncased", ] [[package]] diff --git a/vendored/sqlite3-parser/Cargo.toml b/vendored/sqlite3-parser/Cargo.toml index 89ded7ad6..6ae686f07 100644 --- a/vendored/sqlite3-parser/Cargo.toml +++ b/vendored/sqlite3-parser/Cargo.toml @@ -25,12 +25,10 @@ default = ["YYNOERRORRECOVERY", "NDEBUG"] serde = ["dep:serde", "indexmap/serde", "bitflags/serde"] [dependencies] -phf = { version = "0.11", features = ["uncased"] } log = "0.4.22" memchr = "2.0" fallible-iterator = "0.3" bitflags = "2.0" -uncased = "0.9.10" indexmap = "2.0" miette = "7.4.0" strum = { workspace = true } @@ -42,9 +40,6 @@ env_logger = { version = "0.11", default-features = false } [build-dependencies] cc = "1.0" -phf_shared = { version = "0.11", features = ["uncased"] } -phf_codegen = "0.11" -uncased = "0.9.10" [lints.rust] dead_code = "allow" diff --git a/vendored/sqlite3-parser/build.rs b/vendored/sqlite3-parser/build.rs index 39d4b5805..65dfc4375 100644 --- a/vendored/sqlite3-parser/build.rs +++ b/vendored/sqlite3-parser/build.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::env; use std::fs::File; use std::io::{BufWriter, Result, Write}; @@ -5,7 +6,135 @@ use std::path::Path; use std::process::Command; use cc::Build; -use uncased::UncasedStr; + +/// generates a trie-like function with nested match expressions for parsing SQL keywords +/// example: input: [["ABORT", "TK_ABORT"], ["ACTION", "TK_ACTION"], ["ADD", "TK_ADD"],] +/// A +/// ├─ B +/// │ ├─ O +/// │ │ ├─ R +/// │ │ │ ├─ T -> TK_ABORT +/// ├─ C +/// │ ├─ T +/// │ │ ├─ I +/// │ │ │ ├─ O +/// │ │ │ │ ├─ N -> TK_ACTION +/// ├─ D +/// │ ├─ D -> TK_ADD +fn build_keyword_map( + writer: &mut impl Write, + func_name: &str, + keywords: &[[&'static str; 2]], +) -> Result<()> { + assert!(!keywords.is_empty()); + let mut 
min_len = keywords[0][0].len();
+    let mut max_len = keywords[0][0].len();
+
+    struct PathEntry {
+        result: Option<&'static str>,
+        sub_entries: HashMap<u8, Box<PathEntry>>,
+    }
+
+    let mut paths = Box::new(PathEntry {
+        result: None,
+        sub_entries: HashMap::new(),
+    });
+
+    for keyword in keywords {
+        let keyword_b = keyword[0].as_bytes();
+
+        if keyword_b.len() < min_len {
+            min_len = keyword_b.len();
+        }
+
+        if keyword_b.len() > max_len {
+            max_len = keyword_b.len();
+        }
+
+        let mut current = &mut paths;
+
+        for &b in keyword_b {
+            let upper_b = b.to_ascii_uppercase();
+
+            match current.sub_entries.get(&upper_b) {
+                Some(_) => {
+                    current = current.sub_entries.get_mut(&upper_b).unwrap();
+                }
+                None => {
+                    let new_entry = Box::new(PathEntry {
+                        result: None,
+                        sub_entries: HashMap::new(),
+                    });
+                    current.sub_entries.insert(upper_b, new_entry);
+                    current = current.sub_entries.get_mut(&upper_b).unwrap();
+                }
+            }
+        }
+
+        assert!(current.result.is_none());
+        current.result = Some(keyword[1]);
+    }
+
+    fn write_entry(writer: &mut impl Write, entry: &PathEntry) -> Result<()> {
+        if let Some(result) = entry.result {
+            writeln!(writer, "if idx == buf.len() {{")?;
+            writeln!(writer, "return Some(TokenType::{});", result)?;
+            writeln!(writer, "}}")?;
+        }
+
+        if entry.sub_entries.is_empty() {
+            writeln!(writer, "None")?;
+            return Ok(());
+        }
+
+        writeln!(writer, "if idx >= buf.len() {{")?;
+        writeln!(writer, "return None;")?;
+        writeln!(writer, "}}")?;
+
+        writeln!(writer, "match buf[idx] {{")?;
+        for (&b, sub_entry) in &entry.sub_entries {
+            if b.is_ascii_alphabetic() {
+                writeln!(writer, "{} | {} => {{", b, b.to_ascii_lowercase())?;
+            } else {
+                writeln!(writer, "{} => {{", b)?;
+            }
+            writeln!(writer, "idx += 1;")?;
+            write_entry(writer, sub_entry)?;
+            writeln!(writer, "}}")?;
+        }
+
+        writeln!(writer, "_ => None")?;
+        writeln!(writer, "}}")?;
+        Ok(())
+    }
+
+    writeln!(
+        writer,
+        "pub(crate) const MAX_KEYWORD_LEN: usize = {};",
+        max_len
+    )?;
+    writeln!(
+        writer,
+        "pub(crate) const MIN_KEYWORD_LEN: usize = {};",
+        min_len
+    )?;
+    writeln!(writer, "/// Check if `buf` is a keyword")?;
+    writeln!(
+        writer,
+        "pub fn {}(buf: &[u8]) -> Option<TokenType> {{",
+        func_name
+    )?;
+    writeln!(
+        writer,
+        "if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {{"
+    )?;
+    writeln!(writer, "return None;")?;
+    writeln!(writer, "}}")?;
+    writeln!(writer, "let mut idx = 0;")?;
+    write_entry(writer, &paths)?;
+    writeln!(writer, "}}")?;
+    Ok(())
+}
 
 fn main() -> Result<()> {
     let out_dir = env::var("OUT_DIR").unwrap();
@@ -43,164 +172,158 @@ fn main() -> Result<()> {
     let keywords = out_path.join("keywords.rs");
     let mut keywords = BufWriter::new(File::create(keywords)?);
-    write!(
+    build_keyword_map(
         &mut keywords,
-        "static KEYWORDS: ::phf::Map<&'static UncasedStr, TokenType> = \n{};",
-        phf_codegen::Map::new()
-            .entry(UncasedStr::new("ABORT"), "TokenType::TK_ABORT")
-            .entry(UncasedStr::new("ACTION"), "TokenType::TK_ACTION")
-            .entry(UncasedStr::new("ADD"), "TokenType::TK_ADD")
-            .entry(UncasedStr::new("AFTER"), "TokenType::TK_AFTER")
-            .entry(UncasedStr::new("ALL"), "TokenType::TK_ALL")
-            .entry(UncasedStr::new("ALTER"), "TokenType::TK_ALTER")
-            .entry(UncasedStr::new("ALWAYS"), "TokenType::TK_ALWAYS")
-            .entry(UncasedStr::new("ANALYZE"), "TokenType::TK_ANALYZE")
-            .entry(UncasedStr::new("AND"), "TokenType::TK_AND")
-            .entry(UncasedStr::new("AS"), "TokenType::TK_AS")
-            .entry(UncasedStr::new("ASC"), "TokenType::TK_ASC")
-            .entry(UncasedStr::new("ATTACH"), "TokenType::TK_ATTACH")
-            .entry(UncasedStr::new("AUTOINCREMENT"),
"TokenType::TK_AUTOINCR") - .entry(UncasedStr::new("BEFORE"), "TokenType::TK_BEFORE") - .entry(UncasedStr::new("BEGIN"), "TokenType::TK_BEGIN") - .entry(UncasedStr::new("BETWEEN"), "TokenType::TK_BETWEEN") - .entry(UncasedStr::new("BY"), "TokenType::TK_BY") - .entry(UncasedStr::new("CASCADE"), "TokenType::TK_CASCADE") - .entry(UncasedStr::new("CASE"), "TokenType::TK_CASE") - .entry(UncasedStr::new("CAST"), "TokenType::TK_CAST") - .entry(UncasedStr::new("CHECK"), "TokenType::TK_CHECK") - .entry(UncasedStr::new("COLLATE"), "TokenType::TK_COLLATE") - .entry(UncasedStr::new("COLUMN"), "TokenType::TK_COLUMNKW") - .entry(UncasedStr::new("COMMIT"), "TokenType::TK_COMMIT") - .entry(UncasedStr::new("CONFLICT"), "TokenType::TK_CONFLICT") - .entry(UncasedStr::new("CONSTRAINT"), "TokenType::TK_CONSTRAINT") - .entry(UncasedStr::new("CREATE"), "TokenType::TK_CREATE") - .entry(UncasedStr::new("CROSS"), "TokenType::TK_JOIN_KW") - .entry(UncasedStr::new("CURRENT"), "TokenType::TK_CURRENT") - .entry(UncasedStr::new("CURRENT_DATE"), "TokenType::TK_CTIME_KW") - .entry(UncasedStr::new("CURRENT_TIME"), "TokenType::TK_CTIME_KW") - .entry( - UncasedStr::new("CURRENT_TIMESTAMP"), - "TokenType::TK_CTIME_KW" - ) - .entry(UncasedStr::new("DATABASE"), "TokenType::TK_DATABASE") - .entry(UncasedStr::new("DEFAULT"), "TokenType::TK_DEFAULT") - .entry(UncasedStr::new("DEFERRABLE"), "TokenType::TK_DEFERRABLE") - .entry(UncasedStr::new("DEFERRED"), "TokenType::TK_DEFERRED") - .entry(UncasedStr::new("DELETE"), "TokenType::TK_DELETE") - .entry(UncasedStr::new("DESC"), "TokenType::TK_DESC") - .entry(UncasedStr::new("DETACH"), "TokenType::TK_DETACH") - .entry(UncasedStr::new("DISTINCT"), "TokenType::TK_DISTINCT") - .entry(UncasedStr::new("DO"), "TokenType::TK_DO") - .entry(UncasedStr::new("DROP"), "TokenType::TK_DROP") - .entry(UncasedStr::new("EACH"), "TokenType::TK_EACH") - .entry(UncasedStr::new("ELSE"), "TokenType::TK_ELSE") - .entry(UncasedStr::new("END"), "TokenType::TK_END") - .entry(UncasedStr::new("ESCAPE"), "TokenType::TK_ESCAPE") - .entry(UncasedStr::new("EXCEPT"), "TokenType::TK_EXCEPT") - .entry(UncasedStr::new("EXCLUDE"), "TokenType::TK_EXCLUDE") - .entry(UncasedStr::new("EXCLUSIVE"), "TokenType::TK_EXCLUSIVE") - .entry(UncasedStr::new("EXISTS"), "TokenType::TK_EXISTS") - .entry(UncasedStr::new("EXPLAIN"), "TokenType::TK_EXPLAIN") - .entry(UncasedStr::new("FAIL"), "TokenType::TK_FAIL") - .entry(UncasedStr::new("FILTER"), "TokenType::TK_FILTER") - .entry(UncasedStr::new("FIRST"), "TokenType::TK_FIRST") - .entry(UncasedStr::new("FOLLOWING"), "TokenType::TK_FOLLOWING") - .entry(UncasedStr::new("FOR"), "TokenType::TK_FOR") - .entry(UncasedStr::new("FOREIGN"), "TokenType::TK_FOREIGN") - .entry(UncasedStr::new("FROM"), "TokenType::TK_FROM") - .entry(UncasedStr::new("FULL"), "TokenType::TK_JOIN_KW") - .entry(UncasedStr::new("GENERATED"), "TokenType::TK_GENERATED") - .entry(UncasedStr::new("GLOB"), "TokenType::TK_LIKE_KW") - .entry(UncasedStr::new("GROUP"), "TokenType::TK_GROUP") - .entry(UncasedStr::new("GROUPS"), "TokenType::TK_GROUPS") - .entry(UncasedStr::new("HAVING"), "TokenType::TK_HAVING") - .entry(UncasedStr::new("IF"), "TokenType::TK_IF") - .entry(UncasedStr::new("IGNORE"), "TokenType::TK_IGNORE") - .entry(UncasedStr::new("IMMEDIATE"), "TokenType::TK_IMMEDIATE") - .entry(UncasedStr::new("IN"), "TokenType::TK_IN") - .entry(UncasedStr::new("INDEX"), "TokenType::TK_INDEX") - .entry(UncasedStr::new("INDEXED"), "TokenType::TK_INDEXED") - .entry(UncasedStr::new("INITIALLY"), "TokenType::TK_INITIALLY") - 
.entry(UncasedStr::new("INNER"), "TokenType::TK_JOIN_KW") - .entry(UncasedStr::new("INSERT"), "TokenType::TK_INSERT") - .entry(UncasedStr::new("INSTEAD"), "TokenType::TK_INSTEAD") - .entry(UncasedStr::new("INTERSECT"), "TokenType::TK_INTERSECT") - .entry(UncasedStr::new("INTO"), "TokenType::TK_INTO") - .entry(UncasedStr::new("IS"), "TokenType::TK_IS") - .entry(UncasedStr::new("ISNULL"), "TokenType::TK_ISNULL") - .entry(UncasedStr::new("JOIN"), "TokenType::TK_JOIN") - .entry(UncasedStr::new("KEY"), "TokenType::TK_KEY") - .entry(UncasedStr::new("LAST"), "TokenType::TK_LAST") - .entry(UncasedStr::new("LEFT"), "TokenType::TK_JOIN_KW") - .entry(UncasedStr::new("LIKE"), "TokenType::TK_LIKE_KW") - .entry(UncasedStr::new("LIMIT"), "TokenType::TK_LIMIT") - .entry(UncasedStr::new("MATCH"), "TokenType::TK_MATCH") - .entry( - UncasedStr::new("MATERIALIZED"), - "TokenType::TK_MATERIALIZED" - ) - .entry(UncasedStr::new("NATURAL"), "TokenType::TK_JOIN_KW") - .entry(UncasedStr::new("NO"), "TokenType::TK_NO") - .entry(UncasedStr::new("NOT"), "TokenType::TK_NOT") - .entry(UncasedStr::new("NOTHING"), "TokenType::TK_NOTHING") - .entry(UncasedStr::new("NOTNULL"), "TokenType::TK_NOTNULL") - .entry(UncasedStr::new("NULL"), "TokenType::TK_NULL") - .entry(UncasedStr::new("NULLS"), "TokenType::TK_NULLS") - .entry(UncasedStr::new("OF"), "TokenType::TK_OF") - .entry(UncasedStr::new("OFFSET"), "TokenType::TK_OFFSET") - .entry(UncasedStr::new("ON"), "TokenType::TK_ON") - .entry(UncasedStr::new("OR"), "TokenType::TK_OR") - .entry(UncasedStr::new("ORDER"), "TokenType::TK_ORDER") - .entry(UncasedStr::new("OTHERS"), "TokenType::TK_OTHERS") - .entry(UncasedStr::new("OUTER"), "TokenType::TK_JOIN_KW") - .entry(UncasedStr::new("OVER"), "TokenType::TK_OVER") - .entry(UncasedStr::new("PARTITION"), "TokenType::TK_PARTITION") - .entry(UncasedStr::new("PLAN"), "TokenType::TK_PLAN") - .entry(UncasedStr::new("PRAGMA"), "TokenType::TK_PRAGMA") - .entry(UncasedStr::new("PRECEDING"), "TokenType::TK_PRECEDING") - .entry(UncasedStr::new("PRIMARY"), "TokenType::TK_PRIMARY") - .entry(UncasedStr::new("QUERY"), "TokenType::TK_QUERY") - .entry(UncasedStr::new("RAISE"), "TokenType::TK_RAISE") - .entry(UncasedStr::new("RANGE"), "TokenType::TK_RANGE") - .entry(UncasedStr::new("RECURSIVE"), "TokenType::TK_RECURSIVE") - .entry(UncasedStr::new("REFERENCES"), "TokenType::TK_REFERENCES") - .entry(UncasedStr::new("REGEXP"), "TokenType::TK_LIKE_KW") - .entry(UncasedStr::new("REINDEX"), "TokenType::TK_REINDEX") - .entry(UncasedStr::new("RELEASE"), "TokenType::TK_RELEASE") - .entry(UncasedStr::new("RENAME"), "TokenType::TK_RENAME") - .entry(UncasedStr::new("REPLACE"), "TokenType::TK_REPLACE") - .entry(UncasedStr::new("RETURNING"), "TokenType::TK_RETURNING") - .entry(UncasedStr::new("RESTRICT"), "TokenType::TK_RESTRICT") - .entry(UncasedStr::new("RIGHT"), "TokenType::TK_JOIN_KW") - .entry(UncasedStr::new("ROLLBACK"), "TokenType::TK_ROLLBACK") - .entry(UncasedStr::new("ROW"), "TokenType::TK_ROW") - .entry(UncasedStr::new("ROWS"), "TokenType::TK_ROWS") - .entry(UncasedStr::new("SAVEPOINT"), "TokenType::TK_SAVEPOINT") - .entry(UncasedStr::new("SELECT"), "TokenType::TK_SELECT") - .entry(UncasedStr::new("SET"), "TokenType::TK_SET") - .entry(UncasedStr::new("TABLE"), "TokenType::TK_TABLE") - .entry(UncasedStr::new("TEMP"), "TokenType::TK_TEMP") - .entry(UncasedStr::new("TEMPORARY"), "TokenType::TK_TEMP") - .entry(UncasedStr::new("THEN"), "TokenType::TK_THEN") - .entry(UncasedStr::new("TIES"), "TokenType::TK_TIES") - .entry(UncasedStr::new("TO"), 
"TokenType::TK_TO") - .entry(UncasedStr::new("TRANSACTION"), "TokenType::TK_TRANSACTION") - .entry(UncasedStr::new("TRIGGER"), "TokenType::TK_TRIGGER") - .entry(UncasedStr::new("UNBOUNDED"), "TokenType::TK_UNBOUNDED") - .entry(UncasedStr::new("UNION"), "TokenType::TK_UNION") - .entry(UncasedStr::new("UNIQUE"), "TokenType::TK_UNIQUE") - .entry(UncasedStr::new("UPDATE"), "TokenType::TK_UPDATE") - .entry(UncasedStr::new("USING"), "TokenType::TK_USING") - .entry(UncasedStr::new("VACUUM"), "TokenType::TK_VACUUM") - .entry(UncasedStr::new("VALUES"), "TokenType::TK_VALUES") - .entry(UncasedStr::new("VIEW"), "TokenType::TK_VIEW") - .entry(UncasedStr::new("VIRTUAL"), "TokenType::TK_VIRTUAL") - .entry(UncasedStr::new("WHEN"), "TokenType::TK_WHEN") - .entry(UncasedStr::new("WHERE"), "TokenType::TK_WHERE") - .entry(UncasedStr::new("WINDOW"), "TokenType::TK_WINDOW") - .entry(UncasedStr::new("WITH"), "TokenType::TK_WITH") - .entry(UncasedStr::new("WITHOUT"), "TokenType::TK_WITHOUT") - .build() + "keyword_token", + &[ + ["ABORT", "TK_ABORT"], + ["ACTION", "TK_ACTION"], + ["ADD", "TK_ADD"], + ["AFTER", "TK_AFTER"], + ["ALL", "TK_ALL"], + ["ALTER", "TK_ALTER"], + ["ALWAYS", "TK_ALWAYS"], + ["ANALYZE", "TK_ANALYZE"], + ["AND", "TK_AND"], + ["AS", "TK_AS"], + ["ASC", "TK_ASC"], + ["ATTACH", "TK_ATTACH"], + ["AUTOINCREMENT", "TK_AUTOINCR"], + ["BEFORE", "TK_BEFORE"], + ["BEGIN", "TK_BEGIN"], + ["BETWEEN", "TK_BETWEEN"], + ["BY", "TK_BY"], + ["CASCADE", "TK_CASCADE"], + ["CASE", "TK_CASE"], + ["CAST", "TK_CAST"], + ["CHECK", "TK_CHECK"], + ["COLLATE", "TK_COLLATE"], + ["COLUMN", "TK_COLUMNKW"], + ["COMMIT", "TK_COMMIT"], + ["CONFLICT", "TK_CONFLICT"], + ["CONSTRAINT", "TK_CONSTRAINT"], + ["CREATE", "TK_CREATE"], + ["CROSS", "TK_JOIN_KW"], + ["CURRENT", "TK_CURRENT"], + ["CURRENT_DATE", "TK_CTIME_KW"], + ["CURRENT_TIME", "TK_CTIME_KW"], + ["CURRENT_TIMESTAMP", "TK_CTIME_KW"], + ["DATABASE", "TK_DATABASE"], + ["DEFAULT", "TK_DEFAULT"], + ["DEFERRABLE", "TK_DEFERRABLE"], + ["DEFERRED", "TK_DEFERRED"], + ["DELETE", "TK_DELETE"], + ["DESC", "TK_DESC"], + ["DETACH", "TK_DETACH"], + ["DISTINCT", "TK_DISTINCT"], + ["DO", "TK_DO"], + ["DROP", "TK_DROP"], + ["EACH", "TK_EACH"], + ["ELSE", "TK_ELSE"], + ["END", "TK_END"], + ["ESCAPE", "TK_ESCAPE"], + ["EXCEPT", "TK_EXCEPT"], + ["EXCLUDE", "TK_EXCLUDE"], + ["EXCLUSIVE", "TK_EXCLUSIVE"], + ["EXISTS", "TK_EXISTS"], + ["EXPLAIN", "TK_EXPLAIN"], + ["FAIL", "TK_FAIL"], + ["FILTER", "TK_FILTER"], + ["FIRST", "TK_FIRST"], + ["FOLLOWING", "TK_FOLLOWING"], + ["FOR", "TK_FOR"], + ["FOREIGN", "TK_FOREIGN"], + ["FROM", "TK_FROM"], + ["FULL", "TK_JOIN_KW"], + ["GENERATED", "TK_GENERATED"], + ["GLOB", "TK_LIKE_KW"], + ["GROUP", "TK_GROUP"], + ["GROUPS", "TK_GROUPS"], + ["HAVING", "TK_HAVING"], + ["IF", "TK_IF"], + ["IGNORE", "TK_IGNORE"], + ["IMMEDIATE", "TK_IMMEDIATE"], + ["IN", "TK_IN"], + ["INDEX", "TK_INDEX"], + ["INDEXED", "TK_INDEXED"], + ["INITIALLY", "TK_INITIALLY"], + ["INNER", "TK_JOIN_KW"], + ["INSERT", "TK_INSERT"], + ["INSTEAD", "TK_INSTEAD"], + ["INTERSECT", "TK_INTERSECT"], + ["INTO", "TK_INTO"], + ["IS", "TK_IS"], + ["ISNULL", "TK_ISNULL"], + ["JOIN", "TK_JOIN"], + ["KEY", "TK_KEY"], + ["LAST", "TK_LAST"], + ["LEFT", "TK_JOIN_KW"], + ["LIKE", "TK_LIKE_KW"], + ["LIMIT", "TK_LIMIT"], + ["MATCH", "TK_MATCH"], + ["MATERIALIZED", "TK_MATERIALIZED"], + ["NATURAL", "TK_JOIN_KW"], + ["NO", "TK_NO"], + ["NOT", "TK_NOT"], + ["NOTHING", "TK_NOTHING"], + ["NOTNULL", "TK_NOTNULL"], + ["NULL", "TK_NULL"], + ["NULLS", "TK_NULLS"], + ["OF", "TK_OF"], + ["OFFSET", "TK_OFFSET"], + ["ON", 
"TK_ON"], + ["OR", "TK_OR"], + ["ORDER", "TK_ORDER"], + ["OTHERS", "TK_OTHERS"], + ["OUTER", "TK_JOIN_KW"], + ["OVER", "TK_OVER"], + ["PARTITION", "TK_PARTITION"], + ["PLAN", "TK_PLAN"], + ["PRAGMA", "TK_PRAGMA"], + ["PRECEDING", "TK_PRECEDING"], + ["PRIMARY", "TK_PRIMARY"], + ["QUERY", "TK_QUERY"], + ["RAISE", "TK_RAISE"], + ["RANGE", "TK_RANGE"], + ["RECURSIVE", "TK_RECURSIVE"], + ["REFERENCES", "TK_REFERENCES"], + ["REGEXP", "TK_LIKE_KW"], + ["REINDEX", "TK_REINDEX"], + ["RELEASE", "TK_RELEASE"], + ["RENAME", "TK_RENAME"], + ["REPLACE", "TK_REPLACE"], + ["RETURNING", "TK_RETURNING"], + ["RESTRICT", "TK_RESTRICT"], + ["RIGHT", "TK_JOIN_KW"], + ["ROLLBACK", "TK_ROLLBACK"], + ["ROW", "TK_ROW"], + ["ROWS", "TK_ROWS"], + ["SAVEPOINT", "TK_SAVEPOINT"], + ["SELECT", "TK_SELECT"], + ["SET", "TK_SET"], + ["TABLE", "TK_TABLE"], + ["TEMP", "TK_TEMP"], + ["TEMPORARY", "TK_TEMP"], + ["THEN", "TK_THEN"], + ["TIES", "TK_TIES"], + ["TO", "TK_TO"], + ["TRANSACTION", "TK_TRANSACTION"], + ["TRIGGER", "TK_TRIGGER"], + ["UNBOUNDED", "TK_UNBOUNDED"], + ["UNION", "TK_UNION"], + ["UNIQUE", "TK_UNIQUE"], + ["UPDATE", "TK_UPDATE"], + ["USING", "TK_USING"], + ["VACUUM", "TK_VACUUM"], + ["VALUES", "TK_VALUES"], + ["VIEW", "TK_VIEW"], + ["VIRTUAL", "TK_VIRTUAL"], + ["WHEN", "TK_WHEN"], + ["WHERE", "TK_WHERE"], + ["WINDOW", "TK_WINDOW"], + ["WITH", "TK_WITH"], + ["WITHOUT", "TK_WITHOUT"], + ], )?; println!("cargo:rerun-if-changed=third_party/lemon/lemon.c"); diff --git a/vendored/sqlite3-parser/sqlparser_bench/benches/sqlparser_bench.rs b/vendored/sqlite3-parser/sqlparser_bench/benches/sqlparser_bench.rs index 33273c1c0..3005fcde1 100644 --- a/vendored/sqlite3-parser/sqlparser_bench/benches/sqlparser_bench.rs +++ b/vendored/sqlite3-parser/sqlparser_bench/benches/sqlparser_bench.rs @@ -12,7 +12,7 @@ use criterion::{criterion_group, criterion_main, Criterion}; use fallible_iterator::FallibleIterator; -use turso_sqlite3_parser::lexer::sql::Parser; +use turso_sqlite3_parser::{dialect::keyword_token, lexer::sql::Parser}; fn basic_queries(c: &mut Criterion) { let mut group = c.benchmark_group("sqlparser-rs parsing benchmark"); @@ -42,6 +42,152 @@ fn basic_queries(c: &mut Criterion) { assert!(parser.next().unwrap().unwrap().readonly()) }); }); + + static VALUES: [&[u8]; 136] = [ + b"ABORT", + b"ACTION", + b"ADD", + b"AFTER", + b"ALL", + b"ALTER", + b"ANALYZE", + b"AND", + b"AS", + b"ASC", + b"ATTACH", + b"AUTOINCREMENT", + b"BEFORE", + b"BEGIN", + b"BETWEEN", + b"BY", + b"CASCADE", + b"CASE", + b"CAST", + b"CHECK", + b"COLLATE", + b"COLUMN", + b"COMMIT", + b"CONFLICT", + b"CONSTRAINT", + b"CREATE", + b"CROSS", + b"CURRENT", + b"CURRENT_DATE", + b"CURRENT_TIME", + b"CURRENT_TIMESTAMP", + b"DATABASE", + b"DEFAULT", + b"DEFERRABLE", + b"DEFERRED", + b"DELETE", + b"DESC", + b"DETACH", + b"DISTINCT", + b"DO", + b"DROP", + b"EACH", + b"ELSE", + b"END", + b"ESCAPE", + b"EXCEPT", + b"EXCLUSIVE", + b"EXISTS", + b"EXPLAIN", + b"FAIL", + b"FILTER", + b"FOLLOWING", + b"FOR", + b"FOREIGN", + b"FROM", + b"FULL", + b"GLOB", + b"GROUP", + b"HAVING", + b"IF", + b"IGNORE", + b"IMMEDIATE", + b"IN", + b"INDEX", + b"INDEXED", + b"INITIALLY", + b"INNER", + b"INSERT", + b"INSTEAD", + b"INTERSECT", + b"INTO", + b"IS", + b"ISNULL", + b"JOIN", + b"KEY", + b"LEFT", + b"LIKE", + b"LIMIT", + b"MATCH", + b"NATURAL", + b"NO", + b"NOT", + b"NOTHING", + b"NOTNULL", + b"NULL", + b"OF", + b"OFFSET", + b"ON", + b"OR", + b"ORDER", + b"OUTER", + b"OVER", + b"PARTITION", + b"PLAN", + b"PRAGMA", + b"PRECEDING", + b"PRIMARY", + b"QUERY", + b"RAISE", + b"RANGE", + 
b"RECURSIVE",
+        b"REFERENCES",
+        b"REGEXP",
+        b"REINDEX",
+        b"RELEASE",
+        b"RENAME",
+        b"REPLACE",
+        b"RESTRICT",
+        b"RIGHT",
+        b"ROLLBACK",
+        b"ROW",
+        b"ROWS",
+        b"SAVEPOINT",
+        b"SELECT",
+        b"SET",
+        b"TABLE",
+        b"TEMP",
+        b"TEMPORARY",
+        b"THEN",
+        b"TO",
+        b"TRANSACTION",
+        b"TRIGGER",
+        b"UNBOUNDED",
+        b"UNION",
+        b"UNIQUE",
+        b"UPDATE",
+        b"USING",
+        b"VACUUM",
+        b"VALUES",
+        b"VIEW",
+        b"VIRTUAL",
+        b"WHEN",
+        b"WHERE",
+        b"WINDOW",
+        b"WITH",
+        b"WITHOUT",
+    ];
+    group.bench_with_input("keyword_token", &VALUES, |b, &s| {
+        b.iter(|| {
+            for value in &s {
+                assert!(keyword_token(value).is_some())
+            }
+        });
+    });
 }
 
 criterion_group!(benches, basic_queries);
diff --git a/vendored/sqlite3-parser/src/dialect/mod.rs b/vendored/sqlite3-parser/src/dialect/mod.rs
index 4902378f5..c1325f4b2 100644
--- a/vendored/sqlite3-parser/src/dialect/mod.rs
+++ b/vendored/sqlite3-parser/src/dialect/mod.rs
@@ -2,7 +2,6 @@
 use std::fmt::Formatter;
 use std::str;
 
-use uncased::UncasedStr;
 
 mod token;
 pub use token::TokenType;
@@ -42,13 +41,6 @@ pub(crate) fn from_bytes(bytes: &[u8]) -> String {
 }
 
 include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
-pub(crate) const MAX_KEYWORD_LEN: usize = 17;
-
-/// Check if `word` is a keyword
-pub fn keyword_token(word: &[u8]) -> Option<TokenType> {
-    let s = std::str::from_utf8(word).ok()?;
-    KEYWORDS.get(UncasedStr::new(s)).cloned()
-}
 
 pub(crate) fn is_identifier(name: &str) -> bool {
     if name.is_empty() {
@@ -242,3 +234,176 @@ impl TokenType {
         }
     }
 }
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::HashMap;
+
+    #[test]
+    fn test_keyword_token() {
+        let values = HashMap::from([
+            ("ABORT", TokenType::TK_ABORT),
+            ("ACTION", TokenType::TK_ACTION),
+            ("ADD", TokenType::TK_ADD),
+            ("AFTER", TokenType::TK_AFTER),
+            ("ALL", TokenType::TK_ALL),
+            ("ALTER", TokenType::TK_ALTER),
+            ("ALWAYS", TokenType::TK_ALWAYS),
+            ("ANALYZE", TokenType::TK_ANALYZE),
+            ("AND", TokenType::TK_AND),
+            ("AS", TokenType::TK_AS),
+            ("ASC", TokenType::TK_ASC),
+            ("ATTACH", TokenType::TK_ATTACH),
+            ("AUTOINCREMENT", TokenType::TK_AUTOINCR),
+            ("BEFORE", TokenType::TK_BEFORE),
+            ("BEGIN", TokenType::TK_BEGIN),
+            ("BETWEEN", TokenType::TK_BETWEEN),
+            ("BY", TokenType::TK_BY),
+            ("CASCADE", TokenType::TK_CASCADE),
+            ("CASE", TokenType::TK_CASE),
+            ("CAST", TokenType::TK_CAST),
+            ("CHECK", TokenType::TK_CHECK),
+            ("COLLATE", TokenType::TK_COLLATE),
+            ("COLUMN", TokenType::TK_COLUMNKW),
+            ("COMMIT", TokenType::TK_COMMIT),
+            ("CONFLICT", TokenType::TK_CONFLICT),
+            ("CONSTRAINT", TokenType::TK_CONSTRAINT),
+            ("CREATE", TokenType::TK_CREATE),
+            ("CROSS", TokenType::TK_JOIN_KW),
+            ("CURRENT", TokenType::TK_CURRENT),
+            ("CURRENT_DATE", TokenType::TK_CTIME_KW),
+            ("CURRENT_TIME", TokenType::TK_CTIME_KW),
+            ("CURRENT_TIMESTAMP", TokenType::TK_CTIME_KW),
+            ("DATABASE", TokenType::TK_DATABASE),
+            ("DEFAULT", TokenType::TK_DEFAULT),
+            ("DEFERRABLE", TokenType::TK_DEFERRABLE),
+            ("DEFERRED", TokenType::TK_DEFERRED),
+            ("DELETE", TokenType::TK_DELETE),
+            ("DESC", TokenType::TK_DESC),
+            ("DETACH", TokenType::TK_DETACH),
+            ("DISTINCT", TokenType::TK_DISTINCT),
+            ("DO", TokenType::TK_DO),
+            ("DROP", TokenType::TK_DROP),
+            ("EACH", TokenType::TK_EACH),
+            ("ELSE", TokenType::TK_ELSE),
+            ("END", TokenType::TK_END),
+            ("ESCAPE", TokenType::TK_ESCAPE),
+            ("EXCEPT", TokenType::TK_EXCEPT),
+            ("EXCLUDE", TokenType::TK_EXCLUDE),
+            ("EXCLUSIVE", TokenType::TK_EXCLUSIVE),
+            ("EXISTS", TokenType::TK_EXISTS),
+            ("EXPLAIN", TokenType::TK_EXPLAIN),
+            ("FAIL", TokenType::TK_FAIL),
+            ("FILTER", TokenType::TK_FILTER),
+            ("FIRST",
TokenType::TK_FIRST), + ("FOLLOWING", TokenType::TK_FOLLOWING), + ("FOR", TokenType::TK_FOR), + ("FOREIGN", TokenType::TK_FOREIGN), + ("FROM", TokenType::TK_FROM), + ("FULL", TokenType::TK_JOIN_KW), + ("GENERATED", TokenType::TK_GENERATED), + ("GLOB", TokenType::TK_LIKE_KW), + ("GROUP", TokenType::TK_GROUP), + ("GROUPS", TokenType::TK_GROUPS), + ("HAVING", TokenType::TK_HAVING), + ("IF", TokenType::TK_IF), + ("IGNORE", TokenType::TK_IGNORE), + ("IMMEDIATE", TokenType::TK_IMMEDIATE), + ("IN", TokenType::TK_IN), + ("INDEX", TokenType::TK_INDEX), + ("INDEXED", TokenType::TK_INDEXED), + ("INITIALLY", TokenType::TK_INITIALLY), + ("INNER", TokenType::TK_JOIN_KW), + ("INSERT", TokenType::TK_INSERT), + ("INSTEAD", TokenType::TK_INSTEAD), + ("INTERSECT", TokenType::TK_INTERSECT), + ("INTO", TokenType::TK_INTO), + ("IS", TokenType::TK_IS), + ("ISNULL", TokenType::TK_ISNULL), + ("JOIN", TokenType::TK_JOIN), + ("KEY", TokenType::TK_KEY), + ("LAST", TokenType::TK_LAST), + ("LEFT", TokenType::TK_JOIN_KW), + ("LIKE", TokenType::TK_LIKE_KW), + ("LIMIT", TokenType::TK_LIMIT), + ("MATCH", TokenType::TK_MATCH), + ("MATERIALIZED", TokenType::TK_MATERIALIZED), + ("NATURAL", TokenType::TK_JOIN_KW), + ("NO", TokenType::TK_NO), + ("NOT", TokenType::TK_NOT), + ("NOTHING", TokenType::TK_NOTHING), + ("NOTNULL", TokenType::TK_NOTNULL), + ("NULL", TokenType::TK_NULL), + ("NULLS", TokenType::TK_NULLS), + ("OF", TokenType::TK_OF), + ("OFFSET", TokenType::TK_OFFSET), + ("ON", TokenType::TK_ON), + ("OR", TokenType::TK_OR), + ("ORDER", TokenType::TK_ORDER), + ("OTHERS", TokenType::TK_OTHERS), + ("OUTER", TokenType::TK_JOIN_KW), + ("OVER", TokenType::TK_OVER), + ("PARTITION", TokenType::TK_PARTITION), + ("PLAN", TokenType::TK_PLAN), + ("PRAGMA", TokenType::TK_PRAGMA), + ("PRECEDING", TokenType::TK_PRECEDING), + ("PRIMARY", TokenType::TK_PRIMARY), + ("QUERY", TokenType::TK_QUERY), + ("RAISE", TokenType::TK_RAISE), + ("RANGE", TokenType::TK_RANGE), + ("RECURSIVE", TokenType::TK_RECURSIVE), + ("REFERENCES", TokenType::TK_REFERENCES), + ("REGEXP", TokenType::TK_LIKE_KW), + ("REINDEX", TokenType::TK_REINDEX), + ("RELEASE", TokenType::TK_RELEASE), + ("RENAME", TokenType::TK_RENAME), + ("REPLACE", TokenType::TK_REPLACE), + ("RETURNING", TokenType::TK_RETURNING), + ("RESTRICT", TokenType::TK_RESTRICT), + ("RIGHT", TokenType::TK_JOIN_KW), + ("ROLLBACK", TokenType::TK_ROLLBACK), + ("ROW", TokenType::TK_ROW), + ("ROWS", TokenType::TK_ROWS), + ("SAVEPOINT", TokenType::TK_SAVEPOINT), + ("SELECT", TokenType::TK_SELECT), + ("SET", TokenType::TK_SET), + ("TABLE", TokenType::TK_TABLE), + ("TEMP", TokenType::TK_TEMP), + ("TEMPORARY", TokenType::TK_TEMP), + ("THEN", TokenType::TK_THEN), + ("TIES", TokenType::TK_TIES), + ("TO", TokenType::TK_TO), + ("TRANSACTION", TokenType::TK_TRANSACTION), + ("TRIGGER", TokenType::TK_TRIGGER), + ("UNBOUNDED", TokenType::TK_UNBOUNDED), + ("UNION", TokenType::TK_UNION), + ("UNIQUE", TokenType::TK_UNIQUE), + ("UPDATE", TokenType::TK_UPDATE), + ("USING", TokenType::TK_USING), + ("VACUUM", TokenType::TK_VACUUM), + ("VALUES", TokenType::TK_VALUES), + ("VIEW", TokenType::TK_VIEW), + ("VIRTUAL", TokenType::TK_VIRTUAL), + ("WHEN", TokenType::TK_WHEN), + ("WHERE", TokenType::TK_WHERE), + ("WINDOW", TokenType::TK_WINDOW), + ("WITH", TokenType::TK_WITH), + ("WITHOUT", TokenType::TK_WITHOUT), + ]); + + for (key, value) in &values { + assert!(keyword_token(key.as_bytes()).unwrap() == *value); + assert!( + keyword_token(key.as_bytes().to_ascii_lowercase().as_slice()).unwrap() == *value + ); + } + + 
assert!(keyword_token(b"").is_none()); + assert!(keyword_token(b"wrong").is_none()); + assert!(keyword_token(b"super wrong").is_none()); + assert!(keyword_token(b"super_wrong").is_none()); + assert!(keyword_token(b"aae26e78-3ba7-4627-8f8f-02623302495a").is_none()); + assert!(keyword_token("Crème Brulée".as_bytes()).is_none()); + assert!(keyword_token("fróm".as_bytes()).is_none()); + } +} diff --git a/vendored/sqlite3-parser/src/lexer/sql/mod.rs b/vendored/sqlite3-parser/src/lexer/sql/mod.rs index b2007f3c7..c9cf13822 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/mod.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/mod.rs @@ -4,9 +4,7 @@ use memchr::memchr; pub use crate::dialect::TokenType; use crate::dialect::TokenType::*; -use crate::dialect::{ - is_identifier_continue, is_identifier_start, keyword_token, sentinel, MAX_KEYWORD_LEN, -}; +use crate::dialect::{is_identifier_continue, is_identifier_start, keyword_token, sentinel}; use crate::parser::ast::Cmd; use crate::parser::parse::{yyParser, YYCODETYPE}; use crate::parser::Context; @@ -719,12 +717,7 @@ impl Tokenizer { _ => data.len(), }; let word = &data[..i]; - let tt = if word.len() >= 2 && word.len() <= MAX_KEYWORD_LEN && word.is_ascii() { - keyword_token(word).unwrap_or(TK_ID) - } else { - TK_ID - }; - (Some((word, tt)), i) + (Some((word, keyword_token(word).unwrap_or(TK_ID))), i) } }
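
Note for review (illustrative, not part of the diff): given a hypothetical two-keyword input &[["AS", "TK_AS"], ["ASC", "TK_ASC"]], build_keyword_map(&mut out, "keyword_token", ...) writes roughly the code below into keywords.rs. The real output is unindented (it is compiled, never read) and covers the full keyword table; indentation and comments are added here for readability, and TokenType is in scope because the generated file is include!-d into src/dialect/mod.rs.

    // Sketch of the generated trie for the keywords "AS" and "ASC".
    pub(crate) const MAX_KEYWORD_LEN: usize = 3;
    pub(crate) const MIN_KEYWORD_LEN: usize = 2;
    /// Check if `buf` is a keyword
    pub fn keyword_token(buf: &[u8]) -> Option<TokenType> {
        // Cheap length guard before walking the trie.
        if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {
            return None;
        }
        let mut idx = 0;
        if idx >= buf.len() {
            return None;
        }
        match buf[idx] {
            65 | 97 => {
                // b'A' | b'a'
                idx += 1;
                if idx >= buf.len() {
                    return None;
                }
                match buf[idx] {
                    83 | 115 => {
                        // b'S' | b's'
                        idx += 1;
                        // Buffer fully consumed on a result node: "AS", "as", "aS", "As".
                        if idx == buf.len() {
                            return Some(TokenType::TK_AS);
                        }
                        if idx >= buf.len() {
                            return None;
                        }
                        match buf[idx] {
                            67 | 99 => {
                                // b'C' | b'c'
                                idx += 1;
                                if idx == buf.len() {
                                    return Some(TokenType::TK_ASC);
                                }
                                None
                            }
                            _ => None
                        }
                    }
                    _ => None
                }
            }
            _ => None
        }
    }

Matching both byte cases in each arm (65 | 97 is 'A' | 'a') is what makes the lookup case-insensitive without the uncased crate, and the MIN/MAX length guard plus the in-trie bounds checks replace the word.len() / is_ascii() pre-checks the lexer previously performed before calling keyword_token, which is why the final hunk can collapse that branch to a single expression.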