mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-19 01:24:20 +01:00
291 lines
9.3 KiB
Rust
291 lines
9.3 KiB
Rust
use std::collections::HashMap;
|
|
use std::fs::File;
|
|
use std::io::{BufWriter, Write};
|
|
use std::path::PathBuf;
|
|
|
|
/// Generates a trie-like function with nested `match` expressions for parsing SQL keywords.
///
/// Example input: `[["ABORT", "TK_ABORT"], ["ACTION", "TK_ACTION"], ["ADD", "TK_ADD"]]`
///
/// ```text
/// A
/// ├─ B
/// │  ├─ O
/// │  │  ├─ R
/// │  │  │  ├─ T -> TK_ABORT
/// ├─ C
/// │  ├─ T
/// │  │  ├─ I
/// │  │  │  ├─ O
/// │  │  │  │  ├─ N -> TK_ACTION
/// ├─ D
/// │  ├─ D -> TK_ADD
/// ```
///
/// The following Rust source is written to `writer`, in this order:
/// - `MAX_KEYWORD_LEN` / `MIN_KEYWORD_LEN`: byte lengths of the longest and shortest keyword;
/// - `pub fn {func_name}(buf: &[u8]) -> Option<TokenType>`: rejects inputs whose length is
///   outside that range, then walks the trie one byte at a time.
///
/// Each entry of `keywords` is `[keyword, token_variant]`. Keywords are upper-cased (ASCII)
/// when inserted into the trie and every alphabetic arm also accepts the lower-case byte, so
/// the generated matcher is ASCII-case-insensitive. Sibling arms are emitted in ascending byte
/// order so that the generated file is identical from one run to the next.
///
/// # Panics
///
/// Panics if `keywords` is empty or if two entries spell the same keyword (ignoring ASCII
/// case).
///
/// # Errors
///
/// Returns the first I/O error reported by `writer`.
fn build_keyword_map(
    writer: &mut impl Write,
    func_name: &str,
    keywords: &[[&'static str; 2]],
) -> Result<(), std::io::Error> {
    /// One trie node: `result` is the token for the keyword ending here (if any),
    /// `sub_entries` maps the next (upper-cased) byte to the child node.
    #[derive(Default)]
    struct PathEntry {
        result: Option<&'static str>,
        sub_entries: HashMap<u8, PathEntry>,
    }

    /// Emits the matching code for `entry` and, recursively, for all of its children.
    fn write_entry(writer: &mut impl Write, entry: &PathEntry) -> Result<(), std::io::Error> {
        // A keyword ends at this node: accept only if the whole input was consumed.
        if let Some(result) = entry.result {
            writeln!(writer, "if idx == buf.len() {{")?;
            writeln!(writer, "return Some(TokenType::{result});")?;
            writeln!(writer, "}}")?;
        }

        // Leaf node: anything that did not return above is not a keyword.
        if entry.sub_entries.is_empty() {
            writeln!(writer, "None")?;
            return Ok(());
        }

        // Guard the `buf[idx]` access emitted just below.
        writeln!(writer, "if idx >= buf.len() {{")?;
        writeln!(writer, "return None;")?;
        writeln!(writer, "}}")?;

        // HashMap iteration order is arbitrary; sort so the generated code is reproducible.
        let mut children: Vec<(u8, &PathEntry)> =
            entry.sub_entries.iter().map(|(&b, sub)| (b, sub)).collect();
        children.sort_unstable_by_key(|&(b, _)| b);

        writeln!(writer, "match buf[idx] {{")?;
        for (b, sub_entry) in children {
            // Bytes are printed as decimal `u8` patterns; letters match both cases.
            if b.is_ascii_alphabetic() {
                writeln!(writer, "{} | {} => {{", b, b.to_ascii_lowercase())?;
            } else {
                writeln!(writer, "{b} => {{")?;
            }
            writeln!(writer, "idx += 1;")?;
            write_entry(writer, sub_entry)?;
            writeln!(writer, "}}")?;
        }
        writeln!(writer, "_ => None")?;
        writeln!(writer, "}}")?;
        Ok(())
    }

    assert!(!keywords.is_empty(), "at least one keyword is required");

    let mut min_len = usize::MAX;
    let mut max_len = 0usize;
    let mut root = PathEntry::default();

    for &[keyword, token] in keywords {
        min_len = min_len.min(keyword.len());
        max_len = max_len.max(keyword.len());

        // Walk (and extend where needed) the trie along the upper-cased keyword bytes.
        let mut node = &mut root;
        for b in keyword.bytes() {
            node = node.sub_entries.entry(b.to_ascii_uppercase()).or_default();
        }

        assert!(node.result.is_none(), "duplicate keyword: {keyword}");
        node.result = Some(token);
    }

    writeln!(
        writer,
        "pub(crate) const MAX_KEYWORD_LEN: usize = {max_len};"
    )?;
    writeln!(
        writer,
        "pub(crate) const MIN_KEYWORD_LEN: usize = {min_len};"
    )?;
    writeln!(writer, "/// Check if `word` is a keyword")?;
    writeln!(
        writer,
        "pub fn {func_name}(buf: &[u8]) -> Option<TokenType> {{"
    )?;
    writeln!(
        writer,
        "if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {{"
    )?;
    writeln!(writer, "return None;")?;
    writeln!(writer, "}}")?;
    writeln!(writer, "let mut idx = 0;")?;
    write_entry(writer, &root)?;
    writeln!(writer, "}}")?;
    Ok(())
}
|
|
|
|
fn main() {
|
|
let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
|
|
let keywords = out_dir.join("keywords.rs");
|
|
let mut keywords = BufWriter::new(File::create(keywords).unwrap());
|
|
build_keyword_map(
|
|
&mut keywords,
|
|
"keyword_token",
|
|
&[
|
|
["ABORT", "TK_ABORT"],
|
|
["ACTION", "TK_ACTION"],
|
|
["ADD", "TK_ADD"],
|
|
["AFTER", "TK_AFTER"],
|
|
["ALL", "TK_ALL"],
|
|
["ALTER", "TK_ALTER"],
|
|
["ALWAYS", "TK_ALWAYS"],
|
|
["ANALYZE", "TK_ANALYZE"],
|
|
["AND", "TK_AND"],
|
|
["AS", "TK_AS"],
|
|
["ASC", "TK_ASC"],
|
|
["ATTACH", "TK_ATTACH"],
|
|
["AUTOINCREMENT", "TK_AUTOINCR"],
|
|
["BEFORE", "TK_BEFORE"],
|
|
["BEGIN", "TK_BEGIN"],
|
|
["BETWEEN", "TK_BETWEEN"],
|
|
["BY", "TK_BY"],
|
|
["CASCADE", "TK_CASCADE"],
|
|
["CASE", "TK_CASE"],
|
|
["CAST", "TK_CAST"],
|
|
["CHECK", "TK_CHECK"],
|
|
["COLLATE", "TK_COLLATE"],
|
|
["COLUMN", "TK_COLUMNKW"],
|
|
["COMMIT", "TK_COMMIT"],
|
|
["CONFLICT", "TK_CONFLICT"],
|
|
["CONSTRAINT", "TK_CONSTRAINT"],
|
|
["CREATE", "TK_CREATE"],
|
|
["CROSS", "TK_JOIN_KW"],
|
|
["CURRENT", "TK_CURRENT"],
|
|
["CURRENT_DATE", "TK_CTIME_KW"],
|
|
["CURRENT_TIME", "TK_CTIME_KW"],
|
|
["CURRENT_TIMESTAMP", "TK_CTIME_KW"],
|
|
["DATABASE", "TK_DATABASE"],
|
|
["DEFAULT", "TK_DEFAULT"],
|
|
["DEFERRABLE", "TK_DEFERRABLE"],
|
|
["DEFERRED", "TK_DEFERRED"],
|
|
["DELETE", "TK_DELETE"],
|
|
["DESC", "TK_DESC"],
|
|
["DETACH", "TK_DETACH"],
|
|
["DISTINCT", "TK_DISTINCT"],
|
|
["DO", "TK_DO"],
|
|
["DROP", "TK_DROP"],
|
|
["EACH", "TK_EACH"],
|
|
["ELSE", "TK_ELSE"],
|
|
["END", "TK_END"],
|
|
["ESCAPE", "TK_ESCAPE"],
|
|
["EXCEPT", "TK_EXCEPT"],
|
|
["EXCLUDE", "TK_EXCLUDE"],
|
|
["EXCLUSIVE", "TK_EXCLUSIVE"],
|
|
["EXISTS", "TK_EXISTS"],
|
|
["EXPLAIN", "TK_EXPLAIN"],
|
|
["FAIL", "TK_FAIL"],
|
|
["FILTER", "TK_FILTER"],
|
|
["FIRST", "TK_FIRST"],
|
|
["FOLLOWING", "TK_FOLLOWING"],
|
|
["FOR", "TK_FOR"],
|
|
["FOREIGN", "TK_FOREIGN"],
|
|
["FROM", "TK_FROM"],
|
|
["FULL", "TK_JOIN_KW"],
|
|
["GENERATED", "TK_GENERATED"],
|
|
["GLOB", "TK_LIKE_KW"],
|
|
["GROUP", "TK_GROUP"],
|
|
["GROUPS", "TK_GROUPS"],
|
|
["HAVING", "TK_HAVING"],
|
|
["IF", "TK_IF"],
|
|
["IGNORE", "TK_IGNORE"],
|
|
["IMMEDIATE", "TK_IMMEDIATE"],
|
|
["IN", "TK_IN"],
|
|
["INDEX", "TK_INDEX"],
|
|
["INDEXED", "TK_INDEXED"],
|
|
["INITIALLY", "TK_INITIALLY"],
|
|
["INNER", "TK_JOIN_KW"],
|
|
["INSERT", "TK_INSERT"],
|
|
["INSTEAD", "TK_INSTEAD"],
|
|
["INTERSECT", "TK_INTERSECT"],
|
|
["INTO", "TK_INTO"],
|
|
["IS", "TK_IS"],
|
|
["ISNULL", "TK_ISNULL"],
|
|
["JOIN", "TK_JOIN"],
|
|
["KEY", "TK_KEY"],
|
|
["LAST", "TK_LAST"],
|
|
["LEFT", "TK_JOIN_KW"],
|
|
["LIKE", "TK_LIKE_KW"],
|
|
["LIMIT", "TK_LIMIT"],
|
|
["MATCH", "TK_MATCH"],
|
|
["MATERIALIZED", "TK_MATERIALIZED"],
|
|
["NATURAL", "TK_JOIN_KW"],
|
|
["NO", "TK_NO"],
|
|
["NOT", "TK_NOT"],
|
|
["NOTHING", "TK_NOTHING"],
|
|
["NOTNULL", "TK_NOTNULL"],
|
|
["NULL", "TK_NULL"],
|
|
["NULLS", "TK_NULLS"],
|
|
["OF", "TK_OF"],
|
|
["OFFSET", "TK_OFFSET"],
|
|
["ON", "TK_ON"],
|
|
["OR", "TK_OR"],
|
|
["ORDER", "TK_ORDER"],
|
|
["OTHERS", "TK_OTHERS"],
|
|
["OUTER", "TK_JOIN_KW"],
|
|
["OVER", "TK_OVER"],
|
|
["PARTITION", "TK_PARTITION"],
|
|
["PLAN", "TK_PLAN"],
|
|
["PRAGMA", "TK_PRAGMA"],
|
|
["PRECEDING", "TK_PRECEDING"],
|
|
["PRIMARY", "TK_PRIMARY"],
|
|
["QUERY", "TK_QUERY"],
|
|
["RAISE", "TK_RAISE"],
|
|
["RANGE", "TK_RANGE"],
|
|
["RECURSIVE", "TK_RECURSIVE"],
|
|
["REFERENCES", "TK_REFERENCES"],
|
|
["REGEXP", "TK_LIKE_KW"],
|
|
["REINDEX", "TK_REINDEX"],
|
|
["RELEASE", "TK_RELEASE"],
|
|
["RENAME", "TK_RENAME"],
|
|
["REPLACE", "TK_REPLACE"],
|
|
["RETURNING", "TK_RETURNING"],
|
|
["RESTRICT", "TK_RESTRICT"],
|
|
["RIGHT", "TK_JOIN_KW"],
|
|
["ROLLBACK", "TK_ROLLBACK"],
|
|
["ROW", "TK_ROW"],
|
|
["ROWS", "TK_ROWS"],
|
|
["SAVEPOINT", "TK_SAVEPOINT"],
|
|
["SELECT", "TK_SELECT"],
|
|
["SET", "TK_SET"],
|
|
["TABLE", "TK_TABLE"],
|
|
["TEMP", "TK_TEMP"],
|
|
["TEMPORARY", "TK_TEMP"],
|
|
["THEN", "TK_THEN"],
|
|
["TIES", "TK_TIES"],
|
|
["TO", "TK_TO"],
|
|
["TRANSACTION", "TK_TRANSACTION"],
|
|
["TRIGGER", "TK_TRIGGER"],
|
|
["UNBOUNDED", "TK_UNBOUNDED"],
|
|
["UNION", "TK_UNION"],
|
|
["UNIQUE", "TK_UNIQUE"],
|
|
["UPDATE", "TK_UPDATE"],
|
|
["USING", "TK_USING"],
|
|
["VACUUM", "TK_VACUUM"],
|
|
["VALUES", "TK_VALUES"],
|
|
["VIEW", "TK_VIEW"],
|
|
["VIRTUAL", "TK_VIRTUAL"],
|
|
["WHEN", "TK_WHEN"],
|
|
["WHERE", "TK_WHERE"],
|
|
["WINDOW", "TK_WINDOW"],
|
|
["WITH", "TK_WITH"],
|
|
["WITHOUT", "TK_WITHOUT"],
|
|
],
|
|
)
|
|
.unwrap();
|
|
}
|