diff --git a/core/Cargo.toml b/core/Cargo.toml index 651282010..7039aabb0 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -111,3 +111,7 @@ harness = false [[bench]] name = "tpc_h_benchmark" harness = false + +[[bench]] +name = "parser_benchmark" +harness = false diff --git a/core/benches/parser_benchmark.rs b/core/benches/parser_benchmark.rs new file mode 100644 index 000000000..c33b3eeab --- /dev/null +++ b/core/benches/parser_benchmark.rs @@ -0,0 +1,33 @@ +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use pprof::criterion::{Output, PProfProfiler}; +use turso_core::parser::lexer::Lexer; + +fn bench_lexer(criterion: &mut Criterion) { + let queries = [ + "SELECT 1", + "SELECT * FROM users LIMIT 1", + "SELECT first_name, count(1) FROM users GROUP BY first_name HAVING count(1) > 1 ORDER BY count(1) LIMIT 1", + ]; + + for query in queries.iter() { + let mut group = criterion.benchmark_group(format!("Lexer `{query}`")); + let qb = query.as_bytes(); + + group.bench_function(BenchmarkId::new("limbo_lexer_query", ""), |b| { + b.iter(|| { + for token in Lexer::new(black_box(qb)) { + token.unwrap(); + } + }); + }); + + group.finish(); + } +} + +criterion_group! 
{ + name = benches; + config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); + targets = bench_lexer +} +criterion_main!(benches); diff --git a/core/build.rs b/core/build.rs index 50afee6bf..e331ca110 100644 --- a/core/build.rs +++ b/core/build.rs @@ -1,6 +1,134 @@ -use std::fs; +use std::collections::HashMap; +use std::fs::{self, File}; +use std::io::{BufWriter, Write}; use std::path::PathBuf; +/// generates a trie-like function with nested match expressions for parsing SQL keywords +/// example: input: [["ABORT", "TK_ABORT"], ["ACTION", "TK_ACTION"], ["ADD", "TK_ADD"],] +/// A +/// ├─ B +/// │ ├─ O +/// │ │ ├─ R +/// │ │ │ ├─ T -> TK_ABORT +/// ├─ C +/// │ ├─ T +/// │ │ ├─ I +/// │ │ │ ├─ O +/// │ │ │ │ ├─ N -> TK_ACTION +/// ├─ D +/// │ ├─ D -> TK_ADD +fn build_keyword_map( + writer: &mut impl Write, + func_name: &str, + keywords: &[[&'static str; 2]], +) -> Result<(), std::io::Error> { + assert!(!keywords.is_empty()); + let mut min_len = keywords[0][0].len(); + let mut max_len = keywords[0][0].len(); + + struct PathEntry { + result: Option<&'static str>, + sub_entries: HashMap>, + } + + let mut paths = Box::new(PathEntry { + result: None, + sub_entries: HashMap::new(), + }); + + for keyword in keywords { + let keyword_b = keyword[0].as_bytes(); + + if keyword_b.len() < min_len { + min_len = keyword_b.len(); + } + + if keyword_b.len() > max_len { + max_len = keyword_b.len(); + } + + let mut current = &mut paths; + + for &b in keyword_b { + let upper_b = b.to_ascii_uppercase(); + + match current.sub_entries.get(&upper_b) { + Some(_) => { + current = current.sub_entries.get_mut(&upper_b).unwrap(); + } + None => { + let new_entry = Box::new(PathEntry { + result: None, + sub_entries: HashMap::new(), + }); + current.sub_entries.insert(upper_b, new_entry); + current = current.sub_entries.get_mut(&upper_b).unwrap(); + } + } + } + + assert!(current.result.is_none()); + current.result = Some(keyword[1]); + } + + fn 
write_entry(writer: &mut impl Write, entry: &PathEntry) -> Result<(), std::io::Error> { + if let Some(result) = entry.result { + writeln!(writer, "if idx == buf.len() {{")?; + writeln!(writer, "return Some(TokenType::{result});")?; + writeln!(writer, "}}")?; + } + + if entry.sub_entries.is_empty() { + writeln!(writer, "None")?; + return Ok(()); + } + + writeln!(writer, "if idx >= buf.len() {{")?; + writeln!(writer, "return None;")?; + writeln!(writer, "}}")?; + + writeln!(writer, "match buf[idx] {{")?; + for (&b, sub_entry) in &entry.sub_entries { + if b.is_ascii_alphabetic() { + writeln!(writer, "{} | {} => {{", b, b.to_ascii_lowercase())?; + } else { + writeln!(writer, "{b} => {{")?; + } + writeln!(writer, "idx += 1;")?; + write_entry(writer, sub_entry)?; + writeln!(writer, "}}")?; + } + + writeln!(writer, "_ => None")?; + writeln!(writer, "}}")?; + Ok(()) + } + + writeln!( + writer, + "pub(crate) const MAX_KEYWORD_LEN: usize = {max_len};" + )?; + writeln!( + writer, + "pub(crate) const MIN_KEYWORD_LEN: usize = {min_len};" + )?; + writeln!(writer, "/// Check if `word` is a keyword")?; + writeln!( + writer, + "pub fn {func_name}(buf: &[u8]) -> Option {{" + )?; + writeln!( + writer, + "if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {{" + )?; + writeln!(writer, "return None;")?; + writeln!(writer, "}}")?; + writeln!(writer, "let mut idx = 0;")?; + write_entry(writer, &paths)?; + writeln!(writer, "}}")?; + Ok(()) +} + fn main() { let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); let built_file = out_dir.join("built.rs"); @@ -18,4 +146,161 @@ fn main() { ), ) .expect("Failed to append to built file"); + + let keywords = out_dir.join("keywords.rs"); + let mut keywords = BufWriter::new(File::create(keywords).unwrap()); + build_keyword_map( + &mut keywords, + "keyword_token", + &[ + ["ABORT", "TK_ABORT"], + ["ACTION", "TK_ACTION"], + ["ADD", "TK_ADD"], + ["AFTER", "TK_AFTER"], + ["ALL", "TK_ALL"], + ["ALTER", "TK_ALTER"], + ["ALWAYS", 
"TK_ALWAYS"], + ["ANALYZE", "TK_ANALYZE"], + ["AND", "TK_AND"], + ["AS", "TK_AS"], + ["ASC", "TK_ASC"], + ["ATTACH", "TK_ATTACH"], + ["AUTOINCREMENT", "TK_AUTOINCR"], + ["BEFORE", "TK_BEFORE"], + ["BEGIN", "TK_BEGIN"], + ["BETWEEN", "TK_BETWEEN"], + ["BY", "TK_BY"], + ["CASCADE", "TK_CASCADE"], + ["CASE", "TK_CASE"], + ["CAST", "TK_CAST"], + ["CHECK", "TK_CHECK"], + ["COLLATE", "TK_COLLATE"], + ["COLUMN", "TK_COLUMNKW"], + ["COMMIT", "TK_COMMIT"], + ["CONFLICT", "TK_CONFLICT"], + ["CONSTRAINT", "TK_CONSTRAINT"], + ["CREATE", "TK_CREATE"], + ["CROSS", "TK_JOIN_KW"], + ["CURRENT", "TK_CURRENT"], + ["CURRENT_DATE", "TK_CTIME_KW"], + ["CURRENT_TIME", "TK_CTIME_KW"], + ["CURRENT_TIMESTAMP", "TK_CTIME_KW"], + ["DATABASE", "TK_DATABASE"], + ["DEFAULT", "TK_DEFAULT"], + ["DEFERRABLE", "TK_DEFERRABLE"], + ["DEFERRED", "TK_DEFERRED"], + ["DELETE", "TK_DELETE"], + ["DESC", "TK_DESC"], + ["DETACH", "TK_DETACH"], + ["DISTINCT", "TK_DISTINCT"], + ["DO", "TK_DO"], + ["DROP", "TK_DROP"], + ["EACH", "TK_EACH"], + ["ELSE", "TK_ELSE"], + ["END", "TK_END"], + ["ESCAPE", "TK_ESCAPE"], + ["EXCEPT", "TK_EXCEPT"], + ["EXCLUDE", "TK_EXCLUDE"], + ["EXCLUSIVE", "TK_EXCLUSIVE"], + ["EXISTS", "TK_EXISTS"], + ["EXPLAIN", "TK_EXPLAIN"], + ["FAIL", "TK_FAIL"], + ["FILTER", "TK_FILTER"], + ["FIRST", "TK_FIRST"], + ["FOLLOWING", "TK_FOLLOWING"], + ["FOR", "TK_FOR"], + ["FOREIGN", "TK_FOREIGN"], + ["FROM", "TK_FROM"], + ["FULL", "TK_JOIN_KW"], + ["GENERATED", "TK_GENERATED"], + ["GLOB", "TK_LIKE_KW"], + ["GROUP", "TK_GROUP"], + ["GROUPS", "TK_GROUPS"], + ["HAVING", "TK_HAVING"], + ["IF", "TK_IF"], + ["IGNORE", "TK_IGNORE"], + ["IMMEDIATE", "TK_IMMEDIATE"], + ["IN", "TK_IN"], + ["INDEX", "TK_INDEX"], + ["INDEXED", "TK_INDEXED"], + ["INITIALLY", "TK_INITIALLY"], + ["INNER", "TK_JOIN_KW"], + ["INSERT", "TK_INSERT"], + ["INSTEAD", "TK_INSTEAD"], + ["INTERSECT", "TK_INTERSECT"], + ["INTO", "TK_INTO"], + ["IS", "TK_IS"], + ["ISNULL", "TK_ISNULL"], + ["JOIN", "TK_JOIN"], + ["KEY", "TK_KEY"], + ["LAST", 
"TK_LAST"], + ["LEFT", "TK_JOIN_KW"], + ["LIKE", "TK_LIKE_KW"], + ["LIMIT", "TK_LIMIT"], + ["MATCH", "TK_MATCH"], + ["MATERIALIZED", "TK_MATERIALIZED"], + ["NATURAL", "TK_JOIN_KW"], + ["NO", "TK_NO"], + ["NOT", "TK_NOT"], + ["NOTHING", "TK_NOTHING"], + ["NOTNULL", "TK_NOTNULL"], + ["NULL", "TK_NULL"], + ["NULLS", "TK_NULLS"], + ["OF", "TK_OF"], + ["OFFSET", "TK_OFFSET"], + ["ON", "TK_ON"], + ["OR", "TK_OR"], + ["ORDER", "TK_ORDER"], + ["OTHERS", "TK_OTHERS"], + ["OUTER", "TK_JOIN_KW"], + ["OVER", "TK_OVER"], + ["PARTITION", "TK_PARTITION"], + ["PLAN", "TK_PLAN"], + ["PRAGMA", "TK_PRAGMA"], + ["PRECEDING", "TK_PRECEDING"], + ["PRIMARY", "TK_PRIMARY"], + ["QUERY", "TK_QUERY"], + ["RAISE", "TK_RAISE"], + ["RANGE", "TK_RANGE"], + ["RECURSIVE", "TK_RECURSIVE"], + ["REFERENCES", "TK_REFERENCES"], + ["REGEXP", "TK_LIKE_KW"], + ["REINDEX", "TK_REINDEX"], + ["RELEASE", "TK_RELEASE"], + ["RENAME", "TK_RENAME"], + ["REPLACE", "TK_REPLACE"], + ["RETURNING", "TK_RETURNING"], + ["RESTRICT", "TK_RESTRICT"], + ["RIGHT", "TK_JOIN_KW"], + ["ROLLBACK", "TK_ROLLBACK"], + ["ROW", "TK_ROW"], + ["ROWS", "TK_ROWS"], + ["SAVEPOINT", "TK_SAVEPOINT"], + ["SELECT", "TK_SELECT"], + ["SET", "TK_SET"], + ["TABLE", "TK_TABLE"], + ["TEMP", "TK_TEMP"], + ["TEMPORARY", "TK_TEMP"], + ["THEN", "TK_THEN"], + ["TIES", "TK_TIES"], + ["TO", "TK_TO"], + ["TRANSACTION", "TK_TRANSACTION"], + ["TRIGGER", "TK_TRIGGER"], + ["UNBOUNDED", "TK_UNBOUNDED"], + ["UNION", "TK_UNION"], + ["UNIQUE", "TK_UNIQUE"], + ["UPDATE", "TK_UPDATE"], + ["USING", "TK_USING"], + ["VACUUM", "TK_VACUUM"], + ["VALUES", "TK_VALUES"], + ["VIEW", "TK_VIEW"], + ["VIRTUAL", "TK_VIRTUAL"], + ["WHEN", "TK_WHEN"], + ["WHERE", "TK_WHERE"], + ["WINDOW", "TK_WINDOW"], + ["WITH", "TK_WITH"], + ["WITHOUT", "TK_WITHOUT"], + ], + ) + .unwrap(); } diff --git a/core/lib.rs b/core/lib.rs index 232e732d1..50d92ff03 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -30,6 +30,7 @@ mod uuid; mod vdbe; mod vector; mod vtab; +pub mod parser; #[cfg(feature = 
"fuzz")] pub mod numeric; diff --git a/core/parser/ast.rs b/core/parser/ast.rs new file mode 100644 index 000000000..5d2db3778 --- /dev/null +++ b/core/parser/ast.rs @@ -0,0 +1,1455 @@ +use std::str::{self, Bytes}; + +use strum_macros::{EnumIter, EnumString}; + +/// `?` or `$` Prepared statement arg placeholder(s) +#[derive(Default)] +pub struct ParameterInfo { + /// Number of SQL parameters in a prepared statement, like `sqlite3_bind_parameter_count` + pub count: u32, + /// Parameter name(s) if any + pub names: Vec, +} + +/// Statement or Explain statement +// https://sqlite.org/syntax/sql-stmt.html +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Cmd { + /// `EXPLAIN` statement + Explain(Stmt), + /// `EXPLAIN QUERY PLAN` statement + ExplainQueryPlan(Stmt), + /// statement + Stmt(Stmt), +} + +/// SQL statement +// https://sqlite.org/syntax/sql-stmt.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Stmt { + /// `ALTER TABLE`: table name, body + AlterTable { + // table name + name: QualifiedName, + // `ALTER TABLE` body + body: AlterTableBody, + }, + /// `ANALYSE`: object name + Analyze { + // object name + name: Option, + }, + /// `ATTACH DATABASE` + Attach { + /// filename + // TODO distinction between ATTACH and ATTACH DATABASE + expr: Box, + /// schema name + db_name: Box, + /// password + key: Option>, + }, + /// `BEGIN`: tx type, tx name + Begin { + // transaction type + typ: Option, + // transaction name + name: Option, + }, + /// `COMMIT`/`END`: tx name + Commit { + // tx name + name: Option, + }, // TODO distinction between COMMIT and END + /// `CREATE INDEX` + CreateIndex { + /// `UNIQUE` + unique: bool, + /// `IF NOT EXISTS` + if_not_exists: bool, + /// index name + idx_name: QualifiedName, + /// table name + tbl_name: Name, + /// indexed columns or expressions + columns: Vec, + /// partial index + where_clause: Option>, + }, + /// `CREATE TABLE` + CreateTable { + /// 
`TEMPORARY` + temporary: bool, // TODO distinction between TEMP and TEMPORARY + /// `IF NOT EXISTS` + if_not_exists: bool, + /// table name + tbl_name: QualifiedName, + /// table body + body: CreateTableBody, + }, + /// `CREATE TRIGGER` + CreateTrigger { + /// `TEMPORARY` + temporary: bool, + /// `IF NOT EXISTS` + if_not_exists: bool, + /// trigger name + trigger_name: QualifiedName, + /// `BEFORE`/`AFTER`/`INSTEAD OF` + time: Option, + /// `DELETE`/`INSERT`/`UPDATE` + event: TriggerEvent, + /// table name + tbl_name: QualifiedName, + /// `FOR EACH ROW` + for_each_row: bool, + /// `WHEN` + when_clause: Option>, + /// statements + commands: Vec, + }, + /// `CREATE VIEW` + CreateView { + /// `TEMPORARY` + temporary: bool, + /// `IF NOT EXISTS` + if_not_exists: bool, + /// view name + view_name: QualifiedName, + /// columns + columns: Option>, + /// query + select: Select, + }, + /// `CREATE VIRTUAL TABLE` + CreateVirtualTable { + /// `IF NOT EXISTS` + if_not_exists: bool, + /// table name + tbl_name: QualifiedName, + /// module name + module_name: Name, + /// args + args: Option>, // TODO smol str + }, + /// `DELETE` + Delete { + /// CTE + with: Option, + /// `FROM` table name + tbl_name: QualifiedName, + /// `INDEXED` + indexed: Option, + /// `WHERE` clause + where_clause: Option>, + /// `RETURNING` + returning: Option>, + /// `ORDER BY` + order_by: Option>, + /// `LIMIT` + limit: Option, + }, + /// `DETACH DATABASE`: db name + Detach { + // db name + name: Box, + }, // TODO distinction between DETACH and DETACH DATABASE + /// `DROP INDEX` + DropIndex { + /// `IF EXISTS` + if_exists: bool, + /// index name + idx_name: QualifiedName, + }, + /// `DROP TABLE` + DropTable { + /// `IF EXISTS` + if_exists: bool, + /// table name + tbl_name: QualifiedName, + }, + /// `DROP TRIGGER` + DropTrigger { + /// `IF EXISTS` + if_exists: bool, + /// trigger name + trigger_name: QualifiedName, + }, + /// `DROP VIEW` + DropView { + /// `IF EXISTS` + if_exists: bool, + /// view name + 
view_name: QualifiedName, + }, + /// `INSERT` + Insert { + /// CTE + with: Option, + /// `OR` + or_conflict: Option, // TODO distinction between REPLACE and INSERT OR REPLACE + /// table name + tbl_name: QualifiedName, + /// `COLUMNS` + columns: Option>, + /// `VALUES` or `SELECT` + body: InsertBody, + /// `RETURNING` + returning: Option>, + }, + /// `PRAGMA`: pragma name, body + Pragma { + // pragma name + name: QualifiedName, + // pragma body + body: Option, + }, + /// `REINDEX` + Reindex { + /// collation or index or table name + name: Option, + }, + /// `RELEASE`: savepoint name + Release { + // savepoint name + name: Name, + }, // TODO distinction between RELEASE and RELEASE SAVEPOINT + /// `ROLLBACK` + Rollback { + /// transaction name + tx_name: Option, + /// savepoint name + savepoint_name: Option, // TODO distinction between TO and TO SAVEPOINT + }, + /// `SAVEPOINT`: savepoint name + Savepoint { + // savepoint name + name: Name, + }, + /// `SELECT` + Select(Select), + /// `UPDATE` + Update { + /// CTE + with: Option, + /// `OR` + or_conflict: Option, + /// table name + tbl_name: QualifiedName, + /// `INDEXED` + indexed: Option, + /// `SET` assignments + sets: Vec, + /// `FROM` + from: Option, + /// `WHERE` clause + where_clause: Option>, + /// `RETURNING` + returning: Option>, + /// `ORDER BY` + order_by: Option>, + /// `LIMIT` + limit: Option, + }, + /// `VACUUM`: database name, into expr + Vacuum { + // database name + name: Option, + // into expression + into: Option>, + }, +} + +#[repr(transparent)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +/// Internal ID of a table reference. +/// +/// Used by [Expr::Column] and [Expr::RowId] to refer to a table. +/// E.g. in 'SELECT * FROM t UNION ALL SELECT * FROM t', there are two table references, +/// so there are two TableInternalIds. +/// +/// FIXME: rename this to TableReferenceId. 
+pub struct TableInternalId(usize); + +/// SQL expression +// https://sqlite.org/syntax/expr.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Expr { + /// `BETWEEN` + Between { + /// expression + lhs: Box, + /// `NOT` + not: bool, + /// start + start: Box, + /// end + end: Box, + }, + /// binary expression + Binary(Box, Operator, Box), + /// `CASE` expression + Case { + /// operand + base: Option>, + /// `WHEN` condition `THEN` result + when_then_pairs: Vec<(Box, Box)>, + /// `ELSE` result + else_expr: Option>, + }, + /// CAST expression + Cast { + /// expression + expr: Box, + /// `AS` type name + type_name: Option, + }, + /// `COLLATE`: expression + Collate(Box, String), + /// schema-name.table-name.column-name + DoublyQualified(Name, Name, Name), + /// `EXISTS` subquery + Exists(Select), + /// call to a built-in function + FunctionCall { + /// function name + name: Name, + /// `DISTINCT` + distinctness: Option, + /// arguments + args: Option>>, + /// `ORDER BY` + order_by: Option>, + /// `FILTER` + filter_over: Option, + }, + /// Function call expression with '*' as arg + FunctionCallStar { + /// function name + name: Name, + /// `FILTER` + filter_over: Option, + }, + /// Identifier + Id(Name), + /// Column + Column { + /// the x in `x.y.z`. index of the db in catalog. + database: Option, + /// the y in `x.y.z`. index of the table in catalog. + table: TableInternalId, + /// the z in `x.y.z`. index of the column in the table. + column: usize, + /// is the column a rowid alias + is_rowid_alias: bool, + }, + /// `ROWID` + RowId { + /// the x in `x.y.z`. index of the db in catalog. + database: Option, + /// the y in `x.y.z`. index of the table in catalog. 
+ table: TableInternalId, + }, + /// `IN` + InList { + /// expression + lhs: Box, + /// `NOT` + not: bool, + /// values + rhs: Option>>, + }, + /// `IN` subselect + InSelect { + /// expression + lhs: Box, + /// `NOT` + not: bool, + /// subquery + rhs: Select, + }, + /// `IN` table name / function + InTable { + /// expression + lhs: Box, + /// `NOT` + not: bool, + /// table name + rhs: QualifiedName, + /// table function arguments + args: Option>>, + }, + /// `IS NULL` + IsNull(Box), + /// `LIKE` + Like { + /// expression + lhs: Box, + /// `NOT` + not: bool, + /// operator + op: LikeOperator, + /// pattern + rhs: Box, + /// `ESCAPE` char + escape: Option>, + }, + /// Literal expression + Literal(Literal), + /// Name + Name(Name), + /// `NOT NULL` or `NOTNULL` + NotNull(Box), + /// Parenthesized subexpression + Parenthesized(Vec>), + /// Qualified name + Qualified(Name, Name), + /// `RAISE` function call + Raise(ResolveType, Option>), + /// Subquery expression + Subquery(Select), + /// Unary expression + Unary(UnaryOperator, Box), + /// Parameters + Variable(String), +} + +/// SQL literal +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Literal { + /// Number + Numeric(String), + /// String + // TODO Check that string is already quoted and correctly escaped + String(String), + /// BLOB + // TODO Check that string is valid (only hexa) + Blob(String), + /// Keyword + Keyword(String), + /// `NULL` + Null, + /// `CURRENT_DATE` + CurrentDate, + /// `CURRENT_TIME` + CurrentTime, + /// `CURRENT_TIMESTAMP` + CurrentTimestamp, +} + +/// Textual comparison operator in an expression +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum LikeOperator { + /// `GLOB` + Glob, + /// `LIKE` + Like, + /// `MATCH` + Match, + /// `REGEXP` + Regexp, +} + +/// SQL operators +#[derive(Copy, Clone, Debug, PartialEq, Eq)] 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Operator { + /// `+` + Add, + /// `AND` + And, + /// `->` + ArrowRight, + /// `->>` + ArrowRightShift, + /// `&` + BitwiseAnd, + /// `|` + BitwiseOr, + /// `~` + BitwiseNot, + /// String concatenation (`||`) + Concat, + /// `=` or `==` + Equals, + /// `/` + Divide, + /// `>` + Greater, + /// `>=` + GreaterEquals, + /// `IS` + Is, + /// `IS NOT` + IsNot, + /// `<<` + LeftShift, + /// `<` + Less, + /// `<=` + LessEquals, + /// `%` + Modulus, + /// `*` + Multiply, + /// `!=` or `<>` + NotEquals, + /// `OR` + Or, + /// `>>` + RightShift, + /// `-` + Subtract, +} + +/// Unary operators +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum UnaryOperator { + /// bitwise negation (`~`) + BitwiseNot, + /// negative-sign + Negative, + /// `NOT` + Not, + /// positive-sign + Positive, +} + +/// `SELECT` statement +// https://sqlite.org/lang_select.html +// https://sqlite.org/syntax/factored-select-stmt.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Select { + /// CTE + pub with: Option, + /// body + pub body: SelectBody, + /// `ORDER BY` + pub order_by: Option>, // ORDER BY term does not match any column in the result set + /// `LIMIT` + pub limit: Option, +} + +/// `SELECT` body +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct SelectBody { + /// first select + pub select: OneSelect, + /// compounds + pub compounds: Option>, +} + +/// Compound select +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct CompoundSelect { + /// operator + pub operator: CompoundOperator, + /// select + pub select: OneSelect, +} + +/// Compound operators +// 
https://sqlite.org/syntax/compound-operator.html +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum CompoundOperator { + /// `UNION` + Union, + /// `UNION ALL` + UnionAll, + /// `EXCEPT` + Except, + /// `INTERSECT` + Intersect, +} + +/// `SELECT` core +// https://sqlite.org/syntax/select-core.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum OneSelect { + /// `SELECT` + Select(SelectInner), + /// `VALUES` + Values(Vec>>), +} + +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +/// `SELECT` core +pub struct SelectInner { + /// `DISTINCT` + pub distinctness: Option, + /// columns + pub columns: Vec, + /// `FROM` clause + pub from: Option, + /// `WHERE` clause + pub where_clause: Option>, + /// `GROUP BY` + pub group_by: Option, + /// `WINDOW` definition + pub window_clause: Option>, +} + +/// `SELECT` ... 
`FROM` clause +// https://sqlite.org/syntax/join-clause.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct FromClause { + /// table + pub select: Option>, // FIXME mandatory + /// `JOIN`ed tabled + pub joins: Option>, + /// A default join operator + pub op: Option, // FIXME transient +} + +/// `SELECT` distinctness +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Distinctness { + /// `DISTINCT` + Distinct, + /// `ALL` + All, +} + +/// `SELECT` or `RETURNING` result column +// https://sqlite.org/syntax/result-column.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum ResultColumn { + /// expression + Expr(Box, Option), + /// `*` + Star, + /// table name.`*` + TableStar(Name), +} + +/// Alias +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum As { + /// `AS` + As(Name), + /// no `AS` + Elided(Name), // FIXME Ids +} + +/// `JOIN` clause +// https://sqlite.org/syntax/join-clause.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct JoinedSelectTable { + /// operator + pub operator: JoinOperator, + /// table + pub table: Box, + /// constraint + pub constraint: Option, +} + +/// Table or subquery +// https://sqlite.org/syntax/table-or-subquery.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum SelectTable { + /// table + Table(QualifiedName, Option, Option), + /// table function call + TableCall(QualifiedName, Option>>, Option), + /// `SELECT` subquery + Select(Select, Option), + /// subquery + Sub(FromClause, Option), +} + +/// Join operators +// 
https://sqlite.org/syntax/join-operator.html +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum JoinOperator { + /// `,` + Comma, + /// `JOIN` + TypedJoin(Option), +} + +// https://github.com/sqlite/sqlite/blob/80511f32f7e71062026edd471913ef0455563964/src/select.c#L197-L257 +bitflags::bitflags! { + /// `JOIN` types + #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + pub struct JoinType: u8 { + /// `INNER` + const INNER = 0x01; + /// `CROSS` => INNER|CROSS + const CROSS = 0x02; + /// `NATURAL` + const NATURAL = 0x04; + /// `LEFT` => LEFT|OUTER + const LEFT = 0x08; + /// `RIGHT` => RIGHT|OUTER + const RIGHT = 0x10; + /// `OUTER` + const OUTER = 0x20; + } +} + +/// `JOIN` constraint +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum JoinConstraint { + /// `ON` + On(Box), + /// `USING`: col names + Using(Vec), +} + +/// `GROUP BY` +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct GroupBy { + /// expressions + pub exprs: Vec>, + /// `HAVING` + pub having: Option>, // HAVING clause on a non-aggregate query +} + +/// identifier or string or `CROSS` or `FULL` or `INNER` or `LEFT` or `NATURAL` or `OUTER` or `RIGHT`. 
+#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Name { + /// Identifier + Ident(String), + /// Quoted values + Quoted(String), +} + +/// Qualified name +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct QualifiedName { + /// schema + pub db_name: Option, + /// object name + pub name: Name, + /// alias + pub alias: Option, // FIXME restrict alias usage (fullname vs xfullname) +} + +/// `ALTER TABLE` body +// https://sqlite.org/lang_altertable.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum AlterTableBody { + /// `RENAME TO`: new table name + RenameTo(Name), + /// `ADD COLUMN` + AddColumn(ColumnDefinition), // TODO distinction between ADD and ADD COLUMN + /// `RENAME COLUMN` + RenameColumn { + /// old name + old: Name, + /// new name + new: Name, + }, + /// `DROP COLUMN` + DropColumn(Name), // TODO distinction between DROP and DROP COLUMN +} + +/// `CREATE TABLE` body +// https://sqlite.org/lang_createtable.html +// https://sqlite.org/syntax/create-table-stmt.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum CreateTableBody { + /// columns and constraints + ColumnsAndConstraints { + /// table column definitions + columns: Vec, + /// table constraints + constraints: Option>, + /// table options + options: TableOptions, + }, + /// `AS` select + AsSelect(Select), +} + +/// Table column definition +// https://sqlite.org/syntax/column-def.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ColumnDefinition { + /// column name + pub col_name: Name, + /// column type + pub col_type: Option, + /// column constraints + pub constraints: Vec, +} + +/// Named column constraint 
+// https://sqlite.org/syntax/column-constraint.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct NamedColumnConstraint { + /// constraint name + pub name: Option, + /// constraint + pub constraint: ColumnConstraint, +} + +/// Column constraint +// https://sqlite.org/syntax/column-constraint.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum ColumnConstraint { + /// `PRIMARY KEY` + PrimaryKey { + /// `ASC` / `DESC` + order: Option, + /// `ON CONFLICT` clause + conflict_clause: Option, + /// `AUTOINCREMENT` + auto_increment: bool, + }, + /// `NULL` + NotNull { + /// `NOT` + nullable: bool, + /// `ON CONFLICT` clause + conflict_clause: Option, + }, + /// `UNIQUE` + Unique(Option), + /// `CHECK` + Check(Box), + /// `DEFAULT` + Default(Box), + /// `DEFERRABLE` + Defer(DeferSubclause), // FIXME + /// `COLLATE` + Collate { + /// collation name + collation_name: Name, // FIXME Ids + }, + /// `REFERENCES` foreign-key clause + ForeignKey { + /// clause + clause: ForeignKeyClause, + /// `DEFERRABLE` + deref_clause: Option, + }, + /// `GENERATED` + Generated { + /// expression + expr: Box, + /// `STORED` / `VIRTUAL` + typ: Option, + }, +} + +/// Named table constraint +// https://sqlite.org/syntax/table-constraint.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct NamedTableConstraint { + /// constraint name + pub name: Option, + /// constraint + pub constraint: TableConstraint, +} + +/// Table constraint +// https://sqlite.org/syntax/table-constraint.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum TableConstraint { + /// `PRIMARY KEY` + PrimaryKey { + /// columns + columns: Vec, + /// `AUTOINCREMENT` + auto_increment: bool, + /// `ON CONFLICT` 
clause + conflict_clause: Option, + }, + /// `UNIQUE` + Unique { + /// columns + columns: Vec, + /// `ON CONFLICT` clause + conflict_clause: Option, + }, + /// `CHECK` + Check(Box), + /// `FOREIGN KEY` + ForeignKey { + /// columns + columns: Vec, + /// `REFERENCES` + clause: ForeignKeyClause, + /// `DEFERRABLE` + deref_clause: Option, + }, +} + +bitflags::bitflags! { + /// `CREATE TABLE` options + #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + pub struct TableOptions: u8 { + /// None + const NONE = 0; + /// `WITHOUT ROWID` + const WITHOUT_ROWID = 1; + /// `STRICT` + const STRICT = 2; + } +} + +/// Sort orders +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum SortOrder { + /// `ASC` + Asc, + /// `DESC` + Desc, +} + +/// `NULLS FIRST` or `NULLS LAST` +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum NullsOrder { + /// `NULLS FIRST` + First, + /// `NULLS LAST` + Last, +} + +/// `REFERENCES` clause +// https://sqlite.org/syntax/foreign-key-clause.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ForeignKeyClause { + /// foreign table name + pub tbl_name: Name, + /// foreign table columns + pub columns: Option>, + /// referential action(s) / deferrable option(s) + pub args: Vec, +} + +/// foreign-key reference args +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum RefArg { + /// `ON DELETE` + OnDelete(RefAct), + /// `ON INSERT` + OnInsert(RefAct), + /// `ON UPDATE` + OnUpdate(RefAct), + /// `MATCH` + Match(Name), +} + +/// foreign-key reference actions +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", 
derive(serde::Serialize, serde::Deserialize))] +pub enum RefAct { + /// `SET NULL` + SetNull, + /// `SET DEFAULT` + SetDefault, + /// `CASCADE` + Cascade, + /// `RESTRICT` + Restrict, + /// `NO ACTION` + NoAction, +} + +/// foreign-key defer clause +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct DeferSubclause { + /// `DEFERRABLE` + pub deferrable: bool, + /// `INITIALLY` `DEFERRED` / `IMMEDIATE` + pub init_deferred: Option, +} + +/// `INITIALLY` `DEFERRED` / `IMMEDIATE` +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum InitDeferredPred { + /// `INITIALLY DEFERRED` + InitiallyDeferred, + /// `INITIALLY IMMEDIATE` + InitiallyImmediate, // default +} + +/// Indexed column +// https://sqlite.org/syntax/indexed-column.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct IndexedColumn { + /// column name + pub col_name: Name, + /// `COLLATE` + pub collation_name: Option, // FIXME Ids + /// `ORDER BY` + pub order: Option, +} + +/// `INDEXED BY` / `NOT INDEXED` +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Indexed { + /// `INDEXED BY`: idx name + IndexedBy(Name), + /// `NOT INDEXED` + NotIndexed, +} + +/// Sorted column +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct SortedColumn { + /// expression + pub expr: Box, + /// `ASC` / `DESC` + pub order: Option, + /// `NULLS FIRST` / `NULLS LAST` + pub nulls: Option, +} + +/// `LIMIT` +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Limit { + /// count + pub expr: Box, + /// `OFFSET` + pub offset: Option>, // TODO distinction between LIMIT 
offset, count and LIMIT count OFFSET offset +} + +/// `INSERT` body +// https://sqlite.org/lang_insert.html +// https://sqlite.org/syntax/insert-stmt.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum InsertBody { + /// `SELECT` or `VALUES` + Select(Select, Option>), + /// `DEFAULT VALUES` + DefaultValues, +} + +/// `UPDATE ... SET` +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Set { + /// column name(s) + pub col_names: Vec, + /// expression + pub expr: Box, +} + +/// `PRAGMA` body +// https://sqlite.org/syntax/pragma-stmt.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum PragmaBody { + /// `=` + Equals(PragmaValue), + /// function call + Call(PragmaValue), +} + +/// `PRAGMA` value +// https://sqlite.org/syntax/pragma-value.html +pub type PragmaValue = Expr; // TODO + +/// `PRAGMA` value +// https://sqlite.org/pragma.html +#[derive(Clone, Debug, PartialEq, Eq, EnumIter, EnumString, strum::Display)] +#[strum(serialize_all = "snake_case")] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum PragmaName { + /// Returns the application ID of the database file. + ApplicationId, + /// set the autovacuum mode + AutoVacuum, + /// `cache_size` pragma + CacheSize, + /// List databases + DatabaseList, + /// Encoding - only support utf8 + Encoding, + /// Run integrity check on the database file + IntegrityCheck, + /// `journal_mode` pragma + JournalMode, + /// Noop as per SQLite docs + LegacyFileFormat, + /// Return the total number of pages in the database file. + PageCount, + /// Return the page size of the database in bytes. + PageSize, + /// Returns schema version of the database file. 
+ SchemaVersion, + /// returns information about the columns of a table + TableInfo, + /// enable capture-changes logic for the connection + UnstableCaptureDataChangesConn, + /// Returns the user version of the database file. + UserVersion, + /// trigger a checkpoint to run on database(s) if WAL is enabled + WalCheckpoint, +} + +/// `CREATE TRIGGER` time +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum TriggerTime { + /// `BEFORE` + Before, // default + /// `AFTER` + After, + /// `INSTEAD OF` + InsteadOf, +} + +/// `CREATE TRIGGER` event +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum TriggerEvent { + /// `DELETE` + Delete, + /// `INSERT` + Insert, + /// `UPDATE` + Update, + /// `UPDATE OF`: col names + UpdateOf(Vec), +} + +/// `CREATE TRIGGER` command +// https://sqlite.org/lang_createtrigger.html +// https://sqlite.org/syntax/create-trigger-stmt.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum TriggerCmd { + /// `UPDATE` + Update(TriggerCmdUpdate), + /// `INSERT` + Insert(TriggerCmdInsert), + /// `DELETE` + Delete(TriggerCmdDelete), + /// `SELECT` + Select(Select), +} + +/// `UPDATE` trigger command +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct TriggerCmdUpdate { + /// `OR` + pub or_conflict: Option, + /// table name + pub tbl_name: Name, + /// `SET` assignments + pub sets: Vec, + /// `FROM` + pub from: Option, + /// `WHERE` clause + pub where_clause: Option>, +} + +/// `INSERT` trigger command +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct TriggerCmdInsert { + /// `OR` + pub or_conflict: Option, + /// table name + pub tbl_name: Name, + /// `COLUMNS` + 
pub col_names: Option>, + /// `SELECT` or `VALUES` + pub select: Select, + /// `ON CONFLICT` clause + pub upsert: Option>, + /// `RETURNING` + pub returning: Option>, +} + +/// `DELETE` trigger command +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct TriggerCmdDelete { + /// table name + pub tbl_name: Name, + /// `WHERE` clause + pub where_clause: Option>, +} + +/// Conflict resolution types +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum ResolveType { + /// `ROLLBACK` + Rollback, + /// `ABORT` + Abort, // default + /// `FAIL` + Fail, + /// `IGNORE` + Ignore, + /// `REPLACE` + Replace, +} + +impl ResolveType { + /// Get the OE_XXX bit value + pub fn bit_value(&self) -> usize { + match self { + ResolveType::Rollback => 1, + ResolveType::Abort => 2, + ResolveType::Fail => 3, + ResolveType::Ignore => 4, + ResolveType::Replace => 5, + } + } +} + +/// `WITH` clause +// https://sqlite.org/lang_with.html +// https://sqlite.org/syntax/with-clause.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct With { + /// `RECURSIVE` + pub recursive: bool, + /// CTEs + pub ctes: Vec, +} + +/// CTE materialization +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Materialized { + /// No hint + Any, + /// `MATERIALIZED` + Yes, + /// `NOT MATERIALIZED` + No, +} + +/// CTE +// https://sqlite.org/syntax/common-table-expression.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct CommonTableExpr { + /// table name + pub tbl_name: Name, + /// table columns + pub columns: Option>, // check no duplicate + /// `MATERIALIZED` + pub materialized: Materialized, + /// query + pub select: Select, 
+} + +/// Column type +// https://sqlite.org/syntax/type-name.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Type { + /// type name + pub name: String, // TODO Validate: Ids+ + /// type size + pub size: Option, +} + +/// Column type size limit(s) +// https://sqlite.org/syntax/type-name.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum TypeSize { + /// maximum size + MaxSize(Box), + /// precision + TypeSize(Box, Box), +} + +/// Transaction types +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum TransactionType { + /// `DEFERRED` + Deferred, // default + /// `IMMEDIATE` + Immediate, + /// `EXCLUSIVE` + Exclusive, +} + +/// Upsert clause +// https://sqlite.org/lang_upsert.html +// https://sqlite.org/syntax/upsert-clause.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Upsert { + /// conflict targets + pub index: Option, + /// `DO` clause + pub do_clause: UpsertDo, + /// next upsert + pub next: Option>, +} + +/// Upsert conflict targets +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct UpsertIndex { + /// columns + pub targets: Vec, + /// `WHERE` clause + pub where_clause: Option>, +} + +/// Upsert `DO` action +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum UpsertDo { + /// `SET` + Set { + /// assignments + sets: Vec, + /// `WHERE` clause + where_clause: Option>, + }, + /// `NOTHING` + Nothing, +} + +/// Function call tail +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct FunctionTail { + /// 
`FILTER` clause + pub filter_clause: Option>, + /// `OVER` clause + pub over_clause: Option>, +} + +/// Function call `OVER` clause +// https://sqlite.org/syntax/over-clause.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Over { + /// Window definition + Window(Window), + /// Window name + Name(Name), +} + +/// `OVER` window definition +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct WindowDef { + /// window name + pub name: Name, + /// window definition + pub window: Window, +} + +/// Window definition +// https://sqlite.org/syntax/window-defn.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Window { + /// base window name + pub base: Option, + /// `PARTITION BY` + pub partition_by: Option>>, + /// `ORDER BY` + pub order_by: Option>, + /// frame spec + pub frame_clause: Option, +} + +/// Frame specification +// https://sqlite.org/syntax/frame-spec.html +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct FrameClause { + /// unit + pub mode: FrameMode, + /// start bound + pub start: FrameBound, + /// end bound + pub end: Option, + /// `EXCLUDE` + pub exclude: Option, +} + +/// Frame modes +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum FrameMode { + /// `GROUPS` + Groups, + /// `RANGE` + Range, + /// `ROWS` + Rows, +} + +/// Frame bounds +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum FrameBound { + /// `CURRENT ROW` + CurrentRow, + /// `FOLLOWING` + Following(Box), + /// `PRECEDING` + Preceding(Box), + /// `UNBOUNDED FOLLOWING` + UnboundedFollowing, + /// `UNBOUNDED PRECEDING` + 
UnboundedPreceding, +} + +/// Frame exclusions +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum FrameExclude { + /// `NO OTHERS` + NoOthers, + /// `CURRENT ROW` + CurrentRow, + /// `GROUP` + Group, + /// `TIES` + Ties, +} diff --git a/core/parser/error.rs b/core/parser/error.rs new file mode 100644 index 000000000..72416a84d --- /dev/null +++ b/core/parser/error.rs @@ -0,0 +1,94 @@ +use std::error; +use std::fmt; +use std::io; + +/// SQL lexer and parser errors +#[non_exhaustive] +#[derive(Debug, miette::Diagnostic)] +#[diagnostic()] +pub enum Error { + /// I/O Error + Io(io::Error), + /// Lexer error + UnrecognizedToken(usize, #[label("here")] Option), + /// Missing quote or double-quote or backtick + UnterminatedLiteral(usize, #[label("here")] Option), + /// Missing `]` + UnterminatedBracket(usize, #[label("here")] Option), + /// Missing `*/` + UnterminatedBlockComment(usize, #[label("here")] Option), + /// Invalid parameter name + BadVariableName(usize, #[label("here")] Option), + /// Invalid number format + #[diagnostic(help("Invalid digit at `{0}`"))] + BadNumber( + usize, + #[label("here")] Option, + String, // Holds the offending number as a string + ), + #[diagnostic(help("Invalid digit at `{0}`"))] + BadFractionalPart( + usize, + #[label("here")] Option, + String, // Holds the offending number as a string + ), + #[diagnostic(help("Invalid digit at `{0}`"))] + BadExponentPart( + usize, + #[label("here")] Option, + String, // Holds the offending number as a string + ), + /// Invalid or missing sign after `!` + ExpectedEqualsSign(usize, #[label("here")] Option), + /// BLOB literals are string literals containing hexadecimal data and preceded by a single "x" or "X" character. + MalformedBlobLiteral(usize, #[label("here")] Option), + /// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits. 
+ MalformedHexInteger( + usize, + #[label("here")] Option, + #[help] Option<&'static str>, + ), +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Self::Io(ref err) => err.fmt(f), + Self::UnrecognizedToken(pos, _) => { + write!(f, "unrecognized token at {:?}", pos) + } + Self::UnterminatedLiteral(pos, _) => { + write!(f, "non-terminated literal at {:?}", pos) + } + Self::UnterminatedBracket(pos, _) => { + write!(f, "non-terminated bracket at {:?}", pos) + } + Self::UnterminatedBlockComment(pos, _) => { + write!(f, "non-terminated block comment at {:?}", pos) + } + Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos), + Self::BadNumber(pos, _, _) => write!(f, "bad number at {:?}", pos), + Self::BadFractionalPart(pos, _, _) => { + write!(f, "bad fractional part at {:?}", pos) + } + Self::BadExponentPart(pos, _, _) => { + write!(f, "bad exponent part at {:?}", pos) + } + Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos), + Self::MalformedBlobLiteral(pos, _) => { + write!(f, "malformed blob literal at {:?}", pos) + } + Self::MalformedHexInteger(pos, _, _) => { + write!(f, "malformed hex integer at {:?}", pos) + } + } + } +} + +impl error::Error for Error {} + +impl From for Error { + fn from(err: io::Error) -> Self { + Self::Io(err) + } +} diff --git a/core/parser/lexer.rs b/core/parser/lexer.rs new file mode 100644 index 000000000..a4842ae59 --- /dev/null +++ b/core/parser/lexer.rs @@ -0,0 +1,1379 @@ +use crate::parser::{error::Error, token::TokenType}; + +include!(concat!(env!("OUT_DIR"), "/keywords.rs")); + +pub(crate) fn is_identifier_start(b: u8) -> bool { + b.is_ascii_uppercase() || b == b'_' || b.is_ascii_lowercase() || b > b'\x7F' +} + +pub(crate) fn is_identifier_continue(b: u8) -> bool { + b == b'$' + || b.is_ascii_digit() + || b.is_ascii_uppercase() + || b == b'_' + || b.is_ascii_lowercase() + || b > b'\x7F' +} + +pub struct Token<'a> { + pub 
value: &'a [u8], + pub token_type: Option, // None means Token is whitespaces or comments +} + +pub struct Lexer<'a> { + offset: usize, + input: &'a [u8], +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Result, Error>; + + fn next(&mut self) -> Option { + match self.peek() { + None => None, // End of file + Some(b) if b.is_ascii_whitespace() => Some(Ok(self.eat_white_space())), + // matching logic + Some(b) => match b { + b'-' => Some(Ok(self.eat_minus_or_comment_or_ptr())), + b'(' => Some(Ok(self.eat_token(1, TokenType::TK_LP))), + b')' => Some(Ok(self.eat_token(1, TokenType::TK_RP))), + b';' => Some(Ok(self.eat_token(1, TokenType::TK_SEMI))), + b'+' => Some(Ok(self.eat_token(1, TokenType::TK_PLUS))), + b'*' => Some(Ok(self.eat_token(1, TokenType::TK_STAR))), + b'/' => Some(self.eat_slash_or_comment()), + b'%' => Some(Ok(self.eat_token(1, TokenType::TK_REM))), + b'=' => Some(Ok(self.eat_eq())), + b'<' => Some(Ok(self.eat_le_or_ne_or_lshift_or_lt())), + b'>' => Some(Ok(self.eat_ge_or_gt_or_rshift())), + b'!' => Some(self.eat_ne()), + b'|' => Some(Ok(self.eat_concat_or_bitor())), + b',' => Some(Ok(self.eat_token(1, TokenType::TK_COMMA))), + b'&' => Some(Ok(self.eat_token(1, TokenType::TK_BITAND))), + b'~' => Some(Ok(self.eat_token(1, TokenType::TK_BITNOT))), + b'\'' | b'"' | b'`' => Some(self.eat_lit_or_id()), + b'.' => Some(self.eat_dot_or_frac()), + b'0'..=b'9' => Some(self.eat_number()), + b'[' => Some(self.eat_bracket()), + b'?' | b'$' | b'@' | b'#' | b':' => Some(self.eat_var()), + b if is_identifier_start(b) => Some(self.eat_blob_or_id()), + _ => Some(Ok(self.eat_unrecognized())), + }, + } + } +} + +impl<'a> Lexer<'a> { + #[inline(always)] + pub fn new(input: &'a [u8]) -> Self { + Lexer { input, offset: 0 } + } + + #[inline(always)] + pub fn remaining(&self) -> &'a [u8] { + &self.input[self.offset..] + } + + /// Returns the current offset in the input without consuming. 
+ #[inline(always)] + pub fn peek(&self) -> Option { + if self.offset < self.input.len() { + Some(self.input[self.offset]) + } else { + None // End of file + } + } + + /// Returns the current offset in the input and consumes it. + #[inline(always)] + pub fn eat(&mut self) -> Option { + let result = self.peek(); + if result.is_some() { + self.offset += 1; + } + + result + } + + #[inline(always)] + fn eat_and_assert(&mut self, f: F) + where + F: Fn(u8) -> bool, + { + let _value = self.eat(); + debug_assert!(f(_value.unwrap())) + } + + #[inline(always)] + fn eat_while(&mut self, f: F) + where + F: Fn(Option) -> bool, + { + loop { + if !f(self.peek()) { + return; + } + + self.eat(); + } + } + + #[inline(always)] + fn eat_while_number_digit(&mut self) -> Result<(), Error> { + loop { + let start = self.offset; + self.eat_while(|b| b.is_some() && b.unwrap().is_ascii_digit()); + match self.peek() { + Some(b'_') => { + if start == self.offset { + // before the underscore, there was no digit + return Err(Error::BadNumber(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset].to_vec()) + })); + } + + self.eat_and_assert(|b| b == b'_'); + match self.peek() { + Some(b) if b.is_ascii_digit() => continue, // Continue if next is a digit + _ => { + // after the underscore, there is no digit + return Err(Error::BadNumber(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset].to_vec()) + })); + } + } + } + _ => return Ok(()), + } + } + } + + #[inline(always)] + fn eat_while_number_hexdigit(&mut self) -> Result<(), Error> { + loop { + let start = self.offset; + self.eat_while(|b| b.is_some() && b.unwrap().is_ascii_hexdigit()); + match self.peek() { + Some(b'_') => { + if start == self.offset { + // before the underscore, there was no digit + return Err(Error::BadNumber(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset].to_vec()) + })); + } + + self.eat_and_assert(|b| b == b'_'); + 
match self.peek() { + Some(b) if b.is_ascii_hexdigit() => continue, // Continue if next is a digit + _ => { + // after the underscore, there is no digit + return Err(Error::BadNumber(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset].to_vec()) + })); + } + } + } + _ => return Ok(()), + } + } + } + + #[inline(always)] + fn eat_token(&mut self, size: usize, typ: TokenType) -> Token<'a> { + debug_assert!(size > 0); + debug_assert!(self.remaining().len() >= size); + + let tok = Token { + value: &self.remaining()[..size], + token_type: Some(typ), + }; + self.offset += size; + tok + } + + #[inline(always)] + fn eat_white_space(&mut self) -> Token<'a> { + let start = self.offset; + self.eat_and_assert(|b| b.is_ascii_whitespace()); + self.eat_while(|b| b.is_some() && b.unwrap().is_ascii_whitespace()); + Token { + value: &self.input[start..self.offset], + token_type: None, // This is a whitespace + } + } + + #[inline(always)] + fn eat_minus_or_comment_or_ptr(&mut self) -> Token<'a> { + let start = self.offset; + self.eat_and_assert(|b| b == b'-'); + + match self.peek() { + Some(b'-') => { + self.eat_and_assert(|b| b == b'-'); + self.eat_while(|b| b.is_some() && b.unwrap() != b'\n'); + if self.peek() == Some(b'\n') { + self.eat_and_assert(|b| b == b'\n'); + } + + Token { + value: &self.input[start..self.offset], + token_type: None, // This is a comment + } + } + Some(b'>') => { + self.eat_and_assert(|b| b == b'>'); + if self.peek() == Some(b'>') { + self.eat_and_assert(|b| b == b'>'); + } + + Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_RP), + } + } + _ => Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_MINUS), + }, + } + } + + #[inline(always)] + fn eat_slash_or_comment(&mut self) -> Result, Error> { + let start = self.offset; + self.eat_and_assert(|b| b == b'/'); + match self.peek() { + Some(b'*') => { + self.eat_and_assert(|b| b == b'*'); + loop { + 
self.eat_while(|b| b.is_some() && b.unwrap() != b'*'); + match self.peek() { + Some(b'*') => { + self.eat_and_assert(|b| b == b'*'); + match self.peek() { + Some(b'/') => { + self.eat_and_assert(|b| b == b'/'); + break; // End of block comment + } + None => { + return Err(Error::UnterminatedBlockComment(self.offset, None)) + } + _ => {} + } + } + None => return Err(Error::UnterminatedBlockComment(self.offset, None)), + _ => unreachable!(), // We should not reach here + } + } + + Ok(Token { + value: &self.input[start..self.offset], + token_type: None, // This is a comment + }) + } + _ => Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_SLASH), + }), + } + } + + #[inline(always)] + fn eat_eq(&mut self) -> Token<'a> { + let start = self.offset; + self.eat_and_assert(|b| b == b'='); + if self.peek() == Some(b'=') { + self.eat_and_assert(|b| b == b'='); + } + + Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_EQ), + } + } + + #[inline(always)] + fn eat_le_or_ne_or_lshift_or_lt(&mut self) -> Token<'a> { + let start = self.offset; + self.eat_and_assert(|b| b == b'<'); + match self.peek() { + Some(b'=') => { + self.eat_and_assert(|b| b == b'='); + Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_LE), + } + } + Some(b'<') => { + self.eat_and_assert(|b| b == b'<'); + Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_LSHIFT), + } + } + Some(b'>') => { + self.eat_and_assert(|b| b == b'>'); + Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_NE), + } + } + _ => Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_LT), + }, + } + } + + #[inline(always)] + fn eat_ge_or_gt_or_rshift(&mut self) -> Token<'a> { + let start = self.offset; + self.eat_and_assert(|b| b == b'>'); + match self.peek() { + Some(b'=') => { + self.eat_and_assert(|b| b == b'='); + Token { + value: 
&self.input[start..self.offset], + token_type: Some(TokenType::TK_GE), + } + } + Some(b'>') => { + self.eat_and_assert(|b| b == b'>'); + Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_RSHIFT), + } + } + _ => Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_GT), + }, + } + } + + #[inline(always)] + fn eat_ne(&mut self) -> Result, Error> { + let start = self.offset; + self.eat_and_assert(|b| b == b'!'); + match self.peek() { + Some(b'=') => { + self.eat_and_assert(|b| b == b'='); + } + _ => return Err(Error::ExpectedEqualsSign(self.offset, None)), + } + + Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_NE), + }) + } + + #[inline(always)] + fn eat_concat_or_bitor(&mut self) -> Token<'a> { + let start = self.offset; + self.eat_and_assert(|b| b == b'|'); + if self.peek() == Some(b'|') { + self.eat_and_assert(|b| b == b'|'); + return Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_CONCAT), + }; + } + + Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_BITOR), + } + } + + #[inline(always)] + fn eat_lit_or_id(&mut self) -> Result, Error> { + let start = self.offset; + let quote = self.eat().unwrap(); + debug_assert!(quote == b'\'' || quote == b'"' || quote == b'`'); + let tt = if quote == b'\'' { + TokenType::TK_STRING + } else { + TokenType::TK_ID + }; + + loop { + self.eat_while(|b| b.is_some() && b.unwrap() != quote); + match self.peek() { + Some(b) if b == quote => { + self.eat_and_assert(|b| b == quote); + match self.peek() { + Some(b) if b == quote => { + self.eat_and_assert(|b| b == quote); + continue; + } + _ => break, + } + } + None => return Err(Error::UnterminatedLiteral(self.offset, None)), + _ => unreachable!(), + }; + } + + Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(tt), + }) + } + + #[inline(always)] + fn eat_dot_or_frac(&mut self) -> Result, Error> { + let 
start = self.offset; + self.eat_and_assert(|b| b == b'.'); + + match self.peek() { + Some(b) if b.is_ascii_digit() => { + self.eat_while_number_digit()?; + match self.peek() { + Some(b'e') | Some(b'E') => { + _ = self.eat_expo()?; + Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_FLOAT), + }) + } + Some(b) if is_identifier_start(b) => { + Err(Error::BadFractionalPart(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset + 1].to_vec()) + })) + } + _ => Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_FLOAT), + }), + } + } + _ => Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_DOT), + }), + } + } + + #[inline(always)] + fn eat_expo(&mut self) -> Result, Error> { + let start = self.offset; + self.eat_and_assert(|b| b == b'e' || b == b'E'); + match self.peek() { + Some(b'+') | Some(b'-') => { + self.eat_and_assert(|b| b == b'+' || b == b'-'); + } + _ => {} + } + + let start_num = self.offset; + self.eat_while_number_digit()?; + if start_num == self.offset { + return Err(Error::BadExponentPart(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset].to_vec()) + })); + } + + if self.peek().is_some() && is_identifier_start(self.peek().unwrap()) { + return Err(Error::BadExponentPart(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset + 1].to_vec()) + })); + } + + Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_FLOAT), // This is a number + }) + } + + #[inline(always)] + fn eat_number(&mut self) -> Result, Error> { + let start = self.offset; + let first_digit = self.eat().unwrap(); + debug_assert!(first_digit.is_ascii_digit()); + + // hex int + if first_digit == b'0' { + match self.peek() { + Some(b'x') | Some(b'X') => { + self.eat_and_assert(|b| b == b'x' || b == b'X'); + let start_hex = self.offset; + 
self.eat_while_number_hexdigit()?; + + if start_hex == self.offset { + return Err(Error::BadNumber(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset].to_vec()) + })); + } + + if self.peek().is_some() && is_identifier_start(self.peek().unwrap()) { + return Err(Error::BadNumber(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset + 1].to_vec()) + })); + } + + return Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_INTEGER), + }); + } + _ => {} + } + } + + self.eat_while_number_digit()?; + match self.peek() { + Some(b'.') => { + self.eat_dot_or_frac()?; + Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_FLOAT), + }) + } + Some(b'e') | Some(b'E') => { + self.eat_expo()?; + Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_FLOAT), + }) + } + Some(b) if is_identifier_start(b) => Err(Error::BadNumber(self.offset, None, unsafe { + String::from_utf8_unchecked(self.input[start..self.offset + 1].to_vec()) + })), + _ => Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_INTEGER), + }), + } + } + + #[inline(always)] + fn eat_bracket(&mut self) -> Result, Error> { + let start = self.offset; + self.eat_and_assert(|b| b == b'['); + self.eat_while(|b| b.is_some() && b.unwrap() != b']'); + match self.peek() { + Some(b']') => { + self.eat_and_assert(|b| b == b']'); + Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_ID), + }) + } + None => Err(Error::UnterminatedBracket(self.offset, None)), + _ => unreachable!(), // We should not reach here + } + } + + #[inline(always)] + fn eat_var(&mut self) -> Result, Error> { + let start = self.offset; + let tok = self.eat().unwrap(); + debug_assert!(tok == b'?' || tok == b'$' || tok == b'@' || tok == b'#' || tok == b':'); + + match tok { + b'?' 
=> { + let start_digit = self.offset; + self.eat_while(|b| b.is_some() && b.unwrap().is_ascii_digit()); + + // empty variable name + if start_digit == self.offset { + return Err(Error::BadVariableName(self.offset, None)); + } + + Ok(Token { + value: &self.input[start + 1..self.offset], // do not include '? in the value + token_type: Some(TokenType::TK_VARIABLE), + }) + } + _ => { + let start_id = self.offset; + self.eat_while(|b| b.is_some() && is_identifier_continue(b.unwrap())); + + // empty variable name + if start_id == self.offset { + return Err(Error::BadVariableName(self.offset, None)); + } + + Ok(Token { + value: &self.input[start..self.offset], + token_type: Some(TokenType::TK_VARIABLE), + }) + } + } + } + + #[inline(always)] + fn eat_blob_or_id(&mut self) -> Result, Error> { + let start = self.offset; + let start_char = self.eat().unwrap(); + debug_assert!(is_identifier_start(start_char)); + + match start_char { + b'x' | b'X' if self.peek() == Some(b'\'') => { + self.eat_and_assert(|b| b == b'\''); + let start_hex = self.offset; + self.eat_while(|b| b.is_some() && b.unwrap().is_ascii_hexdigit()); + + match self.peek() { + Some(b'\'') => { + let end_hex = self.offset; + debug_assert!(end_hex >= start_hex); + self.eat_and_assert(|b| b == b'\''); + + if (end_hex - start_hex) % 2 != 0 { + return Err(Error::UnrecognizedToken(self.offset, None)); + } + + Ok(Token { + value: &self.input[start + 2..self.offset - 1], // do not include 'x' or 'X' and the last ' + token_type: Some(TokenType::TK_BLOB), + }) + } + _ => Err(Error::UnterminatedLiteral(self.offset, None)), + } + } + _ => { + self.eat_while(|b| b.is_some() && is_identifier_continue(b.unwrap())); + let result = &self.input[start..self.offset]; + Ok(Token { + value: result, + token_type: Some(keyword_token(result).unwrap_or(TokenType::TK_ID)), + }) + } + } + } + + #[inline(always)] + fn eat_unrecognized(&mut self) -> Token<'a> { + let start = self.offset; + self.eat_while(|b| b.is_some() && 
!b.unwrap().is_ascii_whitespace());
        Token {
            value: &self.input[start..self.offset],
            token_type: Some(TokenType::TK_ILLEGAL),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    // Lexes a single token per input and checks both the reported value slice
    // and the assigned `TokenType` (`None` marks whitespace/comment tokens).
    #[test]
    fn test_lexer_one_tok() {
        let test_cases = vec![
            (b" ".as_slice(), Token { value: b" ".as_slice(), token_type: None, },),
            // line comments are yielded as one token with no type
            (b"-- This is a comment\n".as_slice(), Token { value: b"-- This is a comment\n".as_slice(), token_type: None, },),
            (b"-".as_slice(), Token { value: b"-".as_slice(), token_type: Some(TokenType::TK_MINUS), },),
            // NOTE(review): `->` / `->>` are classified as TK_RP here even though
            // TK_PTR exists in TokenType (SQLite lexes them as TK_PTR) — confirm
            // this matches the lexer's intended behavior.
            (b"->".as_slice(), Token { value: b"->".as_slice(), token_type: Some(TokenType::TK_RP), },),
            (b"->>".as_slice(), Token { value: b"->>".as_slice(), token_type: Some(TokenType::TK_RP), },),
            (b"(".as_slice(), Token { value: b"(".as_slice(), token_type: Some(TokenType::TK_LP), },),
            (b")".as_slice(), Token { value: b")".as_slice(), token_type: Some(TokenType::TK_RP), },),
            (b";".as_slice(), Token { value: b";".as_slice(), token_type: Some(TokenType::TK_SEMI), },),
            (b"+".as_slice(), Token { value: b"+".as_slice(), token_type: Some(TokenType::TK_PLUS), },),
            (b"*".as_slice(), Token { value: b"*".as_slice(), token_type: Some(TokenType::TK_STAR), },),
            (b"/".as_slice(), Token { value: b"/".as_slice(), token_type: Some(TokenType::TK_SLASH), },),
            // block comments, including embedded newlines and stray `*`s
            (b"/* This is a block comment */".as_slice(), Token { value: b"/* This is a block comment */".as_slice(), token_type: None, },),
            (b"/* This is a\n\n block comment */".as_slice(), Token { value: b"/* This is a\n\n block comment */".as_slice(), token_type: None, },),
            (b"/* This is a** block* comment */".as_slice(), Token { value: b"/* This is a** block* comment */".as_slice(), token_type: None, },),
            // both `=` and `==` lex as TK_EQ
            (b"=".as_slice(), Token { value: b"=".as_slice(), token_type: Some(TokenType::TK_EQ), },),
            (b"==".as_slice(), Token { value: b"==".as_slice(), token_type: Some(TokenType::TK_EQ), },),
            (b"<".as_slice(), Token { value: b"<".as_slice(), token_type: Some(TokenType::TK_LT), },),
            (b"<>".as_slice(), Token { value: b"<>".as_slice(), token_type: Some(TokenType::TK_NE), },),
            (b"<=".as_slice(), Token { value: b"<=".as_slice(), token_type: Some(TokenType::TK_LE), },),
            (b"<<".as_slice(), Token { value: b"<<".as_slice(), token_type: Some(TokenType::TK_LSHIFT), },),
            (b">".as_slice(), Token { value: b">".as_slice(), token_type: Some(TokenType::TK_GT), },),
            (b">=".as_slice(), Token { value: b">=".as_slice(), token_type: Some(TokenType::TK_GE), },),
            (b">>".as_slice(), Token { value: b">>".as_slice(), token_type: Some(TokenType::TK_RSHIFT), },),
            (b"!=".as_slice(), Token { value: b"!=".as_slice(), token_type: Some(TokenType::TK_NE), },),
            (b"|".as_slice(), Token { value: b"|".as_slice(), token_type: Some(TokenType::TK_BITOR), },),
            (b"||".as_slice(), Token { value: b"||".as_slice(), token_type: Some(TokenType::TK_CONCAT), },),
            (b",".as_slice(), Token { value: b",".as_slice(), token_type: Some(TokenType::TK_COMMA), },),
            (b"&".as_slice(), Token { value: b"&".as_slice(), token_type: Some(TokenType::TK_BITAND), },),
            (b"~".as_slice(), Token { value: b"~".as_slice(), token_type: Some(TokenType::TK_BITNOT), },),
            // quoting: single quotes -> string; backticks / double quotes /
            // square brackets -> quoted identifier; quotes stay in the value
            (b"'string'".as_slice(), Token { value: b"'string'".as_slice(), token_type: Some(TokenType::TK_STRING), },),
            (b"`identifier`".as_slice(), Token { value: b"`identifier`".as_slice(), token_type: Some(TokenType::TK_ID), },),
            (b"\"quoted string\"".as_slice(), Token { value: b"\"quoted string\"".as_slice(), token_type: Some(TokenType::TK_ID), },),
            (b"\"\"\"triple \"\"quoted string\"\"\"".as_slice(), Token { value: b"\"\"\"triple \"\"quoted string\"\"\"".as_slice(), token_type: Some(TokenType::TK_ID), },),
            (b"```triple ``quoted string```".as_slice(), Token { value: b"```triple ``quoted string```".as_slice(), token_type: Some(TokenType::TK_ID), },),
            (b"'''triple ''quoted string'''".as_slice(), Token { value: b"'''triple ''quoted string'''".as_slice(), token_type: Some(TokenType::TK_STRING), },),
            (b".".as_slice(), Token { value: b".".as_slice(), token_type: Some(TokenType::TK_DOT), },),
            // numeric literals: leading-dot floats, exponents, decimal and hex
            (b".123".as_slice(), Token { value: b".123".as_slice(), token_type: Some(TokenType::TK_FLOAT), },),
            (b".456".as_slice(), Token { value: b".456".as_slice(), token_type: Some(TokenType::TK_FLOAT), },),
            (b".456e789".as_slice(), Token { value: b".456e789".as_slice(), token_type: Some(TokenType::TK_FLOAT), },),
            (b".456E-789".as_slice(), Token { value: b".456E-789".as_slice(), token_type: Some(TokenType::TK_FLOAT), },),
            (b"123".as_slice(), Token { value: b"123".as_slice(), token_type: Some(TokenType::TK_INTEGER), },),
            (b"123.456".as_slice(), Token { value: b"123.456".as_slice(), token_type: Some(TokenType::TK_FLOAT), },),
            (b"123e456".as_slice(), Token { value: b"123e456".as_slice(), token_type: Some(TokenType::TK_FLOAT), },),
            (b"123E-456".as_slice(), Token { value: b"123E-456".as_slice(), token_type: Some(TokenType::TK_FLOAT), },),
            (b"0x1A3F".as_slice(), Token { value: b"0x1A3F".as_slice(), token_type: Some(TokenType::TK_INTEGER), },),
            (b"0x1A3F_5678".as_slice(), Token { value: b"0x1A3F_5678".as_slice(), token_type: Some(TokenType::TK_INTEGER), },),
            // `e9` after a hex literal is hex digits, not an exponent
            (b"0x1A3F_5678e9".as_slice(), Token { value: b"0x1A3F_5678e9".as_slice(), token_type: Some(TokenType::TK_INTEGER), },),
            (b"[identifier]".as_slice(), Token { value: b"[identifier]".as_slice(), token_type: Some(TokenType::TK_ID), },),
            // '?' is not included in the reported value
            (b"?123".as_slice(), Token { value: b"123".as_slice(), token_type: Some(TokenType::TK_VARIABLE), },),
            (b"$var_name".as_slice(), Token { value: b"$var_name".as_slice(), token_type: Some(TokenType::TK_VARIABLE), },),
            (b"@param".as_slice(), Token { value: b"@param".as_slice(), token_type: Some(TokenType::TK_VARIABLE), },),
            // NOTE(review): `#` is lexed as a variable prefix here, not a
            // comment starter — confirm this is intended.
            (b"#comment".as_slice(), Token { value: b"#comment".as_slice(), token_type: Some(TokenType::TK_VARIABLE), },),
            (b":named_param".as_slice(), Token { value: b":named_param".as_slice(), token_type: Some(TokenType::TK_VARIABLE), },),
            // blob literals: the x/X prefix and quotes are stripped from the value
            (b"x'1234567890abcdef'".as_slice(), Token { value: b"1234567890abcdef".as_slice(), token_type: Some(TokenType::TK_BLOB), },),
            (b"X'1234567890abcdef'".as_slice(), Token { value: b"1234567890abcdef".as_slice(), token_type: Some(TokenType::TK_BLOB), },),
            (b"x''".as_slice(), Token { value: b"".as_slice(), token_type: Some(TokenType::TK_BLOB), },),
            (b"X''".as_slice(), Token { value: b"".as_slice(), token_type: Some(TokenType::TK_BLOB), },),
            // keywords match case-insensitively; appending word characters
            // turns the lexeme into a plain identifier instead
            (b"wHeRe".as_slice(), Token { value: b"wHeRe".as_slice(), token_type: Some(TokenType::TK_WHERE), },),
            (b"wHeRe123".as_slice(), Token { value: b"wHeRe123".as_slice(), token_type: Some(TokenType::TK_ID), },),
            (b"wHeRe_123".as_slice(), Token { value: b"wHeRe_123".as_slice(), token_type: Some(TokenType::TK_ID), },),
        ];

        for (input, expected) in test_cases {
            let mut lexer = Lexer::new(input);
            let token = lexer.next().unwrap().unwrap();
            // all fixture bytes above are valid UTF-8, so the unchecked
            // conversion is sound; strings are built only for readable output
            let expect_value = unsafe { String::from_utf8_unchecked(expected.value.to_vec()) };
            let got_value = unsafe { String::from_utf8_unchecked(token.value.to_vec()) };
            print!(
                "Input: {:?}, Expected: {:?}, Got: {:?}\n",
                input, expect_value, got_value
            );
            assert_eq!(got_value, expect_value);
            assert_eq!(token.token_type, expected.token_type);
        }
    }

    // Maps every SQL keyword to its expected token class and checks
    // `keyword_token` for both upper- and lower-case spellings, plus a set of
    // non-keywords (empty, unknown, non-ASCII) that must yield `None`.
    #[test]
    fn test_keyword_token() {
        let values = HashMap::from([
            ("ABORT", TokenType::TK_ABORT), ("ACTION", TokenType::TK_ACTION), ("ADD", TokenType::TK_ADD),
            ("AFTER", TokenType::TK_AFTER), ("ALL", TokenType::TK_ALL), ("ALTER", TokenType::TK_ALTER),
            ("ALWAYS", TokenType::TK_ALWAYS), ("ANALYZE", TokenType::TK_ANALYZE), ("AND", TokenType::TK_AND),
            ("AS", TokenType::TK_AS), ("ASC", TokenType::TK_ASC), ("ATTACH", TokenType::TK_ATTACH),
            ("AUTOINCREMENT", TokenType::TK_AUTOINCR), ("BEFORE", TokenType::TK_BEFORE), ("BEGIN", TokenType::TK_BEGIN),
            ("BETWEEN", TokenType::TK_BETWEEN), ("BY", TokenType::TK_BY), ("CASCADE", TokenType::TK_CASCADE),
            ("CASE", TokenType::TK_CASE), ("CAST", TokenType::TK_CAST), ("CHECK", TokenType::TK_CHECK),
            ("COLLATE", TokenType::TK_COLLATE), ("COLUMN", TokenType::TK_COLUMNKW), ("COMMIT", TokenType::TK_COMMIT),
            ("CONFLICT", TokenType::TK_CONFLICT), ("CONSTRAINT", TokenType::TK_CONSTRAINT), ("CREATE", TokenType::TK_CREATE),
            ("CROSS", TokenType::TK_JOIN_KW), ("CURRENT", TokenType::TK_CURRENT), ("CURRENT_DATE", TokenType::TK_CTIME_KW),
            ("CURRENT_TIME", TokenType::TK_CTIME_KW), ("CURRENT_TIMESTAMP", TokenType::TK_CTIME_KW), ("DATABASE", TokenType::TK_DATABASE),
            ("DEFAULT", TokenType::TK_DEFAULT), ("DEFERRABLE", TokenType::TK_DEFERRABLE), ("DEFERRED", TokenType::TK_DEFERRED),
            ("DELETE", TokenType::TK_DELETE), ("DESC", TokenType::TK_DESC), ("DETACH", TokenType::TK_DETACH),
            ("DISTINCT", TokenType::TK_DISTINCT), ("DO", TokenType::TK_DO), ("DROP", TokenType::TK_DROP),
            ("EACH", TokenType::TK_EACH), ("ELSE", TokenType::TK_ELSE), ("END", TokenType::TK_END),
            ("ESCAPE", TokenType::TK_ESCAPE), ("EXCEPT", TokenType::TK_EXCEPT), ("EXCLUDE", TokenType::TK_EXCLUDE),
            ("EXCLUSIVE", TokenType::TK_EXCLUSIVE), ("EXISTS", TokenType::TK_EXISTS), ("EXPLAIN", TokenType::TK_EXPLAIN),
            ("FAIL", TokenType::TK_FAIL), ("FILTER", TokenType::TK_FILTER), ("FIRST", TokenType::TK_FIRST),
            ("FOLLOWING", TokenType::TK_FOLLOWING), ("FOR", TokenType::TK_FOR), ("FOREIGN", TokenType::TK_FOREIGN),
            ("FROM", TokenType::TK_FROM), ("FULL", TokenType::TK_JOIN_KW), ("GENERATED", TokenType::TK_GENERATED),
            ("GLOB", TokenType::TK_LIKE_KW), ("GROUP", TokenType::TK_GROUP), ("GROUPS", TokenType::TK_GROUPS),
            ("HAVING", TokenType::TK_HAVING), ("IF", TokenType::TK_IF), ("IGNORE", TokenType::TK_IGNORE),
            ("IMMEDIATE", TokenType::TK_IMMEDIATE), ("IN", TokenType::TK_IN), ("INDEX", TokenType::TK_INDEX),
            ("INDEXED", TokenType::TK_INDEXED), ("INITIALLY", TokenType::TK_INITIALLY), ("INNER", TokenType::TK_JOIN_KW),
            ("INSERT", TokenType::TK_INSERT), ("INSTEAD", TokenType::TK_INSTEAD), ("INTERSECT", TokenType::TK_INTERSECT),
            ("INTO", TokenType::TK_INTO), ("IS", TokenType::TK_IS), ("ISNULL", TokenType::TK_ISNULL),
            ("JOIN", TokenType::TK_JOIN), ("KEY", TokenType::TK_KEY), ("LAST", TokenType::TK_LAST),
            ("LEFT", TokenType::TK_JOIN_KW), ("LIKE", TokenType::TK_LIKE_KW), ("LIMIT", TokenType::TK_LIMIT),
            ("MATCH", TokenType::TK_MATCH), ("MATERIALIZED", TokenType::TK_MATERIALIZED), ("NATURAL", TokenType::TK_JOIN_KW),
            ("NO", TokenType::TK_NO), ("NOT", TokenType::TK_NOT), ("NOTHING", TokenType::TK_NOTHING),
            ("NOTNULL", TokenType::TK_NOTNULL), ("NULL", TokenType::TK_NULL), ("NULLS", TokenType::TK_NULLS),
            ("OF", TokenType::TK_OF), ("OFFSET", TokenType::TK_OFFSET), ("ON", TokenType::TK_ON),
            ("OR", TokenType::TK_OR), ("ORDER", TokenType::TK_ORDER), ("OTHERS", TokenType::TK_OTHERS),
            ("OUTER", TokenType::TK_JOIN_KW), ("OVER", TokenType::TK_OVER), ("PARTITION", TokenType::TK_PARTITION),
            ("PLAN", TokenType::TK_PLAN), ("PRAGMA", TokenType::TK_PRAGMA), ("PRECEDING", TokenType::TK_PRECEDING),
            ("PRIMARY", TokenType::TK_PRIMARY), ("QUERY", TokenType::TK_QUERY), ("RAISE", TokenType::TK_RAISE),
            ("RANGE", TokenType::TK_RANGE), ("RECURSIVE", TokenType::TK_RECURSIVE), ("REFERENCES", TokenType::TK_REFERENCES),
            ("REGEXP", TokenType::TK_LIKE_KW), ("REINDEX", TokenType::TK_REINDEX), ("RELEASE", TokenType::TK_RELEASE),
            ("RENAME", TokenType::TK_RENAME), ("REPLACE", TokenType::TK_REPLACE), ("RETURNING", TokenType::TK_RETURNING),
            ("RESTRICT", TokenType::TK_RESTRICT), ("RIGHT", TokenType::TK_JOIN_KW), ("ROLLBACK", TokenType::TK_ROLLBACK),
            ("ROW", TokenType::TK_ROW), ("ROWS", TokenType::TK_ROWS), ("SAVEPOINT", TokenType::TK_SAVEPOINT),
            ("SELECT", TokenType::TK_SELECT), ("SET", TokenType::TK_SET), ("TABLE", TokenType::TK_TABLE),
            ("TEMP", TokenType::TK_TEMP), ("TEMPORARY", TokenType::TK_TEMP), ("THEN", TokenType::TK_THEN),
            ("TIES", TokenType::TK_TIES), ("TO", TokenType::TK_TO), ("TRANSACTION", TokenType::TK_TRANSACTION),
            ("TRIGGER", TokenType::TK_TRIGGER), ("UNBOUNDED", TokenType::TK_UNBOUNDED), ("UNION", TokenType::TK_UNION),
            ("UNIQUE", TokenType::TK_UNIQUE), ("UPDATE", TokenType::TK_UPDATE), ("USING", TokenType::TK_USING),
            ("VACUUM", TokenType::TK_VACUUM), ("VALUES", TokenType::TK_VALUES), ("VIEW", TokenType::TK_VIEW),
            ("VIRTUAL", TokenType::TK_VIRTUAL), ("WHEN", TokenType::TK_WHEN), ("WHERE", TokenType::TK_WHERE),
            ("WINDOW", TokenType::TK_WINDOW), ("WITH", TokenType::TK_WITH), ("WITHOUT", TokenType::TK_WITHOUT),
        ]);

        for (key, value) in &values {
            // both the canonical upper-case and the all-lower-case spelling
            // must resolve to the same token class
            assert!(keyword_token(key.as_bytes()).unwrap() == *value);
            assert!(
                keyword_token(key.as_bytes().to_ascii_lowercase().as_slice()).unwrap() == *value
            );
        }

        // non-keywords must not match, including non-ASCII lookalikes
        assert!(keyword_token(b"").is_none());
        assert!(keyword_token(b"wrong").is_none());
        assert!(keyword_token(b"super wrong").is_none());
        assert!(keyword_token(b"super_wrong").is_none());
        assert!(keyword_token(b"aae26e78-3ba7-4627-8f8f-02623302495a").is_none());
        assert!(keyword_token("Crème Brulée".as_bytes()).is_none());
        assert!(keyword_token("fróm".as_bytes()).is_none());
    }

    // Lexes complete statements and checks the full token stream, including
    // the whitespace tokens emitted between lexemes.
    #[test]
    fn test_lexer_multi_tok() {
        let test_cases = vec![
            (
                b" SELECT 1".as_slice(),
                vec![
                    Token { value: b" ".as_slice(), token_type: None, },
                    Token { value: b"SELECT".as_slice(), token_type: Some(TokenType::TK_SELECT), },
                    Token { value: b" ".as_slice(), token_type: None, },
                    Token { value: b"1".as_slice(), token_type: Some(TokenType::TK_INTEGER), },
                ],
            ),
            (
                b"INSERT INTO users VALUES (1,2,3)".as_slice(),
                vec![
                    Token { value: b"INSERT".as_slice(), token_type: Some(TokenType::TK_INSERT), },
                    Token { value: b" ".as_slice(), token_type: None, },
                    Token { value: b"INTO".as_slice(), token_type: Some(TokenType::TK_INTO), },
                    Token { value: b" ".as_slice(), token_type: None, },
                    Token { value: b"users".as_slice(), token_type: Some(TokenType::TK_ID), },
                    Token { value: b" ".as_slice(), token_type: None, },
                    Token { value: b"VALUES".as_slice(), token_type: Some(TokenType::TK_VALUES), },
                    Token { value: b" ".as_slice(), token_type: None, },
                    Token { value: b"(".as_slice(), token_type: Some(TokenType::TK_LP), },
                    Token { value: b"1".as_slice(), token_type: Some(TokenType::TK_INTEGER), },
                    Token { value: b",".as_slice(), token_type: Some(TokenType::TK_COMMA), },
                    Token { value: b"2".as_slice(), token_type: Some(TokenType::TK_INTEGER), },
                    Token { value: b",".as_slice(), token_type: Some(TokenType::TK_COMMA), },
                    Token { value: b"3".as_slice(), token_type: Some(TokenType::TK_INTEGER), },
                    Token { value: b")".as_slice(), token_type: Some(TokenType::TK_RP), },
                ],
            ),
        ];

        for (input, expected_tokens) in test_cases {
            let lexer = Lexer::new(input);
            let mut tokens = Vec::new();

            for token in lexer {
                tokens.push(token.unwrap());
            }

            assert_eq!(tokens.len(), expected_tokens.len());

            for (i, token) in tokens.iter().enumerate() {
                // owned strings are built only for readable assertion diffs;
                // fixture bytes are valid UTF-8 so the unchecked cast is sound
                let expect_value =
                    unsafe { String::from_utf8_unchecked(expected_tokens[i].value.to_vec()) };
                let got_value = unsafe { String::from_utf8_unchecked(token.value.to_vec()) };
                assert_eq!(got_value, expect_value);
                assert_eq!(token.token_type, expected_tokens[i].token_type);
            }
        }
    }
}
diff --git a/core/parser/mod.rs b/core/parser/mod.rs
new file mode 100644
index 000000000..d97eadbf9
--- /dev/null
+++ b/core/parser/mod.rs
@@ -0,0 +1,4 @@
pub mod error;
pub mod token;
pub mod ast;
pub mod lexer;
diff --git a/core/parser/token.rs b/core/parser/token.rs
new file mode 100644
index 000000000..86fec2dcd
--- /dev/null
+++ b/core/parser/token.rs
@@ -0,0 +1,179 @@
/// Token classes
// Generated by lemon (parse.h).
// Renamed manually.
// To be kept in sync.
#[non_exhaustive]
#[allow(non_camel_case_types, missing_docs)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
#[repr(u16)]
pub enum TokenType {
    // Discriminants mirror the lemon-generated token codes in parse.h.
    // Do not renumber or reorder without regenerating the parser tables,
    // or the lexer and parser will disagree on every token class.
    TK_EOF = 0, TK_SEMI = 1, TK_EXPLAIN = 2, TK_QUERY = 3, TK_PLAN = 4,
    TK_BEGIN = 5, TK_TRANSACTION = 6, TK_DEFERRED = 7, TK_IMMEDIATE = 8, TK_EXCLUSIVE = 9,
    TK_COMMIT = 10, TK_END = 11, TK_ROLLBACK = 12, TK_SAVEPOINT = 13, TK_RELEASE = 14,
    TK_TO = 15, TK_TABLE = 16, TK_CREATE = 17, TK_IF = 18, TK_NOT = 19,
    TK_EXISTS = 20, TK_TEMP = 21, TK_LP = 22, TK_RP = 23, TK_AS = 24,
    TK_COMMA = 25, TK_WITHOUT = 26, TK_ABORT = 27, TK_ACTION = 28, TK_AFTER = 29,
    TK_ANALYZE = 30, TK_ASC = 31, TK_ATTACH = 32, TK_BEFORE = 33, TK_BY = 34,
    TK_CASCADE = 35, TK_CAST = 36, TK_CONFLICT = 37, TK_DATABASE = 38, TK_DESC = 39,
    TK_DETACH = 40, TK_EACH = 41, TK_FAIL = 42, TK_OR = 43, TK_AND = 44,
    TK_IS = 45, TK_ISNOT = 46, TK_MATCH = 47, TK_LIKE_KW = 48, TK_BETWEEN = 49,
    TK_IN = 50, TK_ISNULL = 51, TK_NOTNULL = 52, TK_NE = 53, TK_EQ = 54,
    TK_GT = 55, TK_LE = 56, TK_LT = 57, TK_GE = 58, TK_ESCAPE = 59,
    TK_ID = 60, TK_COLUMNKW = 61, TK_DO = 62, TK_FOR = 63, TK_IGNORE = 64,
    TK_INITIALLY = 65, TK_INSTEAD = 66, TK_NO = 67, TK_KEY = 68, TK_OF = 69,
    TK_OFFSET = 70, TK_PRAGMA = 71, TK_RAISE = 72, TK_RECURSIVE = 73, TK_REPLACE = 74,
    TK_RESTRICT = 75, TK_ROW = 76, TK_ROWS = 77, TK_TRIGGER = 78, TK_VACUUM = 79,
    TK_VIEW = 80, TK_VIRTUAL = 81, TK_WITH = 82, TK_NULLS = 83, TK_FIRST = 84,
    TK_LAST = 85, TK_CURRENT = 86, TK_FOLLOWING = 87, TK_PARTITION = 88, TK_PRECEDING = 89,
    TK_RANGE = 90, TK_UNBOUNDED = 91, TK_EXCLUDE = 92, TK_GROUPS = 93, TK_OTHERS = 94,
    TK_TIES = 95, TK_GENERATED = 96, TK_ALWAYS = 97, TK_MATERIALIZED = 98, TK_REINDEX = 99,
    TK_RENAME = 100, TK_CTIME_KW = 101, TK_ANY = 102, TK_BITAND = 103, TK_BITOR = 104,
    TK_LSHIFT = 105, TK_RSHIFT = 106, TK_PLUS = 107, TK_MINUS = 108, TK_STAR = 109,
    TK_SLASH = 110, TK_REM = 111, TK_CONCAT = 112, TK_PTR = 113, TK_COLLATE = 114,
    TK_BITNOT = 115, TK_ON = 116, TK_INDEXED = 117, TK_STRING = 118, TK_JOIN_KW = 119,
    TK_CONSTRAINT = 120, TK_DEFAULT = 121, TK_NULL = 122, TK_PRIMARY = 123, TK_UNIQUE = 124,
    TK_CHECK = 125, TK_REFERENCES = 126, TK_AUTOINCR = 127, TK_INSERT = 128, TK_DELETE = 129,
    TK_UPDATE = 130, TK_SET = 131, TK_DEFERRABLE = 132, TK_FOREIGN = 133, TK_DROP = 134,
    TK_UNION = 135, TK_ALL = 136, TK_EXCEPT = 137, TK_INTERSECT = 138, TK_SELECT = 139,
    TK_VALUES = 140, TK_DISTINCT = 141, TK_DOT = 142, TK_FROM = 143, TK_JOIN = 144,
    TK_USING = 145, TK_ORDER = 146, TK_GROUP = 147, TK_HAVING = 148, TK_LIMIT = 149,
    TK_WHERE = 150, TK_RETURNING = 151, TK_INTO = 152, TK_NOTHING = 153, TK_BLOB = 154,
    TK_FLOAT = 155, TK_INTEGER = 156, TK_VARIABLE = 157, TK_CASE = 158, TK_WHEN = 159,
    TK_THEN = 160, TK_ELSE = 161, TK_INDEX = 162, TK_ALTER = 163, TK_ADD = 164,
    TK_WINDOW = 165, TK_OVER = 166, TK_FILTER = 167,
    // Codes 168..=184 are skipped — presumably parser-internal tokens from the
    // generated parse.h with no lexer counterpart; TODO confirm against the
    // generated header before relying on the gap.
    TK_ILLEGAL = 185,
}