diff --git a/core/benches/parser_benchmark.rs b/core/benches/parser_benchmark.rs
index aad5ebdc6..c417bb949 100644
--- a/core/benches/parser_benchmark.rs
+++ b/core/benches/parser_benchmark.rs
@@ -7,6 +7,27 @@ use turso_sqlite3_parser::lexer::{
     Scanner,
 };
 
+fn bench_parser(criterion: &mut Criterion) {
+    let queries = ["SELECT 1"];
+
+    for query in queries.iter() {
+        let mut group = criterion.benchmark_group(format!("Parser `{query}`"));
+        let qb = query.as_bytes();
+
+        group.bench_function(BenchmarkId::new("limbo_parser_query", ""), |b| {
+            b.iter(|| Parser::new(black_box(qb)).next().unwrap());
+        });
+
+        group.bench_function(BenchmarkId::new("limbo_old_parser_query", ""), |b| {
+            b.iter(|| {
+                OldParser::new(black_box(qb)).next().unwrap().unwrap();
+            });
+        });
+
+        group.finish();
+    }
+}
+
 fn bench_lexer(criterion: &mut Criterion) {
     let queries = [
         "SELECT 1",
@@ -43,44 +64,9 @@ fn bench_lexer(criterion: &mut Criterion) {
     }
 }
 
-fn bench_parser(criterion: &mut Criterion) {
-    let queries = [
-        "BEGIN",
-        "BEGIN EXCLUSIVE TRANSACTION my_trans",
-        "COMMIT",
-        "COMMIT TRANSACTION my_trans",
-        "ROLLBACK",
-        "ROLLBACK TRANSACTION my_transaction TO my_savepoint",
-        "SAVEPOINT my_savepoint",
-        "RELEASE SAVEPOINT my_savepoint",
-    ];
-
-    for query in queries.iter() {
-        let mut group = criterion.benchmark_group(format!("Parser `{query}`"));
-        let qb = query.as_bytes();
-
-        group.bench_function(BenchmarkId::new("limbo_parser_query", ""), |b| {
-            b.iter(|| {
-                for stmt in Parser::new(black_box(qb)) {
-                    stmt.unwrap();
-                }
-            });
-        });
-
-        group.bench_function(BenchmarkId::new("limbo_old_parser_query", ""), |b| {
-            b.iter(|| {
-                let mut parser = OldParser::new(black_box(qb));
-                parser.next().unwrap().unwrap()
-            });
-        });
-
-        group.finish();
-    }
-}
-
 criterion_group! {
     name = benches;
     config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
-    targets = bench_lexer, bench_parser
+    targets = bench_parser, bench_lexer
 }
 criterion_main!(benches);
diff --git a/core/parser/ast.rs b/core/parser/ast.rs
index aae1f9fc7..f4c0decbe 100644
--- a/core/parser/ast.rs
+++ b/core/parser/ast.rs
@@ -1,5 +1,3 @@
-use std::str::{self, Bytes};
-
 use strum_macros::{EnumIter, EnumString};
 
 /// `?` or `$` Prepared statement arg placeholder(s)
@@ -320,18 +318,18 @@ pub enum Expr {
         /// `DISTINCT`
         distinctness: Option<Distinctness>,
         /// arguments
-        args: Option<Vec<Box<Expr>>>,
+        args: Vec<Box<Expr>>,
         /// `ORDER BY`
-        order_by: Option<Vec<SortedColumn>>,
+        order_by: Vec<SortedColumn>,
         /// `FILTER`
-        filter_over: Option<FunctionTail>,
+        filter_over: FunctionTail,
     },
     /// Function call expression with '*' as arg
     FunctionCallStar {
         /// function name
         name: Name,
         /// `FILTER`
-        filter_over: Option<FunctionTail>,
+        filter_over: FunctionTail,
     },
     /// Identifier
     Id(Name),
@@ -578,40 +576,33 @@ pub enum CompoundOperator {
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub enum OneSelect {
     /// `SELECT`
-    Select(SelectInner),
+    Select {
+        /// `DISTINCT`
+        distinctness: Option<Distinctness>,
+        /// columns
+        columns: Vec<ResultColumn>,
+        /// `FROM` clause
+        from: Option<FromClause>,
+        /// `WHERE` clause
+        where_clause: Option<Box<Expr>>,
+        /// `GROUP BY`
+        group_by: Option<GroupBy>,
+        /// `WINDOW` definition
+        window_clause: Vec<WindowDef>,
+    },
     /// `VALUES`
     Values(Vec<Vec<Box<Expr>>>),
 }
 
-#[derive(Clone, Debug, PartialEq, Eq)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-/// `SELECT` core
-pub struct SelectInner {
-    /// `DISTINCT`
-    pub distinctness: Option<Distinctness>,
-    /// columns
-    pub columns: Vec<ResultColumn>,
-    /// `FROM` clause
-    pub from: Option<FromClause>,
-    /// `WHERE` clause
-    pub where_clause: Option<Box<Expr>>,
-    /// `GROUP BY`
-    pub group_by: Option<GroupBy>,
-    /// `WINDOW` definition
-    pub window_clause: Option<Vec<WindowDef>>,
-}
-
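// A sketch (not part of the diff) of what the flattened variant buys:
// `OneSelect::Select` now carries the former `SelectInner` fields inline,
// and `window_clause` is a plain `Vec` instead of an `Option<Vec<..>>`, so
// an absent clause is just `vec![]`. This mirrors the `SELECT 1` expectation
// in the tests at the bottom of this diff.
let select_one = OneSelect::Select {
    distinctness: None,
    columns: vec![ResultColumn::Expr(
        Box::new(Expr::Literal(Literal::Numeric("1".to_owned()))),
        None,
    )],
    from: None,
    where_clause: None,
    group_by: None,
    window_clause: vec![],
};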
 /// `SELECT` ... `FROM` clause
 // https://sqlite.org/syntax/join-clause.html
 #[derive(Clone, Debug, PartialEq, Eq)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct FromClause {
     /// table
-    pub select: Option<Box<SelectTable>>, // FIXME mandatory
+    pub select: Box<SelectTable>, // FIXME mandatory
     /// `JOIN`ed tables
-    pub joins: Option<Vec<JoinedSelectTable>>,
-    /// A default join operator
-    pub op: Option<JoinOperator>, // FIXME transient
+    pub joins: Vec<JoinedSelectTable>,
 }
 
 /// `SELECT` distinctness
@@ -668,7 +659,7 @@ pub enum SelectTable {
     /// table
     Table(QualifiedName, Option<As>, Option<Indexed>),
     /// table function call
-    TableCall(QualifiedName, Option<Vec<Box<Expr>>>, Option<As>),
+    TableCall(QualifiedName, Vec<Box<Expr>>, Option<As>),
     /// `SELECT` subquery
     Select(Select, Option<As>),
     /// subquery
@@ -1358,7 +1349,7 @@ pub struct FunctionTail {
     /// `FILTER` clause
     pub filter_clause: Option<Box<Expr>>,
     /// `OVER` clause
-    pub over_clause: Option<Box<Over>>,
+    pub over_clause: Option<Over>,
 }
 
 /// Function call `OVER` clause
@@ -1390,9 +1381,9 @@ pub struct Window {
     /// base window name
     pub base: Option<Name>,
     /// `PARTITION BY`
-    pub partition_by: Option<Vec<Box<Expr>>>,
+    pub partition_by: Vec<Box<Expr>>,
     /// `ORDER BY`
-    pub order_by: Option<Vec<SortedColumn>>,
+    pub order_by: Vec<SortedColumn>,
     /// frame spec
     pub frame_clause: Option<FrameClause>,
 }
diff --git a/core/parser/lexer.rs b/core/parser/lexer.rs
index 74375260d..72d0c75b3 100644
--- a/core/parser/lexer.rs
+++ b/core/parser/lexer.rs
@@ -2,10 +2,12 @@ use crate::parser::{error::Error, token::TokenType};
 
 include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
 
+#[inline(always)]
 pub(crate) fn is_identifier_start(b: u8) -> bool {
     b.is_ascii_uppercase() || b == b'_' || b.is_ascii_lowercase() || b > b'\x7F'
 }
 
+#[inline(always)]
 pub(crate) fn is_identifier_continue(b: u8) -> bool {
     b == b'$'
         || b.is_ascii_digit()
@@ -22,13 +24,14 @@ pub struct Token<'a> {
 }
 
 pub struct Lexer<'a> {
-    offset: usize,
+    pub(crate) offset: usize,
     input: &'a [u8],
 }
 
 impl<'a> Iterator for Lexer<'a> {
     type Item = Result<Token<'a>, Error>;
 
+    #[inline(always)]
     fn next(&mut self) -> Option<Self::Item> {
         match self.peek() {
             None => None, // End of file
@@ -41,22 +44,22 @@ impl<'a> Iterator for Lexer<'a> {
                 b';' => Some(Ok(self.eat_token(1, TokenType::TK_SEMI))),
                 b'+' => Some(Ok(self.eat_token(1, TokenType::TK_PLUS))),
                 b'*' => Some(Ok(self.eat_token(1, TokenType::TK_STAR))),
-                b'/' => Some(self.eat_slash_or_comment()),
+                b'/' => Some(self.mark(|l| l.eat_slash_or_comment())),
                 b'%' => Some(Ok(self.eat_token(1, TokenType::TK_REM))),
                 b'=' => Some(Ok(self.eat_eq())),
                 b'<' => Some(Ok(self.eat_le_or_ne_or_lshift_or_lt())),
                 b'>' => Some(Ok(self.eat_ge_or_gt_or_rshift())),
-                b'!' => Some(self.eat_ne()),
+                b'!' => Some(self.mark(|l| l.eat_ne())),
                 b'|' => Some(Ok(self.eat_concat_or_bitor())),
                 b',' => Some(Ok(self.eat_token(1, TokenType::TK_COMMA))),
                 b'&' => Some(Ok(self.eat_token(1, TokenType::TK_BITAND))),
                 b'~' => Some(Ok(self.eat_token(1, TokenType::TK_BITNOT))),
-                b'\'' | b'"' | b'`' => Some(self.eat_lit_or_id()),
-                b'.' => Some(self.eat_dot_or_frac()),
-                b'0'..=b'9' => Some(self.eat_number()),
-                b'[' => Some(self.eat_bracket()),
-                b'?' | b'$' | b'@' | b'#' | b':' => Some(self.eat_var()),
-                b if is_identifier_start(b) => Some(self.eat_blob_or_id()),
+                b'\'' | b'"' | b'`' => Some(self.mark(|l| l.eat_lit_or_id())),
+                b'.' => Some(self.mark(|l| l.eat_dot_or_frac())),
+                b'0'..=b'9' => Some(self.mark(|l| l.eat_number())),
+                b'[' => Some(self.mark(|l| l.eat_bracket())),
+                b'?' | b'$' | b'@' | b'#' | b':' => Some(self.mark(|l| l.eat_var())),
+                b if is_identifier_start(b) => Some(self.mark(|l| l.eat_blob_or_id())),
                 _ => Some(Ok(self.eat_unrecognized())),
             },
         }
     }
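// A minimal sketch of the rewind contract the new `mark` helper gives the
// fallible scanners wrapped above: on `Err` the lexer offset snaps back to
// where the token started, so callers observe a consistent position after a
// failed sub-scan. `eat_lit_or_id` is one of the existing fallible scanners;
// the unterminated quote below is assumed to make it fail.
let mut lexer = Lexer::new(b"'unterminated");
let start = lexer.offset;
let result = lexer.mark(|l| l.eat_lit_or_id());
assert!(result.is_err());
assert_eq!(lexer.offset, start); // rewound by `mark`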
@@ -74,6 +77,19 @@ impl<'a> Lexer<'a> {
         &self.input[self.offset..]
     }
 
+    #[inline(always)]
+    pub fn mark<T, F>(&mut self, exc: F) -> Result<T, Error>
+    where
+        F: FnOnce(&mut Self) -> Result<T, Error>,
+    {
+        let start_offset = self.offset;
+        let result = exc(self);
+        if result.is_err() {
+            self.offset = start_offset; // Reset to the start offset if an error occurs
+        }
+        result
+    }
+
     /// Returns the current byte in the input without consuming it.
     #[inline(always)]
     pub fn peek(&self) -> Option<u8> {
diff --git a/core/parser/parser.rs b/core/parser/parser.rs
index 2f7a65cdb..3894e1781 100644
--- a/core/parser/parser.rs
+++ b/core/parser/parser.rs
@@ -1,107 +1,93 @@
 use crate::parser::ast::{
-    Cmd, CommonTableExpr, CreateTableBody, Expr, IndexedColumn, LikeOperator, Limit, Literal,
-    Materialized, Name, NullsOrder, Operator, QualifiedName, Select, SelectBody, SortOrder,
-    SortedColumn, Stmt, TransactionType, Type, TypeSize, UnaryOperator, With,
+    As, Cmd, CommonTableExpr, CompoundOperator, CompoundSelect, CreateTableBody, Distinctness,
+    Expr, FrameBound, FrameClause, FrameExclude, FrameMode, FromClause, FunctionTail, GroupBy,
+    Indexed, IndexedColumn, JoinConstraint, JoinOperator, JoinType, JoinedSelectTable,
+    LikeOperator, Limit, Literal, Materialized, Name, NullsOrder, OneSelect, Operator, Over,
+    QualifiedName, ResultColumn, Select, SelectBody, SelectTable, SortOrder, SortedColumn, Stmt,
+    TransactionType, Type, TypeSize, UnaryOperator, Window, WindowDef, With,
 };
 use crate::parser::error::Error;
 use crate::parser::lexer::{Lexer, Token};
 use crate::parser::token::TokenType;
 
+#[inline(always)]
 fn from_bytes_as_str(bytes: &[u8]) -> &str {
     unsafe { str::from_utf8_unchecked(bytes) }
 }
 
+#[inline(always)]
 fn from_bytes(bytes: &[u8]) -> String {
     unsafe { str::from_utf8_unchecked(bytes).to_owned() }
 }
 
+#[inline(always)]
+fn join_type_from_bytes(s: &[u8]) -> Result<JoinType, Error> {
+    if b"CROSS".eq_ignore_ascii_case(s) {
+        Ok(JoinType::INNER | JoinType::CROSS)
+    } else if b"FULL".eq_ignore_ascii_case(s) {
+        Ok(JoinType::LEFT | JoinType::RIGHT | JoinType::OUTER)
+    } else if b"INNER".eq_ignore_ascii_case(s) {
+        Ok(JoinType::INNER)
+    } else if b"LEFT".eq_ignore_ascii_case(s) {
+        Ok(JoinType::LEFT | JoinType::OUTER)
+    } else if b"NATURAL".eq_ignore_ascii_case(s) {
+        Ok(JoinType::NATURAL)
+    } else if b"RIGHT".eq_ignore_ascii_case(s) {
+        Ok(JoinType::RIGHT | JoinType::OUTER)
+    } else if b"OUTER".eq_ignore_ascii_case(s) {
+        Ok(JoinType::OUTER)
+    } else {
+        Err(Error::Custom(format!(
+            "unsupported JOIN type: {:?}",
+            str::from_utf8(s)
+        )))
+    }
+}
+
+#[inline(always)]
+fn new_join_type(n0: &[u8], n1: Option<&[u8]>, n2: Option<&[u8]>) -> Result<JoinType, Error> {
+    let mut jt = join_type_from_bytes(n0)?;
+
+    if let Some(n1) = n1 {
+        jt |= join_type_from_bytes(n1)?;
+    }
+
+    if let Some(n2) = n2 {
+        jt |= join_type_from_bytes(n2)?;
+    }
+
+    if (jt & (JoinType::INNER | JoinType::OUTER)) == (JoinType::INNER | JoinType::OUTER)
+        || (jt & (JoinType::OUTER | JoinType::LEFT | JoinType::RIGHT)) == JoinType::OUTER
+    {
+        return Err(Error::Custom(format!(
+            "unsupported JOIN type: {:?} {:?} {:?}",
+            from_bytes_as_str(n0),
+            from_bytes_as_str(n1.unwrap_or(&[])),
+            from_bytes_as_str(n2.unwrap_or(&[])),
+        )));
+    }
+
+    Ok(jt)
+}
+
 pub struct Parser<'a> {
     lexer: Lexer<'a>,
+
     /// The current token being processed
-    peek_mark: Option<Token<'a>>,
+    current_token: Token<'a>,
+    peekable: bool,
 }
 
 impl<'a> Iterator for
Parser<'a> { type Item = Result; + #[inline(always)] fn next(&mut self) -> Option { - // consumes prefix SEMI - while let Some(Ok(token)) = self.peek() { - if token.token_type == Some(TokenType::TK_SEMI) { - self.eat_assert(&[TokenType::TK_SEMI]); - } else { - break; - } + match self.mark(|p| p.next_cmd()) { + Ok(None) => None, // EOF + Ok(Some(cmd)) => Some(Ok(cmd)), + Err(err) => Some(Err(err)), } - - let result = match self.peek() { - None => None, // EOF - Some(Ok(token)) => match token.token_type { - Some(TokenType::TK_EXPLAIN) => { - self.eat_assert(&[TokenType::TK_EXPLAIN]); - - let mut is_query_plan = false; - match self.peek_no_eof() { - Ok(tok) if tok.token_type == Some(TokenType::TK_QUERY) => { - self.eat_assert(&[TokenType::TK_QUERY]); - - if let Err(err) = self.eat_expect(&[TokenType::TK_PLAN]) { - return Some(Err(err)); - } - - is_query_plan = true; - } - Err(err) => return Some(Err(err)), - _ => {} - } - - let stmt = self.parse_stmt(); - if let Err(err) = stmt { - return Some(Err(err)); - } - - if is_query_plan { - Some(Ok(Cmd::ExplainQueryPlan(stmt.unwrap()))) - } else { - Some(Ok(Cmd::Explain(stmt.unwrap()))) - } - } - _ => { - let stmt = self.parse_stmt(); - if let Err(err) = stmt { - return Some(Err(err)); - } - - Some(Ok(Cmd::Stmt(stmt.unwrap()))) - } - }, - Some(Err(err)) => return Some(Err(err)), - }; - - // consumes suffix SEMI - let mut found_semi = false; - loop { - match self.peek_ignore_eof() { - Ok(None) => break, - Ok(Some(token)) if token.token_type == Some(TokenType::TK_SEMI) => { - found_semi = true; - self.eat_assert(&[TokenType::TK_SEMI]); - } - Ok(Some(token)) => { - if !found_semi { - return Some(Err(Error::ParseUnexpectedToken { - expected: &[TokenType::TK_SEMI], - got: token.token_type.unwrap(), - })); - } - - break; - } - Err(err) => return Some(Err(err)), - } - } - - result } } @@ -110,17 +96,82 @@ impl<'a> Parser<'a> { pub fn new(input: &'a [u8]) -> Self { Self { lexer: Lexer::new(input), - peek_mark: None, + peekable: false, + current_token: Token { + value: b"", + token_type: None, + }, } } - /// Get the next token from the lexer + // entrypoint of parsing #[inline(always)] - fn eat(&mut self) -> Option, Error>> { - if let Some(token) = self.peek_mark.take() { - return Some(Ok(token)); + fn next_cmd(&mut self) -> Result, Error> { + // consumes prefix SEMI + while let Some(token) = self.peek()? { + if token.token_type == Some(TokenType::TK_SEMI) { + self.eat_assert(&[TokenType::TK_SEMI]); + } else { + break; + } } + let result = match self.peek()? { + None => None, // EOF + Some(token) => match token.token_type.unwrap() { + TokenType::TK_EXPLAIN => { + self.eat_assert(&[TokenType::TK_EXPLAIN]); + + let mut is_query_plan = false; + if self.peek_no_eof()?.token_type == Some(TokenType::TK_QUERY) { + self.eat_assert(&[TokenType::TK_QUERY]); + self.eat_expect(&[TokenType::TK_PLAN])?; + is_query_plan = true; + } + + let stmt = self.parse_stmt()?; + if is_query_plan { + Some(Cmd::ExplainQueryPlan(stmt)) + } else { + Some(Cmd::Explain(stmt)) + } + } + _ => { + let stmt = self.parse_stmt()?; + Some(Cmd::Stmt(stmt)) + } + }, + }; + + let mut found_semi = false; + loop { + match self.peek()? 
{ + None => break, + Some(token) if token.token_type == Some(TokenType::TK_SEMI) => { + found_semi = true; + self.eat_assert(&[TokenType::TK_SEMI]); + } + Some(token) => { + if !found_semi { + return Err(Error::ParseUnexpectedToken { + expected: &[TokenType::TK_SEMI], + got: token.token_type.unwrap(), + }); + } + + break; + } + } + } + + Ok(result) + } + + #[inline(always)] + fn consume_lexer_without_whitespaces_or_comments( + &mut self, + ) -> Option, Error>> { + debug_assert!(!self.peekable); loop { let tok = self.lexer.next(); if let Some(Ok(ref token)) = tok { @@ -134,14 +185,191 @@ impl<'a> Parser<'a> { } #[inline(always)] - fn eat_no_eof(&mut self) -> Result, Error> { - match self.eat() { - None => Err(Error::ParseUnexpectedEOF), - Some(Ok(token)) => Ok(token), + fn next_token(&mut self) -> Result>, Error> { + debug_assert!(!self.peekable); + let mut next = self.consume_lexer_without_whitespaces_or_comments(); + + fn get_token(tt: TokenType) -> TokenType { + match tt { + TokenType::TK_INDEX + | TokenType::TK_STRING + | TokenType::TK_JOIN_KW + | TokenType::TK_WINDOW + | TokenType::TK_OVER => TokenType::TK_ID, + _ => tt.fallback_id_if_ok(), + } + } + + if let Some(Ok(ref mut tok)) = next { + /* + ** The following three functions are called immediately after the tokenizer + ** reads the keywords WINDOW, OVER and FILTER, respectively, to determine + ** whether the token should be treated as a keyword or an SQL identifier. + ** This cannot be handled by the usual lemon %fallback method, due to + ** the ambiguity in some constructions. e.g. + ** + ** SELECT sum(x) OVER ... + ** + ** In the above, "OVER" might be a keyword, or it might be an alias for the + ** sum(x) expression. If a "%fallback ID OVER" directive were added to + ** grammar, then SQLite would always treat "OVER" as an alias, making it + ** impossible to call a window-function without a FILTER clause. + ** + ** WINDOW is treated as a keyword if: + ** + ** * the following token is an identifier, or a keyword that can fallback + ** to being an identifier, and + ** * the token after than one is TK_AS. + ** + ** OVER is a keyword if: + ** + ** * the previous token was TK_RP, and + ** * the next token is either TK_LP or an identifier. + ** + ** FILTER is a keyword if: + ** + ** * the previous token was TK_RP, and + ** * the next token is TK_LP. + */ + match tok.token_type.unwrap() { + TokenType::TK_WINDOW => { + let can_be_window = self.try_parse(|p| { + match p.consume_lexer_without_whitespaces_or_comments() { + None => return Ok(false), + Some(tok) => match get_token(tok?.token_type.unwrap()) { + TokenType::TK_ID => {} + _ => return Ok(false), + }, + } + + match p.consume_lexer_without_whitespaces_or_comments() { + None => return Ok(false), + Some(tok) => match get_token(tok?.token_type.unwrap()) { + TokenType::TK_AS => Ok(true), + _ => Ok(false), + }, + } + })?; + + if !can_be_window { + tok.token_type = Some(TokenType::TK_ID); + } + } + TokenType::TK_OVER => { + let prev_tt = self.current_token.token_type.unwrap_or(TokenType::TK_EOF); + let can_be_over = { + if prev_tt == TokenType::TK_RP { + self.try_parse(|p| { + match p.consume_lexer_without_whitespaces_or_comments() { + None => return Ok(false), + Some(tok) => match get_token(tok?.token_type.unwrap()) { + TokenType::TK_LP | TokenType::TK_ID => Ok(true), + _ => Ok(false), + }, + } + })? 
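// Worked example of the disambiguation described above, assuming the usual
// SQLite reading: after `sum(x)` the previous token is TK_RP, so an OVER
// followed by `(` or an identifier stays a keyword; anything else demotes
// it to a plain TK_ID, i.e. an alias.
//
//   SELECT sum(x) OVER (PARTITION BY y) FROM t   -- OVER is a keyword
//   SELECT sum(x) OVER FROM t                    -- OVER becomes an alias
//
// The same shape applies to FILTER (keyword only if the next token is `(`)
// and WINDOW (keyword only if followed by a name and then AS). In parser
// terms (a sketch):
//   Parser::new(b"SELECT sum(x) OVER (PARTITION BY y) FROM t").next()
//     -> a window-function call whose FunctionTail has over_clause: Some(..)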
+ } else { + false + } + }; + + if !can_be_over { + tok.token_type = Some(TokenType::TK_ID); + } + } + TokenType::TK_FILTER => { + let prev_tt = self.current_token.token_type.unwrap_or(TokenType::TK_EOF); + let can_be_filter = { + if prev_tt == TokenType::TK_RP { + self.try_parse(|p| { + match p.consume_lexer_without_whitespaces_or_comments() { + None => return Ok(false), + Some(tok) => match get_token(tok?.token_type.unwrap()) { + TokenType::TK_LP => Ok(true), + _ => Ok(false), + }, + } + })? + } else { + false + } + }; + + if !can_be_filter { + tok.token_type = Some(TokenType::TK_ID); + } + } + _ => {} + } + } + + match next { + None => Ok(None), // EOF + Some(Ok(tok)) => { + self.current_token = tok.clone(); + self.peekable = true; + Ok(Some(tok)) + } Some(Err(err)) => Err(err), } } + #[inline(always)] + fn mark(&mut self, exc: F) -> Result + where + F: FnOnce(&mut Self) -> Result, + { + let old_peekable = self.peekable; + let old_current_token = self.current_token.clone(); + let start_offset = self.lexer.offset; + let result = exc(self); + if result.is_err() { + self.peekable = old_peekable; + self.current_token = old_current_token; + self.lexer.offset = start_offset; + } + result + } + + #[inline(always)] + fn try_parse(&mut self, exc: F) -> R + where + F: FnOnce(&mut Self) -> R, + { + debug_assert!(!self.peekable); + let start_offset = self.lexer.offset; + let result = exc(self); + self.peekable = false; + self.lexer.offset = start_offset; + result + } + + /// Get the next token from the lexer + #[inline(always)] + fn eat(&mut self) -> Result>, Error> { + let result = self.peek()?; + self.peekable = false; // Clear the peek mark after consuming + Ok(result) + } + + /// Peek at the next token without consuming it + #[inline(always)] + fn peek(&mut self) -> Result>, Error> { + if self.peekable { + return Ok(Some(self.current_token.clone())); + } + + self.next_token() + } + + #[inline(always)] + fn eat_no_eof(&mut self) -> Result, Error> { + match self.eat()? { + None => Err(Error::ParseUnexpectedEOF), + Some(token) => Ok(token), + } + } + #[inline(always)] fn eat_expect(&mut self, expected: &'static [TokenType]) -> Result, Error> { self.peek_expect(expected)?; @@ -158,6 +386,12 @@ impl<'a> Parser<'a> { if token.token_type == Some(*expected) { return token; } + + if *expected == TokenType::TK_ID + && token.token_type.unwrap().fallback_id_if_ok() == TokenType::TK_ID + { + return token; + } } panic!( @@ -171,38 +405,11 @@ impl<'a> Parser<'a> { token // in release mode, we assume the caller has checked the token type } - /// Peek at the next token without consuming it - #[inline(always)] - fn peek(&mut self) -> Option, Error>> { - if let Some(ref token) = self.peek_mark { - return Some(Ok(token.clone())); - } - - match self.eat() { - None => None, // EOF - Some(Ok(token)) => { - self.peek_mark = Some(token.clone()); - Some(Ok(token)) - } - Some(Err(err)) => Some(Err(err)), - } - } - #[inline(always)] fn peek_no_eof(&mut self) -> Result, Error> { - match self.peek() { + match self.peek()? 
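// The two rewind helpers defined above differ only in when they restore the
// lexer state; in miniature (a sketch, with `Cursor` standing in for the
// parser/lexer pair):
struct Cursor {
    offset: usize,
}

impl Cursor {
    // Like `Parser::mark`: keep the consumed input on Ok, rewind on Err.
    fn mark<T>(&mut self, f: impl FnOnce(&mut Self) -> Result<T, ()>) -> Result<T, ()> {
        let start = self.offset;
        let result = f(self);
        if result.is_err() {
            self.offset = start;
        }
        result
    }

    // Like `Parser::try_parse`: always rewind, a pure lookahead probe.
    fn try_parse<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> T {
        let start = self.offset;
        let result = f(self);
        self.offset = start;
        result
    }
}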
{ None => Err(Error::ParseUnexpectedEOF), - Some(Ok(token)) => Ok(token), - Some(Err(err)) => Err(err), - } - } - - #[inline(always)] - fn peek_ignore_eof(&mut self) -> Result>, Error> { - match self.peek() { - None => Ok(None), - Some(Ok(token)) => Ok(Some(token)), - Some(Err(err)) => Err(err), + Some(token) => Ok(token), } } @@ -213,6 +420,12 @@ impl<'a> Parser<'a> { if token.token_type == Some(*expected) { return Ok(token); } + + if *expected == TokenType::TK_ID + && token.token_type.unwrap().fallback_id_if_ok() == TokenType::TK_ID + { + return Ok(token); + } } Err(Error::ParseUnexpectedToken { @@ -231,6 +444,8 @@ impl<'a> Parser<'a> { TokenType::TK_SAVEPOINT, TokenType::TK_RELEASE, TokenType::TK_CREATE, + TokenType::TK_SELECT, + TokenType::TK_VALUES, // add more ])?; @@ -241,6 +456,7 @@ impl<'a> Parser<'a> { TokenType::TK_SAVEPOINT => self.parse_savepoint(), TokenType::TK_RELEASE => self.parse_release(), TokenType::TK_CREATE => self.parse_create_stmt(), + TokenType::TK_SELECT | TokenType::TK_VALUES => Ok(Stmt::Select(self.parse_select()?)), _ => unreachable!(), } } @@ -271,9 +487,8 @@ impl<'a> Parser<'a> { } } - #[inline(always)] fn parse_transopt(&mut self) -> Result, Error> { - match self.peek_ignore_eof()? { + match self.peek()? { None => Ok(None), Some(tok) => match tok.token_type.unwrap() { TokenType::TK_TRANSACTION => { @@ -289,11 +504,10 @@ impl<'a> Parser<'a> { } } - #[inline(always)] fn parse_begin(&mut self) -> Result { self.eat_assert(&[TokenType::TK_BEGIN]); - let transtype = match self.peek_ignore_eof()? { + let transtype = match self.peek()? { None => None, Some(tok) => match tok.token_type.unwrap() { TokenType::TK_DEFERRED => { @@ -318,7 +532,6 @@ impl<'a> Parser<'a> { }) } - #[inline(always)] fn parse_commit(&mut self) -> Result { self.eat_assert(&[TokenType::TK_COMMIT, TokenType::TK_END]); Ok(Stmt::Commit { @@ -326,13 +539,12 @@ impl<'a> Parser<'a> { }) } - #[inline(always)] fn parse_rollback(&mut self) -> Result { self.eat_assert(&[TokenType::TK_ROLLBACK]); let tx_name = self.parse_transopt()?; - let savepoint_name = match self.peek_ignore_eof()? { + let savepoint_name = match self.peek()? 
{ None => None, Some(tok) => { if tok.token_type == Some(TokenType::TK_TO) { @@ -356,7 +568,6 @@ impl<'a> Parser<'a> { }) } - #[inline(always)] fn parse_savepoint(&mut self) -> Result { self.eat_assert(&[TokenType::TK_SAVEPOINT]); self.peek_nm()?; @@ -365,7 +576,6 @@ impl<'a> Parser<'a> { }) } - #[inline(always)] fn parse_release(&mut self) -> Result { self.eat_assert(&[TokenType::TK_RELEASE]); @@ -379,10 +589,9 @@ impl<'a> Parser<'a> { }) } - #[inline(always)] fn parse_create_stmt(&mut self) -> Result { self.eat_assert(&[TokenType::TK_CREATE]); - let first_tok = self.peek_expect(&[ + let mut first_tok = self.peek_expect(&[ TokenType::TK_TEMP, TokenType::TK_TABLE, TokenType::TK_VIRTUAL, @@ -391,29 +600,29 @@ impl<'a> Parser<'a> { TokenType::TK_UNIQUE, TokenType::TK_TRIGGER, ])?; + let mut temp = false; + if first_tok.token_type == Some(TokenType::TK_TEMP) { + temp = true; + first_tok = self.peek_expect(&[ + TokenType::TK_TABLE, + TokenType::TK_VIEW, + TokenType::TK_TRIGGER, + ])?; + } match first_tok.token_type.unwrap() { - TokenType::TK_TABLE => self.parse_create_table(false), - TokenType::TK_TEMP => { - self.eat_assert(&[TokenType::TK_TEMP]); - let first_tok = self.peek_expect(&[ - TokenType::TK_TABLE, - TokenType::TK_VIEW, - TokenType::TK_TRIGGER, - ])?; - - match first_tok.token_type.unwrap() { - TokenType::TK_TABLE => self.parse_create_table(true), - _ => unreachable!(), - } - } + TokenType::TK_TABLE => self.parse_create_table(temp), + TokenType::TK_VIRTUAL => todo!(), + TokenType::TK_VIEW => todo!(), + TokenType::TK_INDEX => todo!(), + TokenType::TK_UNIQUE => todo!(), + TokenType::TK_TRIGGER => todo!(), _ => unreachable!(), } } - #[inline(always)] fn parse_if_not_exists(&mut self) -> Result { - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_IF) { self.eat_assert(&[TokenType::TK_IF]); } else { @@ -428,12 +637,11 @@ impl<'a> Parser<'a> { Ok(true) } - #[inline(always)] fn parse_fullname(&mut self, allow_alias: bool) -> Result { self.peek_nm()?; let first_name = self.parse_nm(); - let secone_name = if let Some(tok) = self.peek_ignore_eof()? { + let secone_name = if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_DOT) { self.eat_assert(&[TokenType::TK_DOT]); self.peek_nm()?; @@ -446,7 +654,7 @@ impl<'a> Parser<'a> { }; let alias_name = if allow_alias { - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_AS) { self.eat_assert(&[TokenType::TK_AS]); self.peek_nm()?; @@ -476,7 +684,6 @@ impl<'a> Parser<'a> { } } - #[inline(always)] fn parse_signed(&mut self) -> Result, Error> { self.peek_expect(&[ TokenType::TK_FLOAT, @@ -497,10 +704,9 @@ impl<'a> Parser<'a> { } } - #[inline(always)] fn parse_type(&mut self) -> Result, Error> { - let mut type_name = if let Some(tok) = self.peek_ignore_eof()? { - match tok.token_type.unwrap() { + let mut type_name = if let Some(tok) = self.peek()? { + match tok.token_type.unwrap().fallback_id_if_ok() { TokenType::TK_ID | TokenType::TK_STRING => { self.eat_assert(&[TokenType::TK_ID, TokenType::TK_STRING]); from_bytes(tok.value) @@ -512,8 +718,8 @@ impl<'a> Parser<'a> { }; loop { - if let Some(tok) = self.peek_ignore_eof()? { - match tok.token_type.unwrap() { + if let Some(tok) = self.peek()? 
{ + match tok.token_type.unwrap().fallback_id_if_ok() { TokenType::TK_ID | TokenType::TK_STRING => { self.eat_assert(&[TokenType::TK_ID, TokenType::TK_STRING]); type_name.push_str(" "); @@ -526,7 +732,7 @@ impl<'a> Parser<'a> { } } - let size = if let Some(tok) = self.peek_ignore_eof()? { + let size = if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_LP) { self.eat_assert(&[TokenType::TK_LP]); let first_size = self.parse_signed()?; @@ -576,9 +782,9 @@ impl<'a> Parser<'a> { /// /// this function detect precedence by peeking first token of operator /// after parsing a operand (binary operator) - #[inline(always)] + fn current_token_precedence(&mut self) -> Result, Error> { - let tok = self.peek_ignore_eof()?; + let tok = self.peek()?; if tok.is_none() { return Ok(None); } @@ -613,10 +819,244 @@ impl<'a> Parser<'a> { } } - #[inline(always)] + fn parse_distinct(&mut self) -> Result, Error> { + match self.peek()? { + None => Ok(None), + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_DISTINCT => { + self.eat_assert(&[TokenType::TK_DISTINCT]); + Ok(Some(Distinctness::Distinct)) + } + TokenType::TK_ALL => { + self.eat_assert(&[TokenType::TK_ALL]); + Ok(Some(Distinctness::All)) + } + _ => Ok(None), + }, + } + } + + fn parse_filter_clause(&mut self) -> Result>, Error> { + match self.peek()? { + None => return Ok(None), + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_FILTER => { + self.eat_assert(&[TokenType::TK_FILTER]); + } + _ => return Ok(None), + }, + } + + self.eat_expect(&[TokenType::TK_LP])?; + self.eat_expect(&[TokenType::TK_WHERE])?; + let expr = self.parse_expr(0)?; + self.eat_expect(&[TokenType::TK_RP])?; + Ok(Some(expr)) + } + + fn parse_frame_opt(&mut self) -> Result, Error> { + let range_or_rows = match self.peek()? 
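// A sketch of the target AST for a window frame, using the types from
// ast.rs: "ROWS BETWEEN 1 PRECEDING AND CURRENT ROW" should come back as
// roughly this (the expression shape is an assumption):
let frame = FrameClause {
    mode: FrameMode::Rows,
    start: FrameBound::Preceding(Box::new(Expr::Literal(Literal::Numeric(
        "1".to_owned(),
    )))),
    end: Some(FrameBound::CurrentRow),
    exclude: None,
};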
{ + None => return Ok(None), + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_RANGE => { + self.eat_assert(&[TokenType::TK_RANGE]); + FrameMode::Range + } + TokenType::TK_ROWS => { + self.eat_assert(&[TokenType::TK_ROWS]); + FrameMode::Rows + } + TokenType::TK_GROUPS => { + self.eat_assert(&[TokenType::TK_GROUPS]); + FrameMode::Groups + } + _ => return Ok(None), + }, + }; + + let has_end = match self.peek_no_eof()?.token_type.unwrap() { + TokenType::TK_BETWEEN => { + self.eat_assert(&[TokenType::TK_BETWEEN]); + true + } + _ => false, + }; + + let start = match self.peek_no_eof()?.token_type.unwrap() { + TokenType::TK_UNBOUNDED => { + self.eat_assert(&[TokenType::TK_UNBOUNDED]); + self.eat_expect(&[TokenType::TK_PRECEDING])?; + FrameBound::UnboundedPreceding + } + TokenType::TK_CURRENT => { + self.eat_assert(&[TokenType::TK_CURRENT]); + self.eat_expect(&[TokenType::TK_ROW])?; + FrameBound::CurrentRow + } + _ => { + let expr = self.parse_expr(0)?; + let tok = self.eat_expect(&[TokenType::TK_PRECEDING, TokenType::TK_FOLLOWING])?; + match tok.token_type.unwrap() { + TokenType::TK_PRECEDING => FrameBound::Preceding(expr), + TokenType::TK_FOLLOWING => FrameBound::Following(expr), + _ => unreachable!(), + } + } + }; + + let end = if has_end { + self.eat_expect(&[TokenType::TK_AND])?; + + Some(match self.peek_no_eof()?.token_type.unwrap() { + TokenType::TK_UNBOUNDED => { + self.eat_assert(&[TokenType::TK_UNBOUNDED]); + self.eat_expect(&[TokenType::TK_FOLLOWING])?; + FrameBound::UnboundedFollowing + } + TokenType::TK_CURRENT => { + self.eat_assert(&[TokenType::TK_CURRENT]); + self.eat_expect(&[TokenType::TK_ROW])?; + FrameBound::CurrentRow + } + _ => { + let expr = self.parse_expr(0)?; + let tok = + self.eat_expect(&[TokenType::TK_PRECEDING, TokenType::TK_FOLLOWING])?; + match tok.token_type.unwrap() { + TokenType::TK_PRECEDING => FrameBound::Preceding(expr), + TokenType::TK_FOLLOWING => FrameBound::Following(expr), + _ => unreachable!(), + } + } + }) + } else { + None + }; + + let exclude = match self.peek()? { + None => None, + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_EXCLUDE => { + self.eat_assert(&[TokenType::TK_EXCLUDE]); + let tok = self.eat_expect(&[ + TokenType::TK_NO, + TokenType::TK_CURRENT, + TokenType::TK_GROUP, + TokenType::TK_TIES, + ])?; + match tok.token_type.unwrap() { + TokenType::TK_NO => { + self.eat_expect(&[TokenType::TK_OTHERS])?; + Some(FrameExclude::NoOthers) + } + TokenType::TK_CURRENT => { + self.eat_expect(&[TokenType::TK_ROW])?; + Some(FrameExclude::CurrentRow) + } + TokenType::TK_GROUP => Some(FrameExclude::Group), + TokenType::TK_TIES => Some(FrameExclude::Ties), + _ => unreachable!(), + } + } + _ => None, + }, + }; + + Ok(Some(FrameClause { + mode: range_or_rows, + start, + end, + exclude, + })) + } + + fn parse_window(&mut self) -> Result { + let name = match self.peek()? { + None => None, + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_PARTITION + | TokenType::TK_ORDER + | TokenType::TK_RANGE + | TokenType::TK_ROWS + | TokenType::TK_GROUPS => None, + tt => match tt.fallback_id_if_ok() { + TokenType::TK_ID + | TokenType::TK_STRING + | TokenType::TK_INDEXED + | TokenType::TK_JOIN_KW => Some(self.parse_nm()), + _ => None, + }, + }, + }; + + let partition_by = match self.peek()? { + None => vec![], + Some(tok) if tok.token_type == Some(TokenType::TK_PARTITION) => { + self.eat_assert(&[TokenType::TK_PARTITION]); + self.eat_expect(&[TokenType::TK_BY])?; + self.parse_nexpr_list()? 
+            }
+            _ => vec![],
+        };
+
+        let order_by = self.parse_order_by()?;
+        let frame_clause = self.parse_frame_opt()?;
+        Ok(Window {
+            base: name,
+            partition_by,
+            order_by,
+            frame_clause,
+        })
+    }
+
+    fn parse_over_clause(&mut self) -> Result<Option<Over>, Error> {
+        match self.peek()? {
+            None => return Ok(None),
+            Some(tok) => match tok.token_type.unwrap() {
+                TokenType::TK_OVER => {
+                    self.eat_assert(&[TokenType::TK_OVER]);
+                }
+                _ => return Ok(None),
+            },
+        }
+
+        let tok = self.peek_expect(&[
+            TokenType::TK_LP,
+            TokenType::TK_ID,
+            TokenType::TK_STRING,
+            TokenType::TK_INDEXED,
+            TokenType::TK_JOIN_KW,
+        ])?;
+        match tok.token_type.unwrap() {
+            TokenType::TK_LP => {
+                self.eat_assert(&[TokenType::TK_LP]);
+                let window = self.parse_window()?;
+                self.eat_expect(&[TokenType::TK_RP])?;
+                Ok(Some(Over::Window(window)))
+            }
+            _ => {
+                let name = self.parse_nm();
+                Ok(Some(Over::Name(name)))
+            }
+        }
+    }
+
+    fn parse_filter_over(&mut self) -> Result<FunctionTail, Error> {
+        let filter_clause = self.parse_filter_clause()?;
+        let over_clause = self.parse_over_clause()?;
+        Ok(FunctionTail {
+            filter_clause,
+            over_clause,
+        })
+    }
+
+    #[inline(always)] // this function is hot :)
     fn parse_expr_operand(&mut self) -> Result<Box<Expr>, Error> {
         let tok = self.peek_expect(&[
             TokenType::TK_LP,
+            TokenType::TK_CAST,
+            TokenType::TK_CTIME_KW,
             TokenType::TK_ID,
             TokenType::TK_STRING,
             TokenType::TK_INDEXED,
@@ -626,8 +1066,6 @@ impl<'a> Parser<'a> {
             TokenType::TK_FLOAT,
             TokenType::TK_INTEGER,
             TokenType::TK_VARIABLE,
-            TokenType::TK_CAST,
-            TokenType::TK_CTIME_KW,
             TokenType::TK_NOT,
             TokenType::TK_BITNOT,
             TokenType::TK_PLUS,
@@ -639,59 +1077,10 @@ impl<'a> Parser<'a> {
         match tok.token_type.unwrap() {
             TokenType::TK_LP => {
                 self.eat_assert(&[TokenType::TK_LP]);
-                let exprs = self.parse_expr_list()?;
+                let exprs = self.parse_nexpr_list()?;
                 self.eat_expect(&[TokenType::TK_RP])?;
                 Ok(Box::new(Expr::Parenthesized(exprs)))
             }
-            TokenType::TK_ID
-            | TokenType::TK_STRING
-            | TokenType::TK_INDEXED
-            | TokenType::TK_JOIN_KW => {
-                let can_be_lit_str = tok.token_type == Some(TokenType::TK_STRING);
-                debug_assert!(self.peek_nm().is_ok(), "Expected a name token");
-                let name = self.parse_nm();
-
-                let second_name = if let Some(tok) = self.peek_ignore_eof()? {
-                    if tok.token_type == Some(TokenType::TK_DOT) {
-                        self.eat_assert(&[TokenType::TK_DOT]);
-                        self.peek_nm()?;
-                        Some(self.parse_nm())
-                    } else {
-                        None
-                    }
-                } else {
-                    None
-                };
-
-                let third_name = if let Some(tok) = self.peek_ignore_eof()?
{ - if tok.token_type == Some(TokenType::TK_DOT) { - self.eat_assert(&[TokenType::TK_DOT]); - self.peek_nm()?; - Some(self.parse_nm()) - } else { - None - } - } else { - None - }; - - if second_name.is_some() && third_name.is_some() { - Ok(Box::new(Expr::DoublyQualified( - name, - second_name.unwrap(), - third_name.unwrap(), - ))) - } else if second_name.is_some() { - Ok(Box::new(Expr::Qualified(name, second_name.unwrap()))) - } else if can_be_lit_str { - Ok(Box::new(Expr::Literal(match name { - Name::Quoted(s) => Literal::String(s), - Name::Ident(s) => Literal::String(s), - }))) - } else { - Ok(Box::new(Expr::Id(name))) - } - } TokenType::TK_NULL => { self.eat_assert(&[TokenType::TK_NULL]); Ok(Box::new(Expr::Literal(Literal::Null))) @@ -725,10 +1114,10 @@ impl<'a> Parser<'a> { self.eat_expect(&[TokenType::TK_AS])?; let typ = self.parse_type()?; self.eat_expect(&[TokenType::TK_RP])?; - Ok(Box::new(Expr::Cast { + return Ok(Box::new(Expr::Cast { expr, type_name: typ, - })) + })); } TokenType::TK_CTIME_KW => { let tok = self.eat_assert(&[TokenType::TK_CTIME_KW]); @@ -779,7 +1168,7 @@ impl<'a> Parser<'a> { let mut when_then_pairs = vec![]; loop { - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type.unwrap() != TokenType::TK_WHEN { break; } @@ -794,7 +1183,7 @@ impl<'a> Parser<'a> { when_then_pairs.push((when, then)); } - let else_expr = if let Some(ok) = self.peek_ignore_eof()? { + let else_expr = if let Some(ok) = self.peek()? { if ok.token_type == Some(TokenType::TK_ELSE) { self.eat_assert(&[TokenType::TK_ELSE]); Some(self.parse_expr(0)?) @@ -811,16 +1200,100 @@ impl<'a> Parser<'a> { else_expr, })) } - _ => unreachable!(), + _ => { + let can_be_lit_str = tok.token_type == Some(TokenType::TK_STRING); + debug_assert!(self.peek_nm().is_ok(), "Expected a name token"); + let name = self.parse_nm(); + + let second_name = if let Some(tok) = self.peek()? { + if tok.token_type == Some(TokenType::TK_DOT) { + self.eat_assert(&[TokenType::TK_DOT]); + self.peek_nm()?; + Some(self.parse_nm()) + } else if tok.token_type == Some(TokenType::TK_LP) { + if can_be_lit_str { + return Err(Error::ParseUnexpectedToken { + got: TokenType::TK_STRING, + expected: &[ + TokenType::TK_ID, + TokenType::TK_INDEXED, + TokenType::TK_JOIN_KW, + ], + }); + } // can not be literal string in function name + + self.eat_assert(&[TokenType::TK_LP]); + let tok = self.peek_no_eof()?; + match tok.token_type.unwrap() { + TokenType::TK_STAR => { + self.eat_assert(&[TokenType::TK_STAR]); + self.eat_expect(&[TokenType::TK_RP])?; + return Ok(Box::new(Expr::FunctionCallStar { + name: name, + filter_over: self.parse_filter_over()?, + })); + } + _ => { + let distinct = self.parse_distinct()?; + let exprs = self.parse_expr_list()?; + self.eat_expect(&[TokenType::TK_RP])?; + let order_by = self.parse_order_by()?; + let filter_over = self.parse_filter_over()?; + return Ok(Box::new(Expr::FunctionCall { + name, + distinctness: distinct, + args: exprs, + order_by, + filter_over, + })); + } + } + } else { + None + } + } else { + None + }; + + let third_name = if let Some(tok) = self.peek()? 
{ + if tok.token_type == Some(TokenType::TK_DOT) { + self.eat_assert(&[TokenType::TK_DOT]); + self.peek_nm()?; + Some(self.parse_nm()) + } else { + None + } + } else { + None + }; + + if second_name.is_some() && third_name.is_some() { + Ok(Box::new(Expr::DoublyQualified( + name, + second_name.unwrap(), + third_name.unwrap(), + ))) + } else if second_name.is_some() { + Ok(Box::new(Expr::Qualified(name, second_name.unwrap()))) + } else if can_be_lit_str { + Ok(Box::new(Expr::Literal(match name { + Name::Quoted(s) => Literal::String(s), + Name::Ident(s) => Literal::String(s), + }))) + } else { + Ok(Box::new(Expr::Id(name))) + } + } } } fn parse_expr_list(&mut self) -> Result>, Error> { let mut exprs = vec![]; loop { - match self.peek_ignore_eof()? { - Some(tok) => match tok.token_type.unwrap() { + match self.peek()? { + Some(tok) => match tok.token_type.unwrap().fallback_id_if_ok() { TokenType::TK_LP + | TokenType::TK_CAST | TokenType::TK_ID | TokenType::TK_STRING | TokenType::TK_INDEXED @@ -830,7 +1303,6 @@ impl<'a> Parser<'a> { | TokenType::TK_FLOAT | TokenType::TK_INTEGER | TokenType::TK_VARIABLE - | TokenType::TK_CAST | TokenType::TK_CTIME_KW | TokenType::TK_NOT | TokenType::TK_BITNOT @@ -855,7 +1327,6 @@ impl<'a> Parser<'a> { Ok(exprs) } - #[inline(always)] fn parse_expr(&mut self, precedence: u8) -> Result, Error> { let mut result = self.parse_expr_operand()?; @@ -924,7 +1395,9 @@ impl<'a> Parser<'a> { self.eat_assert(&[TokenType::TK_LP]); let tok = self.peek_no_eof()?; match tok.token_type.unwrap() { - TokenType::TK_SELECT | TokenType::TK_WITH => { + TokenType::TK_SELECT + | TokenType::TK_WITH + | TokenType::TK_VALUES => { let select = self.parse_select()?; self.eat_expect(&[TokenType::TK_RP])?; Box::new(Expr::InSelect { @@ -947,7 +1420,7 @@ impl<'a> Parser<'a> { _ => { let name = self.parse_fullname(false)?; let mut exprs = vec![]; - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_LP) { self.eat_assert(&[TokenType::TK_LP]); exprs = self.parse_expr_list()?; @@ -983,7 +1456,7 @@ impl<'a> Parser<'a> { }; let expr = self.parse_expr(5)?; // do not consume ESCAPE - let escape = if let Some(tok) = self.peek_ignore_eof()? { + let escape = if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_ESCAPE) { self.eat_assert(&[TokenType::TK_ESCAPE]); Some(self.parse_expr(5)?) @@ -1135,9 +1608,8 @@ impl<'a> Parser<'a> { Ok(result) } - #[inline(always)] fn parse_collate(&mut self) -> Result, Error> { - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_COLLATE) { self.eat_assert(&[TokenType::TK_COLLATE]); } else { @@ -1155,9 +1627,8 @@ impl<'a> Parser<'a> { } } - #[inline(always)] fn parse_sort_order(&mut self) -> Result, Error> { - match self.peek_ignore_eof()? { + match self.peek()? { Some(tok) if tok.token_type == Some(TokenType::TK_ASC) => { self.eat_assert(&[TokenType::TK_ASC]); Ok(Some(SortOrder::Asc)) @@ -1170,9 +1641,8 @@ impl<'a> Parser<'a> { } } - #[inline(always)] fn parse_eid_list(&mut self) -> Result, Error> { - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_LP) { self.eat_assert(&[TokenType::TK_LP]); } else { @@ -1236,9 +1706,8 @@ impl<'a> Parser<'a> { }) } - #[inline(always)] fn parse_with(&mut self) -> Result, Error> { - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? 
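// Precedence climbing in parse_expr, sketched: parse_expr(0) reads an
// operand, then keeps folding binary operators while
// current_token_precedence reports a binding at least as tight as the
// current level. So for
//
//   SELECT 1 + 2 * 3
//
// the recursive call made for `+` consumes `2 * 3` first, yielding
// (assuming the usual Operator names)
//   Expr::Binary(1, Operator::Add, Expr::Binary(2, Operator::Multiply, 3))
// rather than ((1 + 2) * 3).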
{ if tok.token_type == Some(TokenType::TK_WITH) { self.eat_assert(&[TokenType::TK_WITH]); } else { @@ -1256,7 +1725,7 @@ impl<'a> Parser<'a> { }; let mut ctes = vec![self.parse_common_table_expr()?]; - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_COMMA) { self.eat_assert(&[TokenType::TK_COMMA]); loop { @@ -1274,17 +1743,543 @@ impl<'a> Parser<'a> { Ok(Some(With { recursive, ctes })) } - #[inline(always)] + fn parse_as(&mut self) -> Result, Error> { + match self.peek()? { + None => Ok(None), + Some(tok) => match tok.token_type.unwrap().fallback_id_if_ok() { + TokenType::TK_AS => { + self.eat_assert(&[TokenType::TK_AS]); + self.peek_nm()?; + Ok(Some(As::As(self.parse_nm()))) + } + TokenType::TK_STRING | TokenType::TK_ID => Ok(Some(As::Elided(self.parse_nm()))), + _ => Ok(None), + }, + } + } + + fn parse_window_defn(&mut self) -> Result { + self.peek_nm()?; + let name = self.parse_nm(); + self.eat_expect(&[TokenType::TK_AS])?; + self.eat_expect(&[TokenType::TK_LP])?; + let window = self.parse_window()?; + self.eat_expect(&[TokenType::TK_RP])?; + Ok(WindowDef { name, window }) + } + + fn parse_window_clause(&mut self) -> Result, Error> { + match self.peek()? { + None => return Ok(vec![]), + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_WINDOW => { + self.eat_assert(&[TokenType::TK_WINDOW]); + } + _ => return Ok(vec![]), + }, + } + + let mut result = vec![self.parse_window_defn()?]; + loop { + match self.peek_no_eof()?.token_type.unwrap() { + TokenType::TK_COMMA => { + self.eat_assert(&[TokenType::TK_COMMA]); + result.push(self.parse_window_defn()?); + } + _ => break, + } + } + + Ok(result) + } + + fn parse_group_by(&mut self) -> Result, Error> { + match self.peek()? { + None => return Ok(None), + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_GROUP => { + self.eat_assert(&[TokenType::TK_GROUP]); + self.eat_expect(&[TokenType::TK_BY])?; + } + _ => return Ok(None), + }, + } + + let exprs = self.parse_nexpr_list()?; + let having = match self.peek()? { + Some(tok) if tok.token_type == Some(TokenType::TK_HAVING) => { + self.eat_assert(&[TokenType::TK_HAVING]); + Some(self.parse_expr(0)?) + } + _ => None, + }; + + Ok(Some(GroupBy { exprs, having })) + } + + fn parse_where(&mut self) -> Result>, Error> { + match self.peek()? { + None => Ok(None), + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_WHERE => { + self.eat_assert(&[TokenType::TK_WHERE]); + let expr = self.parse_expr(0)?; + Ok(Some(expr)) + } + _ => Ok(None), + }, + } + } + + fn parse_indexed(&mut self) -> Result, Error> { + match self.peek()? { + None => Ok(None), + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_INDEXED => { + self.eat_assert(&[TokenType::TK_INDEXED]); + self.eat_expect(&[TokenType::TK_BY])?; + self.peek_nm()?; + Ok(Some(Indexed::IndexedBy(self.parse_nm()))) + } + TokenType::TK_NOT => { + self.eat_assert(&[TokenType::TK_NOT]); + self.eat_expect(&[TokenType::TK_INDEXED])?; + Ok(Some(Indexed::NotIndexed)) + } + _ => Ok(None), + }, + } + } + + fn parse_nm_list(&mut self) -> Result, Error> { + self.peek_nm()?; + let mut names = vec![self.parse_nm()]; + + loop { + match self.peek()? { + Some(tok) if tok.token_type == Some(TokenType::TK_COMMA) => { + self.eat_assert(&[TokenType::TK_COMMA]); + self.peek_nm()?; + names.push(self.parse_nm()); + } + _ => break, + } + } + + Ok(names) + } + + fn parse_on_using(&mut self) -> Result, Error> { + match self.peek()? 
{ + None => Ok(None), + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_ON => { + self.eat_assert(&[TokenType::TK_ON]); + let expr = self.parse_expr(0)?; + Ok(Some(JoinConstraint::On(expr))) + } + TokenType::TK_USING => { + self.eat_assert(&[TokenType::TK_USING]); + self.eat_expect(&[TokenType::TK_LP])?; + let names = self.parse_nm_list()?; + self.eat_expect(&[TokenType::TK_RP])?; + Ok(Some(JoinConstraint::Using(names))) + } + _ => Ok(None), + }, + } + } + + fn parse_joined_tables(&mut self) -> Result, Error> { + let mut result = vec![]; + loop { + let op = match self.peek()? { + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_COMMA => { + self.eat_assert(&[TokenType::TK_COMMA]); + JoinOperator::Comma + } + TokenType::TK_JOIN => { + self.eat_assert(&[TokenType::TK_JOIN]); + JoinOperator::TypedJoin(None) + } + TokenType::TK_JOIN_KW => { + let jkw = self.eat_assert(&[TokenType::TK_JOIN_KW]); + let tok = self.eat_expect(&[ + TokenType::TK_JOIN, + TokenType::TK_ID, + TokenType::TK_STRING, + TokenType::TK_INDEXED, + TokenType::TK_JOIN_KW, + ])?; + + match tok.token_type.unwrap() { + TokenType::TK_JOIN => { + JoinOperator::TypedJoin(Some(new_join_type(jkw.value, None, None)?)) + } + _ => { + let tok_name_1 = self.eat_assert(&[ + TokenType::TK_ID, + TokenType::TK_STRING, + TokenType::TK_INDEXED, + TokenType::TK_JOIN_KW, + ]); + + let tok = self.eat_expect(&[ + TokenType::TK_JOIN, + TokenType::TK_ID, + TokenType::TK_STRING, + TokenType::TK_INDEXED, + TokenType::TK_JOIN_KW, + ])?; + + match tok.token_type.unwrap() { + TokenType::TK_JOIN => JoinOperator::TypedJoin(Some( + new_join_type(jkw.value, Some(tok_name_1.value), None)?, + )), + _ => { + let tok_name_2 = self.eat_assert(&[ + TokenType::TK_ID, + TokenType::TK_STRING, + TokenType::TK_INDEXED, + TokenType::TK_JOIN_KW, + ]); + self.eat_expect(&[TokenType::TK_JOIN])?; + JoinOperator::TypedJoin(Some(new_join_type( + jkw.value, + Some(tok_name_1.value), + Some(tok_name_2.value), + )?)) + } + } + } + } + } + _ => break, + }, + None => break, + }; + + let tok = self.peek_expect(&[ + TokenType::TK_ID, + TokenType::TK_STRING, + TokenType::TK_INDEXED, + TokenType::TK_JOIN_KW, + TokenType::TK_LP, + ])?; + + match tok.token_type.unwrap().fallback_id_if_ok() { + TokenType::TK_ID + | TokenType::TK_STRING + | TokenType::TK_INDEXED + | TokenType::TK_JOIN_KW => { + let name = self.parse_fullname(false)?; + match self.peek()? 
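// A sketch of how the keyword triples collapse into JoinType flags via
// join_type_from_bytes/new_join_type above (assuming JoinType is the usual
// bitflags-style set):
//
//   new_join_type(b"NATURAL", Some(b"LEFT".as_slice()), None)
//     == Ok(JoinType::NATURAL | JoinType::LEFT | JoinType::OUTER)
//
//   // OUTER with neither a LEFT nor a RIGHT bit fails the validity check:
//   new_join_type(b"OUTER", None, None).is_err()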
{ + None => { + result.push(JoinedSelectTable { + operator: op, + table: Box::new(SelectTable::Table(name, None, None)), + constraint: None, + }); + } + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_LP => { + self.eat_assert(&[TokenType::TK_LP]); + let exprs = self.parse_expr_list()?; + self.eat_assert(&[TokenType::TK_RP]); + let alias = self.parse_as()?; + let on_using = self.parse_on_using()?; + result.push(JoinedSelectTable { + operator: op, + table: Box::new(SelectTable::TableCall(name, exprs, alias)), + constraint: on_using, + }); + } + _ => { + let alias = self.parse_as()?; + let indexed = self.parse_indexed()?; + let on_using = self.parse_on_using()?; + result.push(JoinedSelectTable { + operator: op, + table: Box::new(SelectTable::Table(name, alias, indexed)), + constraint: on_using, + }); + } + }, + } + } + TokenType::TK_LP => { + self.eat_assert(&[TokenType::TK_LP]); + match self.peek_no_eof()?.token_type.unwrap() { + TokenType::TK_SELECT | TokenType::TK_WITH | TokenType::TK_VALUES => { + let select = self.parse_select()?; + self.eat_expect(&[TokenType::TK_RP])?; + let alias = self.parse_as()?; + let on_using = self.parse_on_using()?; + result.push(JoinedSelectTable { + operator: op, + table: Box::new(SelectTable::Select(select, alias)), + constraint: on_using, + }); + } + _ => { + let fr = self.parse_from_clause()?; + self.eat_expect(&[TokenType::TK_RP])?; + let alias = self.parse_as()?; + let on_using = self.parse_on_using()?; + result.push(JoinedSelectTable { + operator: op, + table: Box::new(SelectTable::Sub(fr, alias)), + constraint: on_using, + }); + } + } + } + _ => unreachable!(), + } + } + + Ok(result) + } + + fn parse_from_clause(&mut self) -> Result { + let tok = self.peek_expect(&[ + TokenType::TK_ID, + TokenType::TK_STRING, + TokenType::TK_INDEXED, + TokenType::TK_JOIN_KW, + TokenType::TK_LP, + ])?; + + match tok.token_type.unwrap().fallback_id_if_ok() { + TokenType::TK_ID + | TokenType::TK_STRING + | TokenType::TK_INDEXED + | TokenType::TK_JOIN_KW => { + let name = self.parse_fullname(false)?; + match self.peek()? { + None => Ok(FromClause { + select: Box::new(SelectTable::Table(name, None, None)), + joins: vec![], + }), + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_LP => { + self.eat_assert(&[TokenType::TK_LP]); + let exprs = self.parse_expr_list()?; + self.eat_assert(&[TokenType::TK_RP]); + let alias = self.parse_as()?; + Ok(FromClause { + select: Box::new(SelectTable::TableCall(name, exprs, alias)), + joins: self.parse_joined_tables()?, + }) + } + _ => { + let alias = self.parse_as()?; + let indexed = self.parse_indexed()?; + Ok(FromClause { + select: Box::new(SelectTable::Table(name, alias, indexed)), + joins: self.parse_joined_tables()?, + }) + } + }, + } + } + TokenType::TK_LP => { + self.eat_assert(&[TokenType::TK_LP]); + match self.peek_no_eof()?.token_type.unwrap() { + TokenType::TK_SELECT | TokenType::TK_WITH | TokenType::TK_VALUES => { + let select = self.parse_select()?; + self.eat_expect(&[TokenType::TK_RP])?; + let alias = self.parse_as()?; + Ok(FromClause { + select: Box::new(SelectTable::Select(select, alias)), + joins: self.parse_joined_tables()?, + }) + } + _ => { + let fr = self.parse_from_clause()?; + self.eat_expect(&[TokenType::TK_RP])?; + let alias = self.parse_as()?; + Ok(FromClause { + select: Box::new(SelectTable::Sub(fr, alias)), + joins: self.parse_joined_tables()?, + }) + } + } + } + _ => unreachable!(), + } + } + + fn parse_from_clause_opt(&mut self) -> Result, Error> { + match self.peek()? 
{ + None => return Ok(None), + Some(tok) if tok.token_type == Some(TokenType::TK_FROM) => { + self.eat_assert(&[TokenType::TK_FROM]); + } + _ => return Ok(None), + } + + Ok(Some(self.parse_from_clause()?)) + } + + fn parse_select_column(&mut self) -> Result { + match self.peek_no_eof()?.token_type.unwrap().fallback_id_if_ok() { + TokenType::TK_STAR => { + self.eat_assert(&[TokenType::TK_STAR]); + Ok(ResultColumn::Star) + } + tt => { + // dot STAR case + if tt == TokenType::TK_ID + || tt == TokenType::TK_STRING + || tt == TokenType::TK_INDEXED + || tt == TokenType::TK_JOIN_KW + { + if let Ok(res) = self.mark(|p| -> Result { + let name = p.parse_nm(); + p.eat_expect(&[TokenType::TK_DOT])?; + p.eat_expect(&[TokenType::TK_STAR])?; + Ok(ResultColumn::TableStar(name)) + }) { + return Ok(res); + } + } + + let expr = self.parse_expr(0)?; + let alias = self.parse_as()?; + Ok(ResultColumn::Expr(expr, alias)) + } + } + } + + fn parse_select_columns(&mut self) -> Result, Error> { + let mut result = vec![self.parse_select_column()?]; + + loop { + if let Some(tok) = self.peek()? { + if tok.token_type == Some(TokenType::TK_COMMA) { + self.eat_assert(&[TokenType::TK_COMMA]); + } else { + break; + } + } else { + break; + } + + result.push(self.parse_select_column()?); + } + + Ok(result) + } + + fn parse_nexpr_list(&mut self) -> Result>, Error> { + let mut result = vec![self.parse_expr(0)?]; + loop { + if let Some(tok) = self.peek()? { + if tok.token_type == Some(TokenType::TK_COMMA) { + self.eat_assert(&[TokenType::TK_COMMA]); + } else { + break; + } + } else { + break; + } + + result.push(self.parse_expr(0)?); + } + + Ok(result) + } + + fn parse_one_select(&mut self) -> Result { + let tok = self.eat_expect(&[TokenType::TK_SELECT, TokenType::TK_VALUES])?; + match tok.token_type.unwrap() { + TokenType::TK_SELECT => { + let distinct = self.parse_distinct()?; + let collist = self.parse_select_columns()?; + let from = self.parse_from_clause_opt()?; + let where_clause = self.parse_where()?; + let group_by = self.parse_group_by()?; + let window_clause = self.parse_window_clause()?; + Ok(OneSelect::Select { + distinctness: distinct, + columns: collist, + from, + where_clause, + group_by, + window_clause, + }) + } + TokenType::TK_VALUES => { + self.eat_expect(&[TokenType::TK_LP])?; + let mut values = vec![self.parse_nexpr_list()?]; + self.eat_expect(&[TokenType::TK_RP])?; + + loop { + if let Some(tok) = self.peek()? { + if tok.token_type == Some(TokenType::TK_COMMA) { + self.eat_assert(&[TokenType::TK_COMMA]); + } else { + break; + } + } else { + break; + } + + self.eat_expect(&[TokenType::TK_LP])?; + values.push(self.parse_nexpr_list()?); + self.eat_expect(&[TokenType::TK_RP])?; + } + + Ok(OneSelect::Values(values)) + } + _ => unreachable!(), + } + } + fn parse_select_body(&mut self) -> Result { - self.eat_expect(&[TokenType::TK_SELECT])?; - todo!() + let select = self.parse_one_select()?; + let mut compounds = vec![]; + loop { + let op = match self.peek()? 
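// Sketch of what this compound loop builds: each UNION/EXCEPT/INTERSECT
// round appends one CompoundSelect, so "SELECT 1 UNION ALL SELECT 2"
// becomes a single SelectBody, roughly:
//
//   SelectBody {
//       select: /* OneSelect for SELECT 1 */,
//       compounds: vec![CompoundSelect {
//           operator: CompoundOperator::UnionAll,
//           select: /* OneSelect for SELECT 2 */,
//       }],
//   }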
{ + Some(tok) => match tok.token_type.unwrap() { + TokenType::TK_UNION => { + self.eat_assert(&[TokenType::TK_UNION]); + if self.peek_no_eof()?.token_type == Some(TokenType::TK_ALL) { + self.eat_assert(&[TokenType::TK_ALL]); + CompoundOperator::UnionAll + } else { + CompoundOperator::Union + } + } + TokenType::TK_EXCEPT => { + self.eat_assert(&[TokenType::TK_EXCEPT]); + CompoundOperator::Except + } + TokenType::TK_INTERSECT => { + self.eat_assert(&[TokenType::TK_INTERSECT]); + CompoundOperator::Intersect + } + _ => break, + }, + None => break, + }; + + compounds.push(CompoundSelect { + operator: op, + select: self.parse_one_select()?, + }); + } + + Ok(SelectBody { select, compounds }) } fn parse_sorted_column(&mut self) -> Result { let expr = self.parse_expr(0)?; let sort_order = self.parse_sort_order()?; - let nulls = match self.peek_ignore_eof()? { + let nulls = match self.peek()? { Some(tok) if tok.token_type == Some(TokenType::TK_NULLS) => { self.eat_assert(&[TokenType::TK_NULLS]); let tok = self.eat_expect(&[TokenType::TK_FIRST, TokenType::TK_LAST])?; @@ -1304,9 +2299,8 @@ impl<'a> Parser<'a> { }) } - #[inline(always)] fn parse_order_by(&mut self) -> Result, Error> { - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_ORDER) { self.eat_assert(&[TokenType::TK_ORDER]); } else { @@ -1318,7 +2312,7 @@ impl<'a> Parser<'a> { self.eat_expect(&[TokenType::TK_BY])?; let mut columns = vec![self.parse_sorted_column()?]; - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_COMMA) { self.eat_assert(&[TokenType::TK_COMMA]); loop { @@ -1336,9 +2330,8 @@ impl<'a> Parser<'a> { Ok(columns) } - #[inline(always)] fn parse_limit(&mut self) -> Result, Error> { - if let Some(tok) = self.peek_ignore_eof()? { + if let Some(tok) = self.peek()? { if tok.token_type == Some(TokenType::TK_LIMIT) { self.eat_assert(&[TokenType::TK_LIMIT]); } else { @@ -1349,7 +2342,7 @@ impl<'a> Parser<'a> { } let limit = self.parse_expr(0)?; - let offset = match self.peek_ignore_eof()? { + let offset = match self.peek()? 
{ Some(tok) => match tok.token_type.unwrap() { TokenType::TK_OFFSET | TokenType::TK_COMMA => { self.eat_assert(&[TokenType::TK_OFFSET, TokenType::TK_COMMA]); @@ -1366,9 +2359,7 @@ impl<'a> Parser<'a> { })) } - #[inline(always)] - fn parse_select(&mut self) -> Result { - let with = self.parse_with()?; + fn parse_select_without_cte(&mut self, with: Option) -> Result { let body = self.parse_select_body()?; let order_by = self.parse_order_by()?; let limit = self.parse_limit()?; @@ -1380,7 +2371,11 @@ impl<'a> Parser<'a> { }) } - #[inline(always)] + fn parse_select(&mut self) -> Result { + let with = self.parse_with()?; + self.parse_select_without_cte(with) + } + fn parse_create_table_args(&mut self) -> Result { let tok = self.eat_expect(&[TokenType::TK_LP, TokenType::TK_AS])?; match tok.token_type.unwrap() { @@ -1390,7 +2385,6 @@ impl<'a> Parser<'a> { } } - #[inline(always)] fn parse_create_table(&mut self, temporary: bool) -> Result { self.eat_assert(&[TokenType::TK_TABLE]); let if_not_exists = self.parse_if_not_exists()?; @@ -1604,6 +2598,35 @@ mod tests { name: Name::Quoted("'my_savepoint'".to_string()), })], ), + ( + b"RELEASE SAVEPOINT ABORT".as_slice(), + vec![Cmd::Stmt(Stmt::Release { + name: Name::Ident("ABORT".to_string()), + })], + ), + // test exprs + ( + b"SELECT 1".as_slice(), + vec![Cmd::Stmt(Stmt::Select(Select { + with: None, + body: SelectBody { + select: OneSelect::Select { + distinctness: None, + columns: vec![ResultColumn::Expr( + Box::new(Expr::Literal(Literal::Numeric("1".to_owned()))), + None, + )], + from: None, + where_clause: None, + group_by: None, + window_clause: vec![], + }, + compounds: vec![], + }, + order_by: vec![], + limit: None, + }))], + ), ]; for (input, expected) in test_cases { diff --git a/core/parser/token.rs b/core/parser/token.rs index 0f85e84be..7ea008aa9 100644 --- a/core/parser/token.rs +++ b/core/parser/token.rs @@ -184,164 +184,185 @@ impl Display for TokenType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use TokenType::*; let s = match self { - TK_ABORT => Some("ABORT"), - TK_ACTION => Some("ACTION"), - TK_ADD => Some("ADD"), - TK_AFTER => Some("AFTER"), - TK_ALL => Some("ALL"), - TK_ALTER => Some("ALTER"), - TK_ANALYZE => Some("ANALYZE"), - TK_ALWAYS => Some("ALWAYS"), - TK_AND => Some("AND"), - TK_AS => Some("AS"), - TK_ASC => Some("ASC"), - TK_ATTACH => Some("ATTACH"), - TK_AUTOINCR => Some("AUTOINCREMENT"), - TK_BEFORE => Some("BEFORE"), - TK_BEGIN => Some("BEGIN"), - TK_BETWEEN => Some("BETWEEN"), - TK_BY => Some("BY"), - TK_CASCADE => Some("CASCADE"), - TK_CASE => Some("CASE"), - TK_CAST => Some("CAST"), - TK_CHECK => Some("CHECK"), - TK_COLLATE => Some("COLLATE"), - TK_COLUMNKW => Some("COLUMN"), - TK_COMMIT => Some("COMMIT"), - TK_CONFLICT => Some("CONFLICT"), - TK_CONSTRAINT => Some("CONSTRAINT"), - TK_CREATE => Some("CREATE"), - TK_CURRENT => Some("CURRENT"), - TK_DATABASE => Some("DATABASE"), - TK_DEFAULT => Some("DEFAULT"), - TK_DEFERRABLE => Some("DEFERRABLE"), - TK_DEFERRED => Some("DEFERRED"), - TK_DELETE => Some("DELETE"), - TK_DESC => Some("DESC"), - TK_DETACH => Some("DETACH"), - TK_DISTINCT => Some("DISTINCT"), - TK_DO => Some("DO"), - TK_DROP => Some("DROP"), - TK_EACH => Some("EACH"), - TK_ELSE => Some("ELSE"), - TK_END => Some("END"), - TK_ESCAPE => Some("ESCAPE"), - TK_EXCEPT => Some("EXCEPT"), - TK_EXCLUDE => Some("EXCLUDE"), - TK_EXCLUSIVE => Some("EXCLUSIVE"), - TK_EXISTS => Some("EXISTS"), - TK_EXPLAIN => Some("EXPLAIN"), - TK_FAIL => Some("FAIL"), - TK_FILTER => Some("FILTER"), - TK_FIRST => 
Some("FIRST"), - TK_FOLLOWING => Some("FOLLOWING"), - TK_FOR => Some("FOR"), - TK_FOREIGN => Some("FOREIGN"), - TK_FROM => Some("FROM"), - TK_GENERATED => Some("GENERATED"), - TK_GROUP => Some("GROUP"), - TK_GROUPS => Some("GROUPS"), - TK_HAVING => Some("HAVING"), - TK_IF => Some("IF"), - TK_IGNORE => Some("IGNORE"), - TK_IMMEDIATE => Some("IMMEDIATE"), - TK_IN => Some("IN"), - TK_INDEX => Some("INDEX"), - TK_INDEXED => Some("INDEXED"), - TK_INITIALLY => Some("INITIALLY"), - TK_INSERT => Some("INSERT"), - TK_INSTEAD => Some("INSTEAD"), - TK_INTERSECT => Some("INTERSECT"), - TK_INTO => Some("INTO"), - TK_IS => Some("IS"), - TK_ISNULL => Some("ISNULL"), - TK_JOIN => Some("JOIN"), - TK_KEY => Some("KEY"), - TK_LAST => Some("LAST"), - TK_LIMIT => Some("LIMIT"), - TK_MATCH => Some("MATCH"), - TK_MATERIALIZED => Some("MATERIALIZED"), - TK_NO => Some("NO"), - TK_NOT => Some("NOT"), - TK_NOTHING => Some("NOTHING"), - TK_NOTNULL => Some("NOTNULL"), - TK_NULL => Some("NULL"), - TK_NULLS => Some("NULLS"), - TK_OF => Some("OF"), - TK_OFFSET => Some("OFFSET"), - TK_ON => Some("ON"), - TK_OR => Some("OR"), - TK_ORDER => Some("ORDER"), - TK_OTHERS => Some("OTHERS"), - TK_OVER => Some("OVER"), - TK_PARTITION => Some("PARTITION"), - TK_PLAN => Some("PLAN"), - TK_PRAGMA => Some("PRAGMA"), - TK_PRECEDING => Some("PRECEDING"), - TK_PRIMARY => Some("PRIMARY"), - TK_QUERY => Some("QUERY"), - TK_RAISE => Some("RAISE"), - TK_RANGE => Some("RANGE"), - TK_RECURSIVE => Some("RECURSIVE"), - TK_REFERENCES => Some("REFERENCES"), - TK_REINDEX => Some("REINDEX"), - TK_RELEASE => Some("RELEASE"), - TK_RENAME => Some("RENAME"), - TK_REPLACE => Some("REPLACE"), - TK_RETURNING => Some("RETURNING"), - TK_RESTRICT => Some("RESTRICT"), - TK_ROLLBACK => Some("ROLLBACK"), - TK_ROW => Some("ROW"), - TK_ROWS => Some("ROWS"), - TK_SAVEPOINT => Some("SAVEPOINT"), - TK_SELECT => Some("SELECT"), - TK_SET => Some("SET"), - TK_TABLE => Some("TABLE"), - TK_TEMP => Some("TEMP"), // or TEMPORARY - TK_TIES => Some("TIES"), - TK_THEN => Some("THEN"), - TK_TO => Some("TO"), - TK_TRANSACTION => Some("TRANSACTION"), - TK_TRIGGER => Some("TRIGGER"), - TK_UNBOUNDED => Some("UNBOUNDED"), - TK_UNION => Some("UNION"), - TK_UNIQUE => Some("UNIQUE"), - TK_UPDATE => Some("UPDATE"), - TK_USING => Some("USING"), - TK_VACUUM => Some("VACUUM"), - TK_VALUES => Some("VALUES"), - TK_VIEW => Some("VIEW"), - TK_VIRTUAL => Some("VIRTUAL"), - TK_WHEN => Some("WHEN"), - TK_WHERE => Some("WHERE"), - TK_WINDOW => Some("WINDOW"), - TK_WITH => Some("WITH"), - TK_WITHOUT => Some("WITHOUT"), - TK_BITAND => Some("&"), - TK_BITNOT => Some("~"), - TK_BITOR => Some("|"), - TK_COMMA => Some(","), - TK_CONCAT => Some("||"), - TK_DOT => Some("."), - TK_EQ => Some("="), // or == - TK_GT => Some(">"), - TK_GE => Some(">="), - TK_LP => Some("("), - TK_LSHIFT => Some("<<"), - TK_LE => Some("<="), - TK_LT => Some("<"), - TK_MINUS => Some("-"), - TK_NE => Some("!="), // or <> - TK_PLUS => Some("+"), - TK_REM => Some("%"), - TK_RP => Some(")"), - TK_RSHIFT => Some(">>"), - TK_SEMI => Some(";"), - TK_SLASH => Some("/"), - TK_STAR => Some("*"), - _ => None, - } - .unwrap_or("unknown"); + TK_ABORT => "ABORT", + TK_ACTION => "ACTION", + TK_ADD => "ADD", + TK_AFTER => "AFTER", + TK_ALL => "ALL", + TK_ALTER => "ALTER", + TK_ANALYZE => "ANALYZE", + TK_ALWAYS => "ALWAYS", + TK_AND => "AND", + TK_AS => "AS", + TK_ASC => "ASC", + TK_ATTACH => "ATTACH", + TK_AUTOINCR => "AUTOINCREMENT", + TK_BEFORE => "BEFORE", + TK_BEGIN => "BEGIN", + TK_BETWEEN => "BETWEEN", + TK_BY => "BY", + TK_CASCADE => 
"CASCADE", + TK_CASE => "CASE", + TK_CAST => "CAST", + TK_CHECK => "CHECK", + TK_COLLATE => "COLLATE", + TK_COLUMNKW => "COLUMN", + TK_COMMIT => "COMMIT", + TK_CONFLICT => "CONFLICT", + TK_CONSTRAINT => "CONSTRAINT", + TK_CREATE => "CREATE", + TK_CURRENT => "CURRENT", + TK_DATABASE => "DATABASE", + TK_DEFAULT => "DEFAULT", + TK_DEFERRABLE => "DEFERRABLE", + TK_DEFERRED => "DEFERRED", + TK_DELETE => "DELETE", + TK_DESC => "DESC", + TK_DETACH => "DETACH", + TK_DISTINCT => "DISTINCT", + TK_DO => "DO", + TK_DROP => "DROP", + TK_EACH => "EACH", + TK_ELSE => "ELSE", + TK_END => "END", + TK_ESCAPE => "ESCAPE", + TK_EXCEPT => "EXCEPT", + TK_EXCLUDE => "EXCLUDE", + TK_EXCLUSIVE => "EXCLUSIVE", + TK_EXISTS => "EXISTS", + TK_EXPLAIN => "EXPLAIN", + TK_FAIL => "FAIL", + TK_FILTER => "FILTER", + TK_FIRST => "FIRST", + TK_FOLLOWING => "FOLLOWING", + TK_FOR => "FOR", + TK_FOREIGN => "FOREIGN", + TK_FROM => "FROM", + TK_GENERATED => "GENERATED", + TK_GROUP => "GROUP", + TK_GROUPS => "GROUPS", + TK_HAVING => "HAVING", + TK_IF => "IF", + TK_IGNORE => "IGNORE", + TK_IMMEDIATE => "IMMEDIATE", + TK_IN => "IN", + TK_INDEX => "INDEX", + TK_INDEXED => "INDEXED", + TK_INITIALLY => "INITIALLY", + TK_INSERT => "INSERT", + TK_INSTEAD => "INSTEAD", + TK_INTERSECT => "INTERSECT", + TK_INTO => "INTO", + TK_IS => "IS", + TK_ISNULL => "ISNULL", + TK_JOIN => "JOIN", + TK_KEY => "KEY", + TK_LAST => "LAST", + TK_LIMIT => "LIMIT", + TK_MATCH => "MATCH", + TK_MATERIALIZED => "MATERIALIZED", + TK_NO => "NO", + TK_NOT => "NOT", + TK_NOTHING => "NOTHING", + TK_NOTNULL => "NOTNULL", + TK_NULL => "NULL", + TK_NULLS => "NULLS", + TK_OF => "OF", + TK_OFFSET => "OFFSET", + TK_ON => "ON", + TK_OR => "OR", + TK_ORDER => "ORDER", + TK_OTHERS => "OTHERS", + TK_OVER => "OVER", + TK_PARTITION => "PARTITION", + TK_PLAN => "PLAN", + TK_PRAGMA => "PRAGMA", + TK_PRECEDING => "PRECEDING", + TK_PRIMARY => "PRIMARY", + TK_QUERY => "QUERY", + TK_RAISE => "RAISE", + TK_RANGE => "RANGE", + TK_RECURSIVE => "RECURSIVE", + TK_REFERENCES => "REFERENCES", + TK_REINDEX => "REINDEX", + TK_RELEASE => "RELEASE", + TK_RENAME => "RENAME", + TK_REPLACE => "REPLACE", + TK_RETURNING => "RETURNING", + TK_RESTRICT => "RESTRICT", + TK_ROLLBACK => "ROLLBACK", + TK_ROW => "ROW", + TK_ROWS => "ROWS", + TK_SAVEPOINT => "SAVEPOINT", + TK_SELECT => "SELECT", + TK_SET => "SET", + TK_TABLE => "TABLE", + TK_TEMP => "TEMP", // or TEMPORARY + TK_TIES => "TIES", + TK_THEN => "THEN", + TK_TO => "TO", + TK_TRANSACTION => "TRANSACTION", + TK_TRIGGER => "TRIGGER", + TK_UNBOUNDED => "UNBOUNDED", + TK_UNION => "UNION", + TK_UNIQUE => "UNIQUE", + TK_UPDATE => "UPDATE", + TK_USING => "USING", + TK_VACUUM => "VACUUM", + TK_VALUES => "VALUES", + TK_VIEW => "VIEW", + TK_VIRTUAL => "VIRTUAL", + TK_WHEN => "WHEN", + TK_WHERE => "WHERE", + TK_WINDOW => "WINDOW", + TK_WITH => "WITH", + TK_WITHOUT => "WITHOUT", + TK_BITAND => "&", + TK_BITNOT => "~", + TK_BITOR => "|", + TK_COMMA => ",", + TK_CONCAT => "||", + TK_DOT => ".", + TK_EQ => "=", // or == + TK_GT => ">", + TK_GE => ">=", + TK_LP => "(", + TK_LSHIFT => "<<", + TK_LE => "<=", + TK_LT => "<", + TK_MINUS => "-", + TK_NE => "!=", // or <> + TK_PLUS => "+", + TK_REM => "%", + TK_RP => ")", + TK_RSHIFT => ">>", + TK_SEMI => ";", + TK_SLASH => "/", + TK_STAR => "*", + _ => "unknown", + }; write!(f, "{s}") } } + +impl TokenType { + /// if your parsing process expects next token to be TK_ID, remember to call this function !!! 
+    pub fn fallback_id_if_ok(self) -> Self {
+        use TokenType::*;
+        match self {
+            TK_ABORT | TK_ACTION | TK_AFTER | TK_ANALYZE | TK_ASC | TK_ATTACH | TK_BEFORE
+            | TK_BEGIN | TK_BY | TK_CASCADE | TK_CAST | TK_COLUMNKW | TK_CONFLICT | TK_DATABASE
+            | TK_DEFERRED | TK_DESC | TK_DETACH | TK_DO | TK_EACH | TK_END | TK_EXCLUSIVE
+            | TK_EXPLAIN | TK_FAIL | TK_FOR | TK_IGNORE | TK_IMMEDIATE | TK_INITIALLY
+            | TK_INSTEAD | TK_LIKE_KW | TK_MATCH | TK_NO | TK_PLAN | TK_QUERY | TK_KEY | TK_OF
+            | TK_OFFSET | TK_PRAGMA | TK_RAISE | TK_RECURSIVE | TK_RELEASE | TK_REPLACE
+            | TK_RESTRICT | TK_ROW | TK_ROWS | TK_ROLLBACK | TK_SAVEPOINT | TK_TEMP
+            | TK_TRIGGER | TK_VACUUM | TK_VIEW | TK_VIRTUAL | TK_WITH | TK_WITHOUT | TK_NULLS
+            | TK_FIRST | TK_LAST | TK_EXCEPT | TK_INTERSECT | TK_UNION | TK_CURRENT
+            | TK_FOLLOWING | TK_PARTITION | TK_PRECEDING | TK_RANGE | TK_UNBOUNDED | TK_EXCLUDE
+            | TK_GROUPS | TK_OTHERS | TK_TIES | TK_GENERATED | TK_ALWAYS | TK_MATERIALIZED
+            | TK_REINDEX | TK_RENAME | TK_CTIME_KW | TK_IF => TK_ID,
+            _ => self,
+        }
+    }
+}
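
Reviewer note, not part of the patch: a minimal sketch of the contract fallback_id_if_ok encodes, assuming only that TokenType derives PartialEq and Debug (needed for assert_eq!). Non-reserved keywords such as ABORT collapse to TK_ID wherever the grammar expects an identifier, which is exactly what the new RELEASE SAVEPOINT ABORT test case exercises; reserved words such as SELECT pass through unchanged.

#[cfg(test)]
mod fallback_contract_sketch {
    use super::TokenType;

    #[test]
    fn non_reserved_keywords_fall_back_to_id() {
        // ABORT lexes as TK_ABORT but is a legal savepoint name, so the
        // fallback rewrites it to TK_ID before the parser matches on it.
        assert_eq!(TokenType::TK_ABORT.fallback_id_if_ok(), TokenType::TK_ID);
        // SELECT is reserved: it is returned unchanged and can never be
        // accepted where an identifier is required.
        assert_eq!(TokenType::TK_SELECT.fallback_id_if_ok(), TokenType::TK_SELECT);
    }
}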
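
A second sketch covers the reshaped SELECT path, mirroring the SELECT 1 entry in the test table above. The assumptions are flagged in the comments: that the public Parser iterator yields Result<Cmd, Error> (the benchmark's calls do not pin down the item type), and that Cmd derives Debug.

#[cfg(test)]
mod select_path_sketch {
    use super::{Cmd, Parser, Stmt};

    #[test]
    fn select_without_cte_takes_the_with_none_path() {
        // Assumption: next() yields Option<Result<Cmd, Error>>, hence the
        // two unwraps. "SELECT 1" has no WITH prefix, so parse_with()
        // returns None and parse_select_without_cte() receives with == None.
        let cmd = Parser::new(b"SELECT 1").next().unwrap().unwrap();
        match cmd {
            Cmd::Stmt(Stmt::Select(select)) => {
                assert!(select.with.is_none());
                // order_by is now a Vec rather than an Option, per the ast.rs changes.
                assert!(select.order_by.is_empty());
                assert!(select.limit.is_none());
            }
            other => panic!("expected a SELECT statement, got {other:?}"),
        }
    }
}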