vendor sqlite3-parser (lemon-rs)
vendored/sqlite3-parser/src/dialect/mod.rs (new file, +245)
@@ -0,0 +1,245 @@
//! SQLite dialect

use std::fmt::Formatter;
use std::str;
use uncased::UncasedStr;

mod token;
pub use token::TokenType;

/// Token value (lexeme)
#[derive(Clone, Copy)]
pub struct Token<'i>(pub usize, pub &'i [u8], pub usize);

pub(crate) fn sentinel(start: usize) -> Token<'static> {
    Token(start, b"", start)
}

impl Token<'_> {
    /// Access token value
    pub fn unwrap(self) -> String {
        from_bytes(self.1)
    }
}

impl std::fmt::Debug for Token<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_tuple("Token").field(&self.1).finish()
    }
}

impl TokenType {
    // TODO try Cow<&'static, str> (Borrowed<&'static str> for keyword and Owned<String> for below),
    // => Syntax error on keyword will be better
    // => `from_token` will become unnecessary
    pub(crate) fn to_token(self, start: usize, value: &[u8], end: usize) -> Token<'_> {
        Token(start, value, end)
    }
}

pub(crate) fn from_bytes(bytes: &[u8]) -> String {
    unsafe { str::from_utf8_unchecked(bytes).to_owned() }
}

include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
pub(crate) const MAX_KEYWORD_LEN: usize = 17;

/// Check if `word` is a keyword
pub fn keyword_token(word: &[u8]) -> Option<TokenType> {
    KEYWORDS
        .get(UncasedStr::new(unsafe { str::from_utf8_unchecked(word) }))
        .copied()
}

pub(crate) fn is_identifier(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    let bytes = name.as_bytes();
    is_identifier_start(bytes[0])
        && (bytes.len() == 1 || bytes[1..].iter().all(|b| is_identifier_continue(*b)))
}

pub(crate) fn is_identifier_start(b: u8) -> bool {
    b.is_ascii_uppercase() || b == b'_' || b.is_ascii_lowercase() || b > b'\x7F'
}

pub(crate) fn is_identifier_continue(b: u8) -> bool {
    b == b'$'
        || b.is_ascii_digit()
        || b.is_ascii_uppercase()
        || b == b'_'
        || b.is_ascii_lowercase()
        || b > b'\x7F'
}

// keyword may become an identifier
// see %fallback in parse.y
pub(crate) fn from_token(_ty: u16, value: Token) -> String {
    from_bytes(value.1)
}

impl TokenType {
    /// Return the associated string (mainly for testing)
    pub const fn as_str(&self) -> Option<&'static str> {
        use TokenType::*;
        match self {
            TK_ABORT => Some("ABORT"),
            TK_ACTION => Some("ACTION"),
            TK_ADD => Some("ADD"),
            TK_AFTER => Some("AFTER"),
            TK_ALL => Some("ALL"),
            TK_ALTER => Some("ALTER"),
            TK_ANALYZE => Some("ANALYZE"),
            TK_ALWAYS => Some("ALWAYS"),
            TK_AND => Some("AND"),
            TK_AS => Some("AS"),
            TK_ASC => Some("ASC"),
            TK_ATTACH => Some("ATTACH"),
            TK_AUTOINCR => Some("AUTOINCREMENT"),
            TK_BEFORE => Some("BEFORE"),
            TK_BEGIN => Some("BEGIN"),
            TK_BETWEEN => Some("BETWEEN"),
            TK_BY => Some("BY"),
            TK_CASCADE => Some("CASCADE"),
            TK_CASE => Some("CASE"),
            TK_CAST => Some("CAST"),
            TK_CHECK => Some("CHECK"),
            TK_COLLATE => Some("COLLATE"),
            TK_COLUMNKW => Some("COLUMN"),
            TK_COMMIT => Some("COMMIT"),
            TK_CONFLICT => Some("CONFLICT"),
            TK_CONSTRAINT => Some("CONSTRAINT"),
            TK_CREATE => Some("CREATE"),
            TK_CURRENT => Some("CURRENT"),
            TK_DATABASE => Some("DATABASE"),
            TK_DEFAULT => Some("DEFAULT"),
            TK_DEFERRABLE => Some("DEFERRABLE"),
            TK_DEFERRED => Some("DEFERRED"),
            TK_DELETE => Some("DELETE"),
            TK_DESC => Some("DESC"),
            TK_DETACH => Some("DETACH"),
            TK_DISTINCT => Some("DISTINCT"),
            TK_DO => Some("DO"),
            TK_DROP => Some("DROP"),
            TK_EACH => Some("EACH"),
            TK_ELSE => Some("ELSE"),
            TK_END => Some("END"),
            TK_ESCAPE => Some("ESCAPE"),
            TK_EXCEPT => Some("EXCEPT"),
            TK_EXCLUDE => Some("EXCLUDE"),
            TK_EXCLUSIVE => Some("EXCLUSIVE"),
            TK_EXISTS => Some("EXISTS"),
            TK_EXPLAIN => Some("EXPLAIN"),
            TK_FAIL => Some("FAIL"),
            TK_FILTER => Some("FILTER"),
            TK_FIRST => Some("FIRST"),
            TK_FOLLOWING => Some("FOLLOWING"),
            TK_FOR => Some("FOR"),
            TK_FOREIGN => Some("FOREIGN"),
            TK_FROM => Some("FROM"),
            TK_GENERATED => Some("GENERATED"),
            TK_GROUP => Some("GROUP"),
            TK_GROUPS => Some("GROUPS"),
            TK_HAVING => Some("HAVING"),
            TK_IF => Some("IF"),
            TK_IGNORE => Some("IGNORE"),
            TK_IMMEDIATE => Some("IMMEDIATE"),
            TK_IN => Some("IN"),
            TK_INDEX => Some("INDEX"),
            TK_INDEXED => Some("INDEXED"),
            TK_INITIALLY => Some("INITIALLY"),
            TK_INSERT => Some("INSERT"),
            TK_INSTEAD => Some("INSTEAD"),
            TK_INTERSECT => Some("INTERSECT"),
            TK_INTO => Some("INTO"),
            TK_IS => Some("IS"),
            TK_ISNULL => Some("ISNULL"),
            TK_JOIN => Some("JOIN"),
            TK_KEY => Some("KEY"),
            TK_LAST => Some("LAST"),
            TK_LIMIT => Some("LIMIT"),
            TK_MATCH => Some("MATCH"),
            TK_MATERIALIZED => Some("MATERIALIZED"),
            TK_NO => Some("NO"),
            TK_NOT => Some("NOT"),
            TK_NOTHING => Some("NOTHING"),
            TK_NOTNULL => Some("NOTNULL"),
            TK_NULL => Some("NULL"),
            TK_NULLS => Some("NULLS"),
            TK_OF => Some("OF"),
            TK_OFFSET => Some("OFFSET"),
            TK_ON => Some("ON"),
            TK_OR => Some("OR"),
            TK_ORDER => Some("ORDER"),
            TK_OTHERS => Some("OTHERS"),
            TK_OVER => Some("OVER"),
            TK_PARTITION => Some("PARTITION"),
            TK_PLAN => Some("PLAN"),
            TK_PRAGMA => Some("PRAGMA"),
            TK_PRECEDING => Some("PRECEDING"),
            TK_PRIMARY => Some("PRIMARY"),
            TK_QUERY => Some("QUERY"),
            TK_RAISE => Some("RAISE"),
            TK_RANGE => Some("RANGE"),
            TK_RECURSIVE => Some("RECURSIVE"),
            TK_REFERENCES => Some("REFERENCES"),
            TK_REINDEX => Some("REINDEX"),
            TK_RELEASE => Some("RELEASE"),
            TK_RENAME => Some("RENAME"),
            TK_REPLACE => Some("REPLACE"),
            TK_RETURNING => Some("RETURNING"),
            TK_RESTRICT => Some("RESTRICT"),
            TK_ROLLBACK => Some("ROLLBACK"),
            TK_ROW => Some("ROW"),
            TK_ROWS => Some("ROWS"),
            TK_SAVEPOINT => Some("SAVEPOINT"),
            TK_SELECT => Some("SELECT"),
            TK_SET => Some("SET"),
            TK_TABLE => Some("TABLE"),
            TK_TEMP => Some("TEMP"), // or TEMPORARY
            TK_TIES => Some("TIES"),
            TK_THEN => Some("THEN"),
            TK_TO => Some("TO"),
            TK_TRANSACTION => Some("TRANSACTION"),
            TK_TRIGGER => Some("TRIGGER"),
            TK_UNBOUNDED => Some("UNBOUNDED"),
            TK_UNION => Some("UNION"),
            TK_UNIQUE => Some("UNIQUE"),
            TK_UPDATE => Some("UPDATE"),
            TK_USING => Some("USING"),
            TK_VACUUM => Some("VACUUM"),
            TK_VALUES => Some("VALUES"),
            TK_VIEW => Some("VIEW"),
            TK_VIRTUAL => Some("VIRTUAL"),
            TK_WHEN => Some("WHEN"),
            TK_WHERE => Some("WHERE"),
            TK_WINDOW => Some("WINDOW"),
            TK_WITH => Some("WITH"),
            TK_WITHOUT => Some("WITHOUT"),
            TK_BITAND => Some("&"),
            TK_BITNOT => Some("~"),
            TK_BITOR => Some("|"),
            TK_COMMA => Some(","),
            TK_CONCAT => Some("||"),
            TK_DOT => Some("."),
            TK_EQ => Some("="), // or ==
            TK_GT => Some(">"),
            TK_GE => Some(">="),
            TK_LP => Some("("),
            TK_LSHIFT => Some("<<"),
            TK_LE => Some("<="),
            TK_LT => Some("<"),
            TK_MINUS => Some("-"),
            TK_NE => Some("<>"), // or !=
            TK_PLUS => Some("+"),
            TK_REM => Some("%"),
            TK_RP => Some(")"),
            TK_RSHIFT => Some(">>"),
            TK_SEMI => Some(";"),
            TK_SLASH => Some("/"),
            TK_STAR => Some("*"),
            _ => None,
        }
    }
}
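The keyword lookup above is case-insensitive thanks to `UncasedStr`, and the lexer below only consults it for ASCII words of 2 to 17 bytes (`MAX_KEYWORD_LEN`). A minimal usage sketch of hypothetical downstream code, assuming the crate is consumed under the `sqlite3_parser` name exposed by `lib.rs` below:

use sqlite3_parser::dialect::{keyword_token, TokenType};

// Classify a raw word the same way the lexer does: known keywords map to
// their token type, everything else falls back to TK_ID.
fn classify(word: &[u8]) -> TokenType {
    keyword_token(word).unwrap_or(TokenType::TK_ID)
}

fn main() {
    assert_eq!(classify(b"select"), TokenType::TK_SELECT); // case-insensitive
    assert_eq!(classify(b"my_table"), TokenType::TK_ID);
}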
vendored/sqlite3-parser/src/dialect/token.rs (new file, +180)
@@ -0,0 +1,180 @@
//! All terminal symbols.

/// Token classes
// Generated by lemon (parse.h).
// Renamed manually.
// To be kept in sync.
#[non_exhaustive]
#[allow(non_camel_case_types, missing_docs)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
#[repr(u16)]
pub enum TokenType {
    TK_EOF = 0,
    TK_SEMI = 1,
    TK_EXPLAIN = 2,
    TK_QUERY = 3,
    TK_PLAN = 4,
    TK_BEGIN = 5,
    TK_TRANSACTION = 6,
    TK_DEFERRED = 7,
    TK_IMMEDIATE = 8,
    TK_EXCLUSIVE = 9,
    TK_COMMIT = 10,
    TK_END = 11,
    TK_ROLLBACK = 12,
    TK_SAVEPOINT = 13,
    TK_RELEASE = 14,
    TK_TO = 15,
    TK_TABLE = 16,
    TK_CREATE = 17,
    TK_IF = 18,
    TK_NOT = 19,
    TK_EXISTS = 20,
    TK_TEMP = 21,
    TK_LP = 22,
    TK_RP = 23,
    TK_AS = 24,
    TK_COMMA = 25,
    TK_WITHOUT = 26,
    TK_ABORT = 27,
    TK_ACTION = 28,
    TK_AFTER = 29,
    TK_ANALYZE = 30,
    TK_ASC = 31,
    TK_ATTACH = 32,
    TK_BEFORE = 33,
    TK_BY = 34,
    TK_CASCADE = 35,
    TK_CAST = 36,
    TK_CONFLICT = 37,
    TK_DATABASE = 38,
    TK_DESC = 39,
    TK_DETACH = 40,
    TK_EACH = 41,
    TK_FAIL = 42,
    TK_OR = 43,
    TK_AND = 44,
    TK_IS = 45,
    TK_ISNOT = 46,
    TK_MATCH = 47,
    TK_LIKE_KW = 48,
    TK_BETWEEN = 49,
    TK_IN = 50,
    TK_ISNULL = 51,
    TK_NOTNULL = 52,
    TK_NE = 53,
    TK_EQ = 54,
    TK_GT = 55,
    TK_LE = 56,
    TK_LT = 57,
    TK_GE = 58,
    TK_ESCAPE = 59,
    TK_ID = 60,
    TK_COLUMNKW = 61,
    TK_DO = 62,
    TK_FOR = 63,
    TK_IGNORE = 64,
    TK_INITIALLY = 65,
    TK_INSTEAD = 66,
    TK_NO = 67,
    TK_KEY = 68,
    TK_OF = 69,
    TK_OFFSET = 70,
    TK_PRAGMA = 71,
    TK_RAISE = 72,
    TK_RECURSIVE = 73,
    TK_REPLACE = 74,
    TK_RESTRICT = 75,
    TK_ROW = 76,
    TK_ROWS = 77,
    TK_TRIGGER = 78,
    TK_VACUUM = 79,
    TK_VIEW = 80,
    TK_VIRTUAL = 81,
    TK_WITH = 82,
    TK_NULLS = 83,
    TK_FIRST = 84,
    TK_LAST = 85,
    TK_CURRENT = 86,
    TK_FOLLOWING = 87,
    TK_PARTITION = 88,
    TK_PRECEDING = 89,
    TK_RANGE = 90,
    TK_UNBOUNDED = 91,
    TK_EXCLUDE = 92,
    TK_GROUPS = 93,
    TK_OTHERS = 94,
    TK_TIES = 95,
    TK_GENERATED = 96,
    TK_ALWAYS = 97,
    TK_MATERIALIZED = 98,
    TK_REINDEX = 99,
    TK_RENAME = 100,
    TK_CTIME_KW = 101,
    TK_ANY = 102,
    TK_BITAND = 103,
    TK_BITOR = 104,
    TK_LSHIFT = 105,
    TK_RSHIFT = 106,
    TK_PLUS = 107,
    TK_MINUS = 108,
    TK_STAR = 109,
    TK_SLASH = 110,
    TK_REM = 111,
    TK_CONCAT = 112,
    TK_PTR = 113,
    TK_COLLATE = 114,
    TK_BITNOT = 115,
    TK_ON = 116,
    TK_INDEXED = 117,
    TK_STRING = 118,
    TK_JOIN_KW = 119,
    TK_CONSTRAINT = 120,
    TK_DEFAULT = 121,
    TK_NULL = 122,
    TK_PRIMARY = 123,
    TK_UNIQUE = 124,
    TK_CHECK = 125,
    TK_REFERENCES = 126,
    TK_AUTOINCR = 127,
    TK_INSERT = 128,
    TK_DELETE = 129,
    TK_UPDATE = 130,
    TK_SET = 131,
    TK_DEFERRABLE = 132,
    TK_FOREIGN = 133,
    TK_DROP = 134,
    TK_UNION = 135,
    TK_ALL = 136,
    TK_EXCEPT = 137,
    TK_INTERSECT = 138,
    TK_SELECT = 139,
    TK_VALUES = 140,
    TK_DISTINCT = 141,
    TK_DOT = 142,
    TK_FROM = 143,
    TK_JOIN = 144,
    TK_USING = 145,
    TK_ORDER = 146,
    TK_GROUP = 147,
    TK_HAVING = 148,
    TK_LIMIT = 149,
    TK_WHERE = 150,
    TK_RETURNING = 151,
    TK_INTO = 152,
    TK_NOTHING = 153,
    TK_BLOB = 154,
    TK_FLOAT = 155,
    TK_INTEGER = 156,
    TK_VARIABLE = 157,
    TK_CASE = 158,
    TK_WHEN = 159,
    TK_THEN = 160,
    TK_ELSE = 161,
    TK_INDEX = 162,
    TK_ALTER = 163,
    TK_ADD = 164,
    TK_WINDOW = 165,
    TK_OVER = 166,
    TK_FILTER = 167,
}
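One detail worth noting: `repr(u16)` plus the derived `PartialOrd` let the lexer compare token classes by discriminant. WINDOW (165), OVER (166) and FILTER (167) are the three highest values, so the `token_type >= TK_WINDOW` test in `lexer/sql/mod.rs` below selects exactly the keywords that need look-ahead disambiguation. A small sketch, assuming the public `sqlite3_parser` crate name:

use sqlite3_parser::dialect::TokenType::{TK_FILTER, TK_OVER, TK_WINDOW};

fn main() {
    // Ordered comparison on the enum mirrors a comparison on the u16
    // discriminants generated by lemon.
    assert!(TK_OVER >= TK_WINDOW);
    assert_eq!(TK_FILTER as u16, 167);
}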
vendored/sqlite3-parser/src/lexer/mod.rs (new file, +6)
@@ -0,0 +1,6 @@
//! Streaming SQLite tokenizer

mod scan;
pub mod sql;

pub use scan::{ScanError, Scanner, Splitter};
vendored/sqlite3-parser/src/lexer/scan.rs (new file, +172)
@@ -0,0 +1,172 @@
//! Adaptation/port of [Go scanner](http://tip.golang.org/pkg/bufio/#Scanner).

use log::debug;

use std::error::Error;
use std::fmt;
use std::io;

/// Error with position
pub trait ScanError: Error + From<io::Error> + Sized {
    /// Update the position where the error occurs
    fn position(&mut self, line: u64, column: usize);
}

/// The `(&[u8], TokenType)` is the token.
/// And the `usize` is the amount of bytes to consume.
type SplitResult<'input, TokenType, Error> =
    Result<(Option<(&'input [u8], TokenType)>, usize), Error>;

/// Split function used to tokenize the input
pub trait Splitter: Sized {
    /// Potential error raised
    type Error: ScanError;
    //type Item: ?Sized;
    /// Token generated
    type TokenType;

    /// The arguments are an initial substring of the remaining unprocessed
    /// data.
    ///
    /// If an error is returned, scanning stops and the error
    /// is returned to the client.
    ///
    /// The function is never called with an empty data slice.
    fn split<'input>(
        &mut self,
        data: &'input [u8],
    ) -> SplitResult<'input, Self::TokenType, Self::Error>;
}

/// Like a `BufReader` but with a growable buffer.
/// Successive calls to the `scan` method will step through the 'tokens'
/// of a file, skipping the bytes between the tokens.
///
/// Scanning stops unrecoverably at EOF, the first I/O error, or a token too
/// large to fit in the buffer. When a scan stops, the reader may have
/// advanced arbitrarily far past the last token.
pub struct Scanner<S: Splitter> {
    /// offset in `input`
    offset: usize,
    /// mark
    mark: (usize, u64, usize),
    /// The function to tokenize the input.
    splitter: S,
    /// current line number
    line: u64,
    /// current column number (byte offset, not char offset)
    column: usize,
}

impl<S: Splitter> Scanner<S> {
    /// Constructor
    pub fn new(splitter: S) -> Self {
        Self {
            offset: 0,
            mark: (0, 0, 0),
            splitter,
            line: 1,
            column: 1,
        }
    }

    /// Current line number
    pub fn line(&self) -> u64 {
        self.line
    }

    /// Current column number (byte offset, not char offset)
    pub fn column(&self) -> usize {
        self.column
    }
    /// Associated splitter
    pub fn splitter(&self) -> &S {
        &self.splitter
    }
    /// Mark current position
    pub fn mark(&mut self) {
        self.mark = (self.offset, self.line, self.column);
    }
    /// Reset to mark
    pub fn reset_to_mark(&mut self) {
        (self.offset, self.line, self.column) = self.mark;
    }

    /// Reset the scanner such that it behaves as if it had never been used.
    pub fn reset(&mut self) {
        self.offset = 0;
        self.line = 1;
        self.column = 1;
    }
}

type ScanResult<'input, TokenType, Error> =
    Result<(usize, Option<(&'input [u8], TokenType)>, usize), Error>;

impl<S: Splitter> Scanner<S> {
    /// Advance the Scanner to the next token.
    /// Return the token as a byte slice.
    /// Return `None` when the end of the input is reached.
    /// Return any error that occurs while reading the input.
    pub fn scan<'input>(
        &mut self,
        input: &'input [u8],
    ) -> ScanResult<'input, S::TokenType, S::Error> {
        debug!(target: "scanner", "scan(line: {}, column: {})", self.line, self.column);
        // Loop until we have a token.
        loop {
            // See if we can get a token with what we already have.
            if self.offset < input.len() {
                let data = &input[self.offset..];
                match self.splitter.split(data) {
                    Err(mut e) => {
                        e.position(self.line, self.column);
                        return Err(e);
                    }
                    Ok((None, 0)) => {
                        // Done
                    }
                    Ok((None, amt)) => {
                        // Ignore/skip this data
                        self.consume(data, amt);
                        continue;
                    }
                    Ok((tok, amt)) => {
                        let start = self.offset;
                        self.consume(data, amt);
                        return Ok((start, tok, self.offset));
                    }
                }
            }
            // We cannot generate a token with what we are holding;
            // we are done.
            return Ok((self.offset, None, self.offset));
        }
    }

    /// Consume `amt` bytes of the buffer.
    fn consume(&mut self, data: &[u8], amt: usize) {
        debug!(target: "scanner", "consume({})", amt);
        debug_assert!(amt <= data.len());
        for byte in &data[..amt] {
            if *byte == b'\n' {
                self.line += 1;
                self.column = 1;
            } else {
                self.column += 1;
            }
        }
        self.offset += amt;
    }
}

impl<S: Splitter> fmt::Debug for Scanner<S> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Scanner")
            .field("offset", &self.offset)
            .field("mark", &self.mark)
            .field("line", &self.line)
            .field("column", &self.column)
            .finish()
    }
}
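The `Splitter`/`Scanner` split keeps position tracking (line/column, mark/reset) in the scanner while the token grammar lives in the splitter. A toy illustration of the contract; the `Words` splitter and its `WordError` type are invented for this sketch, the real SQL splitter being `lexer::sql::Tokenizer` below:

use std::{error, fmt, io};

use sqlite3_parser::lexer::{ScanError, Scanner, Splitter};

#[derive(Debug)]
struct WordError(io::Error); // hypothetical error type for the example

impl fmt::Display for WordError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.fmt(f)
    }
}
impl error::Error for WordError {}
impl From<io::Error> for WordError {
    fn from(e: io::Error) -> Self {
        Self(e)
    }
}
impl ScanError for WordError {
    fn position(&mut self, _line: u64, _column: usize) {} // nothing to record
}

/// Splits the input into whitespace-separated words.
struct Words;

impl Splitter for Words {
    type Error = WordError;
    type TokenType = ();

    fn split<'input>(
        &mut self,
        data: &'input [u8],
    ) -> Result<(Option<(&'input [u8], ())>, usize), WordError> {
        if data[0].is_ascii_whitespace() {
            return Ok((None, 1)); // skip separators, no token emitted
        }
        let end = data
            .iter()
            .position(u8::is_ascii_whitespace)
            .unwrap_or(data.len());
        Ok((Some((&data[..end], ())), end))
    }
}

fn main() {
    let mut s = Scanner::new(Words);
    let input = b"SELECT 1 ;";
    // scan() returns (start, token, end); a None token means end of input.
    while let Ok((_, Some((word, ())), _)) = s.scan(input) {
        println!("{}", String::from_utf8_lossy(word));
    }
}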
vendored/sqlite3-parser/src/lexer/sql/error.rs (new file, +95)
@@ -0,0 +1,95 @@
use std::error;
use std::fmt;
use std::io;

use crate::lexer::scan::ScanError;
use crate::parser::ParserError;

/// SQL lexer and parser errors
#[non_exhaustive]
#[derive(Debug)]
pub enum Error {
    /// I/O Error
    Io(io::Error),
    /// Lexer error
    UnrecognizedToken(Option<(u64, usize)>),
    /// Missing quote or double-quote or backtick
    UnterminatedLiteral(Option<(u64, usize)>),
    /// Missing `]`
    UnterminatedBracket(Option<(u64, usize)>),
    /// Missing `*/`
    UnterminatedBlockComment(Option<(u64, usize)>),
    /// Invalid parameter name
    BadVariableName(Option<(u64, usize)>),
    /// Invalid number format
    BadNumber(Option<(u64, usize)>),
    /// Invalid or missing sign after `!`
    ExpectedEqualsSign(Option<(u64, usize)>),
    /// BLOB literals are string literals containing hexadecimal data and preceded by a single "x" or "X" character.
    MalformedBlobLiteral(Option<(u64, usize)>),
    /// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
    MalformedHexInteger(Option<(u64, usize)>),
    /// Grammar error
    ParserError(ParserError, Option<(u64, usize)>),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Io(ref err) => err.fmt(f),
            Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos.unwrap()),
            Self::UnterminatedLiteral(pos) => {
                write!(f, "non-terminated literal at {:?}", pos.unwrap())
            }
            Self::UnterminatedBracket(pos) => {
                write!(f, "non-terminated bracket at {:?}", pos.unwrap())
            }
            Self::UnterminatedBlockComment(pos) => {
                write!(f, "non-terminated block comment at {:?}", pos.unwrap())
            }
            Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos.unwrap()),
            Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos.unwrap()),
            Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos.unwrap()),
            Self::MalformedBlobLiteral(pos) => {
                write!(f, "malformed blob literal at {:?}", pos.unwrap())
            }
            Self::MalformedHexInteger(pos) => {
                write!(f, "malformed hex integer at {:?}", pos.unwrap())
            }
            Self::ParserError(ref msg, Some(pos)) => write!(f, "{msg} at {pos:?}"),
            Self::ParserError(ref msg, _) => write!(f, "{msg}"),
        }
    }
}

impl error::Error for Error {}

impl From<io::Error> for Error {
    fn from(err: io::Error) -> Self {
        Self::Io(err)
    }
}

impl From<ParserError> for Error {
    fn from(err: ParserError) -> Self {
        Self::ParserError(err, None)
    }
}

impl ScanError for Error {
    fn position(&mut self, line: u64, column: usize) {
        match *self {
            Self::Io(_) => {}
            Self::UnrecognizedToken(ref mut pos) => *pos = Some((line, column)),
            Self::UnterminatedLiteral(ref mut pos) => *pos = Some((line, column)),
            Self::UnterminatedBracket(ref mut pos) => *pos = Some((line, column)),
            Self::UnterminatedBlockComment(ref mut pos) => *pos = Some((line, column)),
            Self::BadVariableName(ref mut pos) => *pos = Some((line, column)),
            Self::BadNumber(ref mut pos) => *pos = Some((line, column)),
            Self::ExpectedEqualsSign(ref mut pos) => *pos = Some((line, column)),
            Self::MalformedBlobLiteral(ref mut pos) => *pos = Some((line, column)),
            Self::MalformedHexInteger(ref mut pos) => *pos = Some((line, column)),
            Self::ParserError(_, ref mut pos) => *pos = Some((line, column)),
        }
    }
}
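Note that the `Display` impl unwraps the stored position, so formatting an error that was never positioned would panic; in practice the scanner stamps a position on every error before returning it (see `scan.rs` above). A small sketch of that flow, again assuming the `sqlite3_parser` crate name:

use sqlite3_parser::lexer::sql::Error;
use sqlite3_parser::lexer::ScanError;

fn main() {
    // Lexer errors start without a position...
    let mut err = Error::BadNumber(None);
    // ...and the scanner stamps (line, column) on them before returning.
    err.position(3, 7);
    assert_eq!(err.to_string(), "bad number at (3, 7)");
}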
vendored/sqlite3-parser/src/lexer/sql/mod.rs (new file, +678)
@@ -0,0 +1,678 @@
//! Adaptation/port of [`SQLite` tokenizer](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/tokenize.c)
use fallible_iterator::FallibleIterator;
use memchr::memchr;

pub use crate::dialect::TokenType;
use crate::dialect::TokenType::*;
use crate::dialect::{
    is_identifier_continue, is_identifier_start, keyword_token, sentinel, MAX_KEYWORD_LEN,
};
use crate::parser::ast::Cmd;
use crate::parser::parse::{yyParser, YYCODETYPE};
use crate::parser::Context;

mod error;
#[cfg(test)]
mod test;

use crate::lexer::scan::ScanError;
use crate::lexer::scan::Splitter;
use crate::lexer::Scanner;
pub use crate::parser::ParserError;
pub use error::Error;

// TODO Extract scanning stuff and move this into the parser crate
// to make it possible to use the tokenizer without depending on the parser...

/// SQL parser
pub struct Parser<'input> {
    input: &'input [u8],
    scanner: Scanner<Tokenizer>,
    parser: yyParser<'input>,
}

impl<'input> Parser<'input> {
    /// Constructor
    pub fn new(input: &'input [u8]) -> Self {
        let lexer = Tokenizer::new();
        let scanner = Scanner::new(lexer);
        let ctx = Context::new(input);
        let parser = yyParser::new(ctx);
        Parser {
            input,
            scanner,
            parser,
        }
    }
    /// Parse new `input`
    pub fn reset(&mut self, input: &'input [u8]) {
        self.input = input;
        self.scanner.reset();
    }
    /// Current line position in input
    pub fn line(&self) -> u64 {
        self.scanner.line()
    }
    /// Current column position in input
    pub fn column(&self) -> usize {
        self.scanner.column()
    }
}

/*
** Return the id of the next token in input.
*/
fn get_token(scanner: &mut Scanner<Tokenizer>, input: &[u8]) -> Result<TokenType, Error> {
    let mut t = {
        let (_, token_type) = match scanner.scan(input)? {
            (_, None, _) => {
                return Ok(TK_EOF);
            }
            (_, Some(tuple), _) => tuple,
        };
        token_type
    };
    if t == TK_ID
        || t == TK_STRING
        || t == TK_JOIN_KW
        || t == TK_WINDOW
        || t == TK_OVER
        || yyParser::parse_fallback(t as YYCODETYPE) == TK_ID as YYCODETYPE
    {
        t = TK_ID;
    }
    Ok(t)
}

/*
** The following three functions are called immediately after the tokenizer
** reads the keywords WINDOW, OVER and FILTER, respectively, to determine
** whether the token should be treated as a keyword or an SQL identifier.
** This cannot be handled by the usual lemon %fallback method, due to
** the ambiguity in some constructions. e.g.
**
**   SELECT sum(x) OVER ...
**
** In the above, "OVER" might be a keyword, or it might be an alias for the
** sum(x) expression. If a "%fallback ID OVER" directive were added to the
** grammar, then SQLite would always treat "OVER" as an alias, making it
** impossible to call a window-function without a FILTER clause.
**
** WINDOW is treated as a keyword if:
**
**   * the following token is an identifier, or a keyword that can fallback
**     to being an identifier, and
**   * the token after that one is TK_AS.
**
** OVER is a keyword if:
**
**   * the previous token was TK_RP, and
**   * the next token is either TK_LP or an identifier.
**
** FILTER is a keyword if:
**
**   * the previous token was TK_RP, and
**   * the next token is TK_LP.
*/
fn analyze_window_keyword(
    scanner: &mut Scanner<Tokenizer>,
    input: &[u8],
) -> Result<TokenType, Error> {
    let t = get_token(scanner, input)?;
    if t != TK_ID {
        return Ok(TK_ID);
    };
    let t = get_token(scanner, input)?;
    if t != TK_AS {
        return Ok(TK_ID);
    };
    Ok(TK_WINDOW)
}
fn analyze_over_keyword(
    scanner: &mut Scanner<Tokenizer>,
    input: &[u8],
    last_token: TokenType,
) -> Result<TokenType, Error> {
    if last_token == TK_RP {
        let t = get_token(scanner, input)?;
        if t == TK_LP || t == TK_ID {
            return Ok(TK_OVER);
        }
    }
    Ok(TK_ID)
}
fn analyze_filter_keyword(
    scanner: &mut Scanner<Tokenizer>,
    input: &[u8],
    last_token: TokenType,
) -> Result<TokenType, Error> {
    if last_token == TK_RP && get_token(scanner, input)? == TK_LP {
        return Ok(TK_FILTER);
    }
    Ok(TK_ID)
}

macro_rules! try_with_position {
    ($scanner:expr, $expr:expr) => {
        match $expr {
            Ok(val) => val,
            Err(err) => {
                let mut err = Error::from(err);
                err.position($scanner.line(), $scanner.column());
                return Err(err);
            }
        }
    };
}

impl FallibleIterator for Parser<'_> {
    type Item = Cmd;
    type Error = Error;

    fn next(&mut self) -> Result<Option<Cmd>, Error> {
        //print!("line: {}, column: {}: ", self.scanner.line(), self.scanner.column());
        self.parser.ctx.reset();
        let mut last_token_parsed = TK_EOF;
        let mut eof = false;
        loop {
            let (start, (value, mut token_type), end) = match self.scanner.scan(self.input)? {
                (_, None, _) => {
                    eof = true;
                    break;
                }
                (start, Some(tuple), end) => (start, tuple, end),
            };
            let token = if token_type >= TK_WINDOW {
                debug_assert!(
                    token_type == TK_OVER || token_type == TK_FILTER || token_type == TK_WINDOW
                );
                self.scanner.mark();
                if token_type == TK_WINDOW {
                    token_type = analyze_window_keyword(&mut self.scanner, self.input)?;
                } else if token_type == TK_OVER {
                    token_type =
                        analyze_over_keyword(&mut self.scanner, self.input, last_token_parsed)?;
                } else if token_type == TK_FILTER {
                    token_type =
                        analyze_filter_keyword(&mut self.scanner, self.input, last_token_parsed)?;
                }
                self.scanner.reset_to_mark();
                token_type.to_token(start, value, end)
            } else {
                token_type.to_token(start, value, end)
            };
            //println!("({:?}, {:?})", token_type, token);
            try_with_position!(self.scanner, self.parser.sqlite3Parser(token_type, token));
            last_token_parsed = token_type;
            if self.parser.ctx.done() {
                //println!();
                break;
            }
        }
        if last_token_parsed == TK_EOF {
            return Ok(None); // empty input
        }
        /* Upon reaching the end of input, call the parser two more times
        with tokens TK_SEMI and 0, in that order. */
        if eof && self.parser.ctx.is_ok() {
            if last_token_parsed != TK_SEMI {
                try_with_position!(
                    self.scanner,
                    self.parser
                        .sqlite3Parser(TK_SEMI, sentinel(self.input.len()))
                );
            }
            try_with_position!(
                self.scanner,
                self.parser
                    .sqlite3Parser(TK_EOF, sentinel(self.input.len()))
            );
        }
        self.parser.sqlite3ParserFinalize();
        if let Some(e) = self.parser.ctx.error() {
            let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column())));
            return Err(err);
        }
        let cmd = self.parser.ctx.cmd();
        if let Some(ref cmd) = cmd {
            if let Err(e) = cmd.check() {
                let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column())));
                return Err(err);
            }
        }
        Ok(cmd)
    }
}

/// SQL token
pub type Token<'input> = (&'input [u8], TokenType);

/// SQL lexer
#[derive(Default)]
pub struct Tokenizer {}

impl Tokenizer {
    /// Constructor
    pub fn new() -> Self {
        Self {}
    }
}

/// ```rust
/// use sqlite3_parser::lexer::sql::Tokenizer;
/// use sqlite3_parser::lexer::Scanner;
///
/// let tokenizer = Tokenizer::new();
/// let input = b"PRAGMA parser_trace=ON;";
/// let mut s = Scanner::new(tokenizer);
/// let Ok((_, Some((token1, _)), _)) = s.scan(input) else { panic!() };
/// s.scan(input).unwrap();
/// assert!(b"PRAGMA".eq_ignore_ascii_case(token1));
/// ```
impl Splitter for Tokenizer {
    type Error = Error;
    type TokenType = TokenType;

    fn split<'input>(
        &mut self,
        data: &'input [u8],
    ) -> Result<(Option<Token<'input>>, usize), Error> {
        if data[0].is_ascii_whitespace() {
            // eat as much space as possible
            return Ok((
                None,
                match data.iter().skip(1).position(|&b| !b.is_ascii_whitespace()) {
                    Some(i) => i + 1,
                    _ => data.len(),
                },
            ));
        }
        match data[0] {
            b'-' => {
                if let Some(b) = data.get(1) {
                    if *b == b'-' {
                        // eat comment
                        if let Some(i) = memchr(b'\n', data) {
                            Ok((None, i + 1))
                        } else {
                            Ok((None, data.len()))
                        }
                    } else if *b == b'>' {
                        if let Some(b) = data.get(2) {
                            if *b == b'>' {
                                return Ok((Some((&data[..3], TK_PTR)), 3));
                            }
                        }
                        Ok((Some((&data[..2], TK_PTR)), 2))
                    } else {
                        Ok((Some((&data[..1], TK_MINUS)), 1))
                    }
                } else {
                    Ok((Some((&data[..1], TK_MINUS)), 1))
                }
            }
            b'(' => Ok((Some((&data[..1], TK_LP)), 1)),
            b')' => Ok((Some((&data[..1], TK_RP)), 1)),
            b';' => Ok((Some((&data[..1], TK_SEMI)), 1)),
            b'+' => Ok((Some((&data[..1], TK_PLUS)), 1)),
            b'*' => Ok((Some((&data[..1], TK_STAR)), 1)),
            b'/' => {
                if let Some(b) = data.get(1) {
                    if *b == b'*' {
                        // eat comment
                        let mut pb = 0;
                        let mut end = None;
                        for (i, b) in data.iter().enumerate().skip(2) {
                            if *b == b'/' && pb == b'*' {
                                end = Some(i);
                                break;
                            }
                            pb = *b;
                        }
                        if let Some(i) = end {
                            Ok((None, i + 1))
                        } else {
                            Err(Error::UnterminatedBlockComment(None))
                        }
                    } else {
                        Ok((Some((&data[..1], TK_SLASH)), 1))
                    }
                } else {
                    Ok((Some((&data[..1], TK_SLASH)), 1))
                }
            }
            b'%' => Ok((Some((&data[..1], TK_REM)), 1)),
            b'=' => {
                if let Some(b) = data.get(1) {
                    Ok(if *b == b'=' {
                        (Some((&data[..2], TK_EQ)), 2)
                    } else {
                        (Some((&data[..1], TK_EQ)), 1)
                    })
                } else {
                    Ok((Some((&data[..1], TK_EQ)), 1))
                }
            }
            b'<' => {
                if let Some(b) = data.get(1) {
                    Ok(match *b {
                        b'=' => (Some((&data[..2], TK_LE)), 2),
                        b'>' => (Some((&data[..2], TK_NE)), 2),
                        b'<' => (Some((&data[..2], TK_LSHIFT)), 2),
                        _ => (Some((&data[..1], TK_LT)), 1),
                    })
                } else {
                    Ok((Some((&data[..1], TK_LT)), 1))
                }
            }
            b'>' => {
                if let Some(b) = data.get(1) {
                    Ok(match *b {
                        b'=' => (Some((&data[..2], TK_GE)), 2),
                        b'>' => (Some((&data[..2], TK_RSHIFT)), 2),
                        _ => (Some((&data[..1], TK_GT)), 1),
                    })
                } else {
                    Ok((Some((&data[..1], TK_GT)), 1))
                }
            }
            b'!' => {
                if let Some(b) = data.get(1) {
                    if *b == b'=' {
                        Ok((Some((&data[..2], TK_NE)), 2))
                    } else {
                        Err(Error::ExpectedEqualsSign(None))
                    }
                } else {
                    Err(Error::ExpectedEqualsSign(None))
                }
            }
            b'|' => {
                if let Some(b) = data.get(1) {
                    Ok(if *b == b'|' {
                        (Some((&data[..2], TK_CONCAT)), 2)
                    } else {
                        (Some((&data[..1], TK_BITOR)), 1)
                    })
                } else {
                    Ok((Some((&data[..1], TK_BITOR)), 1))
                }
            }
            b',' => Ok((Some((&data[..1], TK_COMMA)), 1)),
            b'&' => Ok((Some((&data[..1], TK_BITAND)), 1)),
            b'~' => Ok((Some((&data[..1], TK_BITNOT)), 1)),
            quote @ (b'`' | b'\'' | b'"') => literal(data, quote),
            b'.' => {
                if let Some(b) = data.get(1) {
                    if b.is_ascii_digit() {
                        fractional_part(data, 0)
                    } else {
                        Ok((Some((&data[..1], TK_DOT)), 1))
                    }
                } else {
                    Ok((Some((&data[..1], TK_DOT)), 1))
                }
            }
            b'0'..=b'9' => number(data),
            b'[' => {
                if let Some(i) = memchr(b']', data) {
                    // Keep original quotes: '[' ... ']'
                    Ok((Some((&data[0..=i], TK_ID)), i + 1))
                } else {
                    Err(Error::UnterminatedBracket(None))
                }
            }
            b'?' => {
                match data.iter().skip(1).position(|&b| !b.is_ascii_digit()) {
                    Some(i) => {
                        // do not include the '?' in the token
                        Ok((Some((&data[1..=i], TK_VARIABLE)), i + 1))
                    }
                    None => Ok((Some((&data[1..], TK_VARIABLE)), data.len())),
                }
            }
            b'$' | b'@' | b'#' | b':' => {
                match data
                    .iter()
                    .skip(1)
                    .position(|&b| !is_identifier_continue(b))
                {
                    Some(0) => Err(Error::BadVariableName(None)),
                    Some(i) => {
                        // '$' is included as part of the name
                        Ok((Some((&data[..=i], TK_VARIABLE)), i + 1))
                    }
                    None => {
                        if data.len() == 1 {
                            return Err(Error::BadVariableName(None));
                        }
                        Ok((Some((data, TK_VARIABLE)), data.len()))
                    }
                }
            }
            b if is_identifier_start(b) => {
                if b == b'x' || b == b'X' {
                    if let Some(&b'\'') = data.get(1) {
                        blob_literal(data)
                    } else {
                        Ok(self.identifierish(data))
                    }
                } else {
                    Ok(self.identifierish(data))
                }
            }
            _ => Err(Error::UnrecognizedToken(None)),
        }
    }
}

fn literal(data: &[u8], quote: u8) -> Result<(Option<Token<'_>>, usize), Error> {
    debug_assert_eq!(data[0], quote);
    let tt = if quote == b'\'' { TK_STRING } else { TK_ID };
    let mut pb = 0;
    let mut end = None;
    // data[0] == quote => skip(1)
    for (i, b) in data.iter().enumerate().skip(1) {
        if *b == quote {
            if pb == quote {
                // escaped quote
                pb = 0;
                continue;
            }
        } else if pb == quote {
            end = Some(i);
            break;
        }
        pb = *b;
    }
    if end.is_some() || pb == quote {
        let i = match end {
            Some(i) => i,
            _ => data.len(),
        };
        // keep original quotes in the token
        Ok((Some((&data[0..i], tt)), i))
    } else {
        Err(Error::UnterminatedLiteral(None))
    }
}

fn blob_literal(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
    debug_assert!(data[0] == b'x' || data[0] == b'X');
    debug_assert_eq!(data[1], b'\'');
    if let Some((i, b)) = data
        .iter()
        .enumerate()
        .skip(2)
        .find(|&(_, &b)| !b.is_ascii_hexdigit())
    {
        if *b != b'\'' || i % 2 != 0 {
            return Err(Error::MalformedBlobLiteral(None));
        }
        Ok((Some((&data[2..i], TK_BLOB)), i + 1))
    } else {
        Err(Error::MalformedBlobLiteral(None))
    }
}

fn number(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
    debug_assert!(data[0].is_ascii_digit());
    if data[0] == b'0' {
        if let Some(b) = data.get(1) {
            if *b == b'x' || *b == b'X' {
                return hex_integer(data);
            }
        } else {
            return Ok((Some((data, TK_INTEGER)), data.len()));
        }
    }
    if let Some((i, b)) = find_end_of_number(data, 1, u8::is_ascii_digit)? {
        if b == b'.' {
            return fractional_part(data, i);
        } else if b == b'e' || b == b'E' {
            return exponential_part(data, i);
        } else if is_identifier_start(b) {
            return Err(Error::BadNumber(None));
        }
        Ok((Some((&data[..i], TK_INTEGER)), i))
    } else {
        Ok((Some((data, TK_INTEGER)), data.len()))
    }
}

fn hex_integer(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
    debug_assert_eq!(data[0], b'0');
    debug_assert!(data[1] == b'x' || data[1] == b'X');
    if let Some((i, b)) = find_end_of_number(data, 2, u8::is_ascii_hexdigit)? {
        // Must not be empty (0x is invalid)
        if i == 2 || is_identifier_start(b) {
            return Err(Error::MalformedHexInteger(None));
        }
        Ok((Some((&data[..i], TK_INTEGER)), i))
    } else {
        // Must not be empty (0x is invalid)
        if data.len() == 2 {
            return Err(Error::MalformedHexInteger(None));
        }
        Ok((Some((data, TK_INTEGER)), data.len()))
    }
}

fn fractional_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize), Error> {
    debug_assert_eq!(data[i], b'.');
    if let Some((i, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
        if b == b'e' || b == b'E' {
            return exponential_part(data, i);
        } else if is_identifier_start(b) {
            return Err(Error::BadNumber(None));
        }
        Ok((Some((&data[..i], TK_FLOAT)), i))
    } else {
        Ok((Some((data, TK_FLOAT)), data.len()))
    }
}

fn exponential_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize), Error> {
    debug_assert!(data[i] == b'e' || data[i] == b'E');
    // data[i] == 'e'|'E'
    if let Some(b) = data.get(i + 1) {
        let i = if *b == b'+' || *b == b'-' { i + 1 } else { i };
        if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
            if j == i + 1 || is_identifier_start(b) {
                return Err(Error::BadNumber(None));
            }
            Ok((Some((&data[..j], TK_FLOAT)), j))
        } else {
            if data.len() == i + 1 {
                return Err(Error::BadNumber(None));
            }
            Ok((Some((data, TK_FLOAT)), data.len()))
        }
    } else {
        Err(Error::BadNumber(None))
    }
}

fn find_end_of_number(
    data: &[u8],
    i: usize,
    test: fn(&u8) -> bool,
) -> Result<Option<(usize, u8)>, Error> {
    for (j, &b) in data.iter().enumerate().skip(i) {
        if test(&b) {
            continue;
        } else if b == b'_' {
            if j >= 1 && data.get(j - 1).map_or(false, test) && data.get(j + 1).map_or(false, test)
            {
                continue;
            }
            return Err(Error::BadNumber(None));
        } else {
            return Ok(Some((j, b)));
        }
    }
    Ok(None)
}

impl Tokenizer {
    fn identifierish<'input>(&mut self, data: &'input [u8]) -> (Option<Token<'input>>, usize) {
        debug_assert!(is_identifier_start(data[0]));
        // data[0] is_identifier_start => skip(1)
        let end = data
            .iter()
            .skip(1)
            .position(|&b| !is_identifier_continue(b));
        let i = match end {
            Some(i) => i + 1,
            _ => data.len(),
        };
        let word = &data[..i];
        let tt = if word.len() >= 2 && word.len() <= MAX_KEYWORD_LEN && word.is_ascii() {
            keyword_token(word).unwrap_or(TK_ID)
        } else {
            TK_ID
        };
        (Some((word, tt)), i)
    }
}

#[cfg(test)]
mod tests {
    use super::Tokenizer;
    use crate::dialect::TokenType;
    use crate::lexer::sql::Error;
    use crate::lexer::Scanner;

    #[test]
    fn fallible_iterator() -> Result<(), Error> {
        let tokenizer = Tokenizer::new();
        let input = b"PRAGMA parser_trace=ON;";
        let mut s = Scanner::new(tokenizer);
        expect_token(&mut s, input, b"PRAGMA", TokenType::TK_PRAGMA)?;
        expect_token(&mut s, input, b"parser_trace", TokenType::TK_ID)?;
        Ok(())
    }

    #[test]
    fn invalid_number_literal() -> Result<(), Error> {
        let tokenizer = Tokenizer::new();
        let input = b"SELECT 1E;";
        let mut s = Scanner::new(tokenizer);
        expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?;
        let err = s.scan(input).unwrap_err();
        assert!(matches!(err, Error::BadNumber(_)));
        Ok(())
    }

    fn expect_token(
        s: &mut Scanner<Tokenizer>,
        input: &[u8],
        token: &[u8],
        token_type: TokenType,
    ) -> Result<(), Error> {
        let (t, tt) = s.scan(input)?.1.unwrap();
        assert_eq!(token, t);
        assert_eq!(token_type, tt);
        Ok(())
    }
}
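End to end, `Parser` is driven as a fallible iterator: each `next` call pulls tokens through the scanner, feeds the lemon-generated parser, and yields one `Cmd` per statement. A usage sketch; the `Cmd: Debug` bound is inferred from the tests below, which compare and print commands:

use fallible_iterator::FallibleIterator;
use sqlite3_parser::lexer::sql::Parser;

fn main() {
    let mut parser = Parser::new(b"SELECT 1; CREATE TABLE t (x INTEGER);");
    loop {
        match parser.next() {
            Ok(Some(cmd)) => println!("{cmd:?}"), // one Cmd per statement
            Ok(None) => break,                    // end of input
            Err(err) => {
                // errors carry the (line, column) where parsing failed
                eprintln!("parse error: {err}");
                break;
            }
        }
    }
}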
vendored/sqlite3-parser/src/lexer/sql/test.rs (new file, +376)
@@ -0,0 +1,376 @@
use fallible_iterator::FallibleIterator;

use super::{Error, Parser};
use crate::parser::ast::fmt::ToTokens;
use crate::parser::{
    ast::{Cmd, Name, ParameterInfo, QualifiedName, Stmt},
    ParserError,
};

#[test]
fn count_placeholders() {
    let ast = parse_cmd(b"SELECT ? WHERE 1 = ?");
    let mut info = ParameterInfo::default();
    ast.to_tokens(&mut info).unwrap();
    assert_eq!(info.count, 2);
}

#[test]
fn count_numbered_placeholders() {
    let ast = parse_cmd(b"SELECT ?1 WHERE 1 = ?2 AND 0 = ?1");
    let mut info = ParameterInfo::default();
    ast.to_tokens(&mut info).unwrap();
    assert_eq!(info.count, 2);
}

#[test]
fn count_unused_placeholders() {
    let ast = parse_cmd(b"SELECT ?1 WHERE 1 = ?3");
    let mut info = ParameterInfo::default();
    ast.to_tokens(&mut info).unwrap();
    assert_eq!(info.count, 3);
}

#[test]
fn count_named_placeholders() {
    let ast = parse_cmd(b"SELECT :x, :y WHERE 1 = :y");
    let mut info = ParameterInfo::default();
    ast.to_tokens(&mut info).unwrap();
    assert_eq!(info.count, 2);
    assert_eq!(info.names.len(), 2);
    assert!(info.names.contains(":x"));
    assert!(info.names.contains(":y"));
}

#[test]
fn duplicate_column() {
    expect_parser_err_msg(
        b"CREATE TABLE t (x TEXT, x TEXT)",
        "duplicate column name: x",
    );
    expect_parser_err_msg(
        b"CREATE TABLE t (x TEXT, \"x\" TEXT)",
        "duplicate column name: \"x\"",
    );
    expect_parser_err_msg(
        b"CREATE TABLE t (x TEXT, `x` TEXT)",
        "duplicate column name: `x`",
    );
}

#[test]
fn create_table_without_column() {
    expect_parser_err(
        b"CREATE TABLE t ()",
        ParserError::SyntaxError(")".to_owned()),
    );
}

#[test]
fn vtab_args() -> Result<(), Error> {
    let sql = b"CREATE VIRTUAL TABLE mail USING fts3(
  subject VARCHAR(256) NOT NULL,
  body TEXT CHECK(length(body)<10240)
);";
    let r = parse_cmd(sql);
    let Cmd::Stmt(Stmt::CreateVirtualTable {
        tbl_name: QualifiedName {
            name: Name(tbl_name),
            ..
        },
        module_name: Name(module_name),
        args: Some(args),
        ..
    }) = r
    else {
        panic!("unexpected AST")
    };
    assert_eq!(tbl_name, "mail");
    assert_eq!(module_name, "fts3");
    assert_eq!(args.len(), 2);
    assert_eq!(args[0], "subject VARCHAR(256) NOT NULL");
    assert_eq!(args[1], "body TEXT CHECK(length(body)<10240)");
    Ok(())
}

#[test]
fn only_semicolons_no_statements() {
    let sqls = ["", ";", ";;;"];
    for sql in &sqls {
        let r = parse(sql.as_bytes());
        assert_eq!(r.unwrap(), None);
    }
}

#[test]
fn extra_semicolons_between_statements() {
    let sqls = [
        "SELECT 1; SELECT 2",
        "SELECT 1; SELECT 2;",
        "; SELECT 1; SELECT 2",
        ";; SELECT 1;; SELECT 2;;",
    ];
    for sql in &sqls {
        let mut parser = Parser::new(sql.as_bytes());
        assert!(matches!(
            parser.next().unwrap(),
            Some(Cmd::Stmt(Stmt::Select { .. }))
        ));
        assert!(matches!(
            parser.next().unwrap(),
            Some(Cmd::Stmt(Stmt::Select { .. }))
        ));
        assert_eq!(parser.next().unwrap(), None);
    }
}

#[test]
fn extra_comments_between_statements() {
    let sqls = [
        "-- abc\nSELECT 1; --def\nSELECT 2 -- ghj",
        "/* abc */ SELECT 1; /* def */ SELECT 2; /* ghj */",
        "/* abc */; SELECT 1 /* def */; SELECT 2 /* ghj */",
        "/* abc */;; SELECT 1;/* def */; SELECT 2; /* ghj */; /* klm */",
    ];
    for sql in &sqls {
        let mut parser = Parser::new(sql.as_bytes());
        assert!(matches!(
            parser.next().unwrap(),
            Some(Cmd::Stmt(Stmt::Select { .. }))
        ));
        assert!(matches!(
            parser.next().unwrap(),
            Some(Cmd::Stmt(Stmt::Select { .. }))
        ));
        assert_eq!(parser.next().unwrap(), None);
    }
}

#[test]
fn insert_mismatch_count() {
    expect_parser_err_msg(b"INSERT INTO t (a, b) VALUES (1)", "1 values for 2 columns");
}

#[test]
fn insert_default_values() {
    expect_parser_err_msg(
        b"INSERT INTO t (a) DEFAULT VALUES",
        "0 values for 1 columns",
    );
}

#[test]
fn create_view_mismatch_count() {
    expect_parser_err_msg(
        b"CREATE VIEW v (c1, c2) AS SELECT 1",
        "expected 2 columns for v but got 1",
    );
}

#[test]
fn create_view_duplicate_column_name() {
    expect_parser_err_msg(
        b"CREATE VIEW v (c1, c1) AS SELECT 1, 2",
        "duplicate column name: c1",
    );
}

#[test]
fn create_table_without_rowid_missing_pk() {
    expect_parser_err_msg(
        b"CREATE TABLE t (c1) WITHOUT ROWID",
        "PRIMARY KEY missing on table t",
    );
}

#[test]
fn create_temporary_table_with_qualified_name() {
    expect_parser_err_msg(
        b"CREATE TEMPORARY TABLE mem.x AS SELECT 1",
        "temporary table name must be unqualified",
    );
    parse_cmd(b"CREATE TEMPORARY TABLE temp.x AS SELECT 1");
}

#[test]
fn create_table_with_only_generated_column() {
    expect_parser_err_msg(
        b"CREATE TABLE test(data AS (1))",
        "must have at least one non-generated column",
    );
}

#[test]
fn create_strict_table_missing_datatype() {
    expect_parser_err_msg(b"CREATE TABLE t (c1) STRICT", "missing datatype for t.c1");
}

#[test]
fn create_strict_table_unknown_datatype() {
    expect_parser_err_msg(
        b"CREATE TABLE t (c1 BOOL) STRICT",
        "unknown datatype for t.c1: \"BOOL\"",
    );
}

#[test]
fn foreign_key_on_column() {
    expect_parser_err_msg(
        b"CREATE TABLE t(a REFERENCES o(a,b))",
        "foreign key on a should reference only one column of table o",
    );
}

#[test]
fn create_strict_table_generated_column() {
    parse_cmd(
        b"CREATE TABLE IF NOT EXISTS transactions (
  debit REAL,
  credit REAL,
  amount REAL GENERATED ALWAYS AS (ifnull(credit, 0.0) -ifnull(debit, 0.0))
) STRICT;",
    );
}

#[test]
fn selects_compound_mismatch_columns_count() {
    expect_parser_err_msg(
        b"SELECT 1 UNION SELECT 1, 2",
        "SELECTs to the left and right of UNION do not have the same number of result columns",
    );
}

#[test]
fn delete_order_by_without_limit() {
    expect_parser_err_msg(
        b"DELETE FROM t ORDER BY x",
        "ORDER BY without LIMIT on DELETE",
    );
}

#[test]
fn update_order_by_without_limit() {
    expect_parser_err_msg(
        b"UPDATE t SET x = 1 ORDER BY x",
        "ORDER BY without LIMIT on UPDATE",
    );
}

#[test]
fn values_mismatch_columns_count() {
    expect_parser_err_msg(
        b"INSERT INTO t VALUES (1), (1,2)",
        "all VALUES must have the same number of terms",
    );
}

#[test]
fn column_specified_more_than_once() {
    expect_parser_err_msg(
        b"INSERT INTO t (n, n, m) VALUES (1, 0, 2)",
        "column \"n\" specified more than once",
    )
}

#[test]
fn alter_add_column_primary_key() {
    expect_parser_err_msg(
        b"ALTER TABLE t ADD COLUMN c PRIMARY KEY",
        "Cannot add a PRIMARY KEY column",
    );
}

#[test]
fn alter_add_column_unique() {
    expect_parser_err_msg(
        b"ALTER TABLE t ADD COLUMN c UNIQUE",
        "Cannot add a UNIQUE column",
    );
}

#[test]
fn alter_rename_same() {
    expect_parser_err_msg(
        b"ALTER TABLE t RENAME TO t",
        "there is already another table or index with this name: t",
    );
}

#[test]
fn natural_join_on() {
    expect_parser_err_msg(
        b"SELECT x FROM t NATURAL JOIN t USING (x)",
        "a NATURAL join may not have an ON or USING clause",
    );
    expect_parser_err_msg(
        b"SELECT x FROM t NATURAL JOIN t ON t.x = t.x",
        "a NATURAL join may not have an ON or USING clause",
    );
}

#[test]
fn missing_join_clause() {
    expect_parser_err_msg(
        b"SELECT a FROM tt ON b",
        "a JOIN clause is required before ON",
    );
}

#[test]
fn cast_without_typename() {
    parse_cmd(b"SELECT CAST(a AS ) FROM t");
}

#[test]
fn unknown_table_option() {
    expect_parser_err_msg(b"CREATE TABLE t(x)o", "unknown table option: o");
    expect_parser_err_msg(b"CREATE TABLE t(x) WITHOUT o", "unknown table option: o");
}

#[test]
fn qualified_table_name_within_triggers() {
    expect_parser_err_msg(
        b"CREATE TRIGGER tr1 AFTER INSERT ON t1 BEGIN
  DELETE FROM main.t2;
END;",
        "qualified table names are not allowed on INSERT, UPDATE, and DELETE statements \
         within triggers",
    );
}

#[test]
fn indexed_by_clause_within_triggers() {
    expect_parser_err_msg(
        b"CREATE TRIGGER main.t16err5 AFTER INSERT ON tA BEGIN
  UPDATE t16 INDEXED BY t16a SET rowid=rowid+1 WHERE a=1;
END;",
        "the INDEXED BY clause is not allowed on UPDATE or DELETE statements \
         within triggers",
    );
    expect_parser_err_msg(
        b"CREATE TRIGGER main.t16err6 AFTER INSERT ON tA BEGIN
  DELETE FROM t16 NOT INDEXED WHERE a=123;
END;",
        "the NOT INDEXED clause is not allowed on UPDATE or DELETE statements \
         within triggers",
    );
}

fn expect_parser_err_msg(input: &[u8], error_msg: &str) {
    expect_parser_err(input, ParserError::Custom(error_msg.to_owned()))
}
fn expect_parser_err(input: &[u8], err: ParserError) {
    let r = parse(input);
    if let Error::ParserError(e, _) = r.unwrap_err() {
        assert_eq!(e, err);
    } else {
        panic!("unexpected error type")
    };
}
fn parse_cmd(input: &[u8]) -> Cmd {
    parse(input).unwrap().unwrap()
}
fn parse(input: &[u8]) -> Result<Option<Cmd>, Error> {
    let mut parser = Parser::new(input);
    parser.next()
}
vendored/sqlite3-parser/src/lib.rs (new file, +8)
@@ -0,0 +1,8 @@
//! SQLite3 syntax lexer and parser
#![warn(missing_docs)]

pub mod dialect;
// In Lemon, the tokenizer calls the parser.
pub mod lexer;
mod parser;
pub use parser::ast;
vendored/sqlite3-parser/src/parser/ast/check.rs (new file, +336)
@@ -0,0 +1,336 @@
//! Check for additional syntax error
use crate::ast::*;
use crate::custom_err;
use std::fmt::{Display, Formatter};

impl Cmd {
    /// Statement accessor
    pub fn stmt(&self) -> &Stmt {
        match self {
            Self::Explain(stmt) => stmt,
            Self::ExplainQueryPlan(stmt) => stmt,
            Self::Stmt(stmt) => stmt,
        }
    }
    /// Like `sqlite3_column_count` but more limited
    pub fn column_count(&self) -> ColumnCount {
        match self {
            Self::Explain(_) => ColumnCount::Fixed(8),
            Self::ExplainQueryPlan(_) => ColumnCount::Fixed(4),
            Self::Stmt(stmt) => stmt.column_count(),
        }
    }
    /// Like `sqlite3_stmt_isexplain`
    pub fn is_explain(&self) -> bool {
        matches!(self, Self::Explain(_) | Self::ExplainQueryPlan(_))
    }
    /// Like `sqlite3_stmt_readonly`
    pub fn readonly(&self) -> bool {
        self.stmt().readonly()
    }
    /// Check for extra rules
    pub fn check(&self) -> Result<(), ParserError> {
        self.stmt().check()
    }
}

/// Column count
pub enum ColumnCount {
    /// With `SELECT *` / PRAGMA
    Dynamic,
    /// Constant count
    Fixed(usize),
    /// No column
    None,
}

impl ColumnCount {
    fn incr(&mut self) {
        if let Self::Fixed(n) = self {
            *n += 1;
        }
    }
}

impl Stmt {
    /// Like `sqlite3_column_count` but more limited
    pub fn column_count(&self) -> ColumnCount {
        match self {
            Self::Delete {
                returning: Some(returning),
                ..
            } => column_count(returning),
            Self::Insert {
                returning: Some(returning),
                ..
            } => column_count(returning),
            Self::Pragma(..) => ColumnCount::Dynamic,
            Self::Select(s) => s.column_count(),
            Self::Update {
                returning: Some(returning),
                ..
            } => column_count(returning),
            _ => ColumnCount::None,
        }
    }

    /// Like `sqlite3_stmt_readonly`
    pub fn readonly(&self) -> bool {
        match self {
            Self::Attach { .. } => true,
            Self::Begin(..) => true,
            Self::Commit(..) => true,
            Self::Detach(..) => true,
            Self::Pragma(..) => true, // TODO check all
            Self::Reindex { .. } => true,
            Self::Release(..) => true,
            Self::Rollback { .. } => true,
            Self::Savepoint(..) => true,
            Self::Select(..) => true,
            _ => false,
        }
    }

    /// Check for extra rules
    pub fn check(&self) -> Result<(), ParserError> {
        match self {
            Self::AlterTable(old_name, AlterTableBody::RenameTo(new_name)) => {
                if *new_name == old_name.name {
                    return Err(custom_err!(
                        "there is already another table or index with this name: {}",
                        new_name
                    ));
                }
                Ok(())
            }
            Self::AlterTable(.., AlterTableBody::AddColumn(cd)) => {
                for c in cd {
                    if let ColumnConstraint::PrimaryKey { .. } = c {
                        return Err(custom_err!("Cannot add a PRIMARY KEY column"));
                    } else if let ColumnConstraint::Unique(..) = c {
                        return Err(custom_err!("Cannot add a UNIQUE column"));
                    }
                }
                Ok(())
            }
            Self::CreateTable {
                temporary,
                tbl_name,
                body,
                ..
            } => {
                if *temporary {
                    if let Some(ref db_name) = tbl_name.db_name {
                        if db_name != "TEMP" {
                            return Err(custom_err!("temporary table name must be unqualified"));
                        }
                    }
                }
                body.check(tbl_name)
            }
            Self::CreateView {
                view_name,
                columns: Some(columns),
                select,
                ..
            } => {
                // SQLite3 engine renames duplicates:
                for (i, c) in columns.iter().enumerate() {
                    for o in &columns[i + 1..] {
                        if c.col_name == o.col_name {
                            return Err(custom_err!("duplicate column name: {}", c.col_name));
                        }
                    }
                }
                // SQLite3 engine raises this error later (not while parsing):
                match select.column_count() {
                    ColumnCount::Fixed(n) if n != columns.len() => Err(custom_err!(
                        "expected {} columns for {} but got {}",
                        columns.len(),
                        view_name,
                        n
                    )),
                    _ => Ok(()),
                }
            }
            Self::Delete {
                order_by: Some(_),
                limit: None,
                ..
            } => Err(custom_err!("ORDER BY without LIMIT on DELETE")),
            Self::Insert {
                columns: Some(columns),
                body: InsertBody::Select(select, ..),
                ..
            } => match select.body.select.column_count() {
                ColumnCount::Fixed(n) if n != columns.len() => {
                    Err(custom_err!("{} values for {} columns", n, columns.len()))
                }
                _ => Ok(()),
            },
            Self::Insert {
                columns: Some(columns),
                body: InsertBody::DefaultValues,
                ..
            } => Err(custom_err!("0 values for {} columns", columns.len())),
            Self::Update {
                order_by: Some(_),
                limit: None,
                ..
            } => Err(custom_err!("ORDER BY without LIMIT on UPDATE")),
            _ => Ok(()),
        }
    }
}
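
// For illustration, statements the arms above reject (hypothetical inputs;
// each surfaces as `ParserError::Custom` with the message shown):
//
//     ALTER TABLE t RENAME TO t;            -- there is already another table or index with this name: t
//     DELETE FROM t ORDER BY a;             -- ORDER BY without LIMIT on DELETE
//     INSERT INTO t (a, b) DEFAULT VALUES;  -- 0 values for 2 columns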

impl CreateTableBody {
    /// Check for extra rules
    pub fn check(&self, tbl_name: &QualifiedName) -> Result<(), ParserError> {
        if let Self::ColumnsAndConstraints {
            columns,
            constraints: _,
            options,
        } = self
        {
            let mut generated_count = 0;
            for c in columns.values() {
                for cs in &c.constraints {
                    if let ColumnConstraint::Generated { .. } = cs.constraint {
                        generated_count += 1;
                    }
                }
            }
            if generated_count == columns.len() {
                return Err(custom_err!("must have at least one non-generated column"));
            }

            if options.contains(TableOptions::STRICT) {
                for c in columns.values() {
                    match &c.col_type {
                        Some(Type { name, .. }) => {
                            // The datatype must be one of the following: INT INTEGER REAL TEXT BLOB ANY
                            if !(name.eq_ignore_ascii_case("INT")
                                || name.eq_ignore_ascii_case("INTEGER")
                                || name.eq_ignore_ascii_case("REAL")
                                || name.eq_ignore_ascii_case("TEXT")
                                || name.eq_ignore_ascii_case("BLOB")
                                || name.eq_ignore_ascii_case("ANY"))
                            {
                                return Err(custom_err!(
                                    "unknown datatype for {}.{}: \"{}\"",
                                    tbl_name,
                                    c.col_name,
                                    name
                                ));
                            }
                        }
                        _ => {
                            // Every column definition must specify a datatype for that column.
                            // The freedom to specify a column without a datatype is removed.
                            return Err(custom_err!(
                                "missing datatype for {}.{}",
                                tbl_name,
                                c.col_name
                            ));
                        }
                    }
                }
            }
            if options.contains(TableOptions::WITHOUT_ROWID) && !self.has_primary_key() {
                return Err(custom_err!("PRIMARY KEY missing on table {}", tbl_name));
            }
        }
        Ok(())
    }

    /// Explicit primary key constraint?
    pub fn has_primary_key(&self) -> bool {
        if let Self::ColumnsAndConstraints {
            columns,
            constraints,
            ..
        } = self
        {
            for col in columns.values() {
                for c in col {
                    if let ColumnConstraint::PrimaryKey { .. } = c {
                        return true;
                    }
                }
            }
            if let Some(constraints) = constraints {
                for c in constraints {
                    if let TableConstraint::PrimaryKey { .. } = c.constraint {
                        return true;
                    }
                }
            }
        }
        false
    }
}
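
// Likewise, sketches of inputs the STRICT / WITHOUT ROWID rules above
// reject (hypothetical statements; messages come from the arms above):
//
//     CREATE TABLE t (a DATE) STRICT;    -- unknown datatype for t.a: "DATE"
//     CREATE TABLE t (a) STRICT;         -- missing datatype for t.a
//     CREATE TABLE t (a) WITHOUT ROWID;  -- PRIMARY KEY missing on table t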

impl<'a> IntoIterator for &'a ColumnDefinition {
    type Item = &'a ColumnConstraint;
    type IntoIter = std::iter::Map<
        std::slice::Iter<'a, NamedColumnConstraint>,
        fn(&'a NamedColumnConstraint) -> &'a ColumnConstraint,
    >;

    fn into_iter(self) -> Self::IntoIter {
        self.constraints.iter().map(|nc| &nc.constraint)
    }
}
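
// This adapter is what lets the checks above iterate a `ColumnDefinition`
// directly (`for c in cd`), yielding each `ColumnConstraint` without the
// `NamedColumnConstraint` wrapper. A sketch (hypothetical helper):
//
//     fn has_generated(col: &ColumnDefinition) -> bool {
//         col.into_iter()
//             .any(|c| matches!(c, ColumnConstraint::Generated { .. }))
//     }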

impl Select {
    /// Like `sqlite3_column_count` but more limited
    pub fn column_count(&self) -> ColumnCount {
        self.body.select.column_count()
    }
}

impl OneSelect {
    /// Like `sqlite3_column_count` but more limited
    pub fn column_count(&self) -> ColumnCount {
        match self {
            Self::Select { columns, .. } => column_count(columns),
            Self::Values(values) => {
                assert!(!values.is_empty()); // TODO Validate
                ColumnCount::Fixed(values[0].len())
            }
        }
    }
    /// Check that all VALUES rows have the same number of terms
    pub fn push(values: &mut Vec<Vec<Expr>>, v: Vec<Expr>) -> Result<(), ParserError> {
        if values[0].len() != v.len() {
            return Err(custom_err!("all VALUES must have the same number of terms"));
        }
        values.push(v);
        Ok(())
    }
}
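
// The VALUES check above is what turns, e.g. (hypothetical input),
//
//     INSERT INTO t VALUES (1, 2), (3);
//
// into "all VALUES must have the same number of terms" at parse time.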

impl Display for QualifiedName {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        self.to_fmt(f)
    }
}

impl ResultColumn {
    fn column_count(&self) -> ColumnCount {
        match self {
            Self::Expr(..) => ColumnCount::Fixed(1),
            _ => ColumnCount::Dynamic,
        }
    }
}

fn column_count(cols: &[ResultColumn]) -> ColumnCount {
    assert!(!cols.is_empty());
    let mut count = ColumnCount::Fixed(0);
    for col in cols {
        match col.column_count() {
            ColumnCount::Fixed(_) => count.incr(),
            _ => return ColumnCount::Dynamic,
        }
    }
    count
}
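
// How the counting behaves, by way of example: each `ResultColumn::Expr`
// contributes one fixed column, while `*` or `t.*` makes the whole result
// set Dynamic (hypothetical queries):
//
//     SELECT 1, 2, 3 FROM t;  -- ColumnCount::Fixed(3)
//     SELECT a, t.* FROM t;   -- ColumnCount::Dynamic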
2037
vendored/sqlite3-parser/src/parser/ast/fmt.rs
Normal file
File diff suppressed because it is too large
1860
vendored/sqlite3-parser/src/parser/ast/mod.rs
Normal file
File diff suppressed because it is too large
154
vendored/sqlite3-parser/src/parser/mod.rs
Normal file
@@ -0,0 +1,154 @@
//! SQLite parser

pub mod ast;
pub mod parse {
    #![expect(unused_braces)]
    #![expect(clippy::if_same_then_else)]
    #![expect(clippy::absurd_extreme_comparisons)] // FIXME
    #![expect(clippy::needless_return)]
    #![expect(clippy::upper_case_acronyms)]
    #![expect(clippy::manual_range_patterns)]

    include!(concat!(env!("OUT_DIR"), "/parse.rs"));
}

use crate::dialect::Token;
use ast::{Cmd, ExplainKind, Name, Stmt};

/// Parser error
#[derive(Debug, PartialEq)]
pub enum ParserError {
    /// Syntax error
    SyntaxError(String),
    /// Unexpected EOF
    UnexpectedEof,
    /// Custom error
    Custom(String),
}

impl std::fmt::Display for ParserError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Self::SyntaxError(s) => {
                write!(f, "near \"{s}\": syntax error")
            }
            Self::UnexpectedEof => f.write_str("unexpected end of input"),
            Self::Custom(s) => f.write_str(s),
        }
    }
}

impl std::error::Error for ParserError {}
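
// Rendered messages, for reference (values are illustrative):
//
//     SyntaxError("FROM".to_owned())  // near "FROM": syntax error
//     UnexpectedEof                   // unexpected end of input
//     Custom("...".to_owned())        // printed verbatim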

/// Custom error constructor
#[macro_export]
macro_rules! custom_err {
    ($msg:literal $(,)?) => {
        $crate::parser::ParserError::Custom($msg.to_owned())
    };
    ($err:expr $(,)?) => {
        $crate::parser::ParserError::Custom(format!($err))
    };
    ($fmt:expr, $($arg:tt)*) => {
        $crate::parser::ParserError::Custom(format!($fmt, $($arg)*))
    };
}
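
// Sketch of the macro's arms in use (all expand to `ParserError::Custom`):
//
//     custom_err!("Cannot add a PRIMARY KEY column")  // literal  => .to_owned()
//     custom_err!("{} values for {} columns", n, m)   // fmt+args => format!(..)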

/// Parser context
pub struct Context<'input> {
    input: &'input [u8],
    explain: Option<ExplainKind>,
    stmt: Option<Stmt>,
    constraint_name: Option<Name>,      // transient
    module_arg: Option<(usize, usize)>, // Complete text of a module argument
    module_args: Option<Vec<String>>,   // CREATE VIRTUAL TABLE args
    done: bool,
    error: Option<ParserError>,
}

impl<'input> Context<'input> {
    /// Constructor
    pub fn new(input: &'input [u8]) -> Self {
        Context {
            input,
            explain: None,
            stmt: None,
            constraint_name: None,
            module_arg: None,
            module_args: None,
            done: false,
            error: None,
        }
    }

    /// Consume parsed command
    pub fn cmd(&mut self) -> Option<Cmd> {
        if let Some(stmt) = self.stmt.take() {
            match self.explain.take() {
                Some(ExplainKind::Explain) => Some(Cmd::Explain(stmt)),
                Some(ExplainKind::QueryPlan) => Some(Cmd::ExplainQueryPlan(stmt)),
                None => Some(Cmd::Stmt(stmt)),
            }
        } else {
            None
        }
    }

    fn constraint_name(&mut self) -> Option<Name> {
        self.constraint_name.take()
    }
    fn no_constraint_name(&self) -> bool {
        self.constraint_name.is_none()
    }

    fn vtab_arg_init(&mut self) {
        self.add_module_arg();
        self.module_arg = None;
    }
    fn vtab_arg_extend(&mut self, any: Token) {
        if let Some((_, ref mut n)) = self.module_arg {
            *n = any.2
        } else {
            self.module_arg = Some((any.0, any.2))
        }
    }
    fn add_module_arg(&mut self) {
        if let Some((start, end)) = self.module_arg.take() {
            if let Ok(arg) = std::str::from_utf8(&self.input[start..end]) {
                self.module_args.get_or_insert(vec![]).push(arg.to_owned());
            } // FIXME error handling
        }
    }
    fn module_args(&mut self) -> Option<Vec<String>> {
        self.add_module_arg();
        self.module_args.take()
    }

    /// This routine is called after a single SQL statement has been parsed.
    fn sqlite3_finish_coding(&mut self) {
        self.done = true;
    }

    /// Return `true` if the parser completes, either successfully or with an error.
    pub fn done(&self) -> bool {
        self.done || self.error.is_some()
    }

    /// Return `true` if no error has been generated so far
    pub fn is_ok(&self) -> bool {
        self.error.is_none()
    }

    /// Consume error generated by parser
    pub fn error(&mut self) -> Option<ParserError> {
        self.error.take()
    }

    /// Reset this context so the next statement can be parsed
    pub fn reset(&mut self) {
        self.explain = None;
        self.stmt = None;
        self.constraint_name = None;
        self.module_arg = None;
        self.module_args = None;
        self.done = false;
        self.error = None;
    }
}
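
// A minimal sketch of the intended lifecycle, assuming the Lemon-generated
// parser in parse.rs fills `stmt` / `explain` / `error` as it reduces rules
// (the token-feeding step is elided):
//
//     let mut ctx = Context::new(b"EXPLAIN SELECT 1;");
//     // ... the lexer feeds tokens to the generated parser until ctx.done() ...
//     if ctx.is_ok() {
//         let cmd = ctx.cmd();    // Some(Cmd::Explain(..)) once a statement completes
//     } else {
//         let err = ctx.error();  // consume the ParserError
//     }
//     ctx.reset();                // ready for the next statement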
1492
vendored/sqlite3-parser/src/parser/parse.y
Normal file
File diff suppressed because it is too large