Merge 'Convert SQLite parser in Rust by hand' from Lâm Hoàng Phúc
working on #2337 Closes #2381
Cargo.lock | 15 (generated)
@@ -3949,6 +3949,21 @@ dependencies = [
 "turso_core",
]

[[package]]
name = "turso_parser"
version = "0.1.4-pre.8"
dependencies = [
 "bitflags 2.9.0",
 "criterion",
 "fallible-iterator",
 "miette",
 "pprof",
 "serde",
 "strum",
 "strum_macros",
 "turso_sqlite3_parser",
]

[[package]]
name = "turso_sqlite3"
version = "0.1.4-pre.8"

Cargo.toml

@@ -26,6 +26,7 @@ members = [
    "testing/sqlite_test_ext",
    "tests",
    "vendored/sqlite3-parser/sqlparser_bench",
    "parser",
    "packages/turso-sync-engine",
    "packages/turso-sync-js",
]

parser/Cargo.toml | 34 (new file)
@@ -0,0 +1,34 @@
[package]
name = "turso_parser"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
description = "The Turso parser library"

[lib]
name = "turso_parser"

[features]
default = []
serde = ["dep:serde", "bitflags/serde"]

[dependencies]
bitflags = "2.0"
miette = "7.4.0"
strum = { workspace = true }
strum_macros = { workspace = true }
serde = { workspace = true, optional = true, features = ["derive"] }

[dev-dependencies]
fallible-iterator = "0.3"
criterion = { version = "0.5", features = ["html_reports"] }
turso_sqlite3_parser = { workspace = true }

[target.'cfg(not(target_family = "windows"))'.dev-dependencies]
pprof = { version = "0.14.0", features = ["criterion", "flamegraph"] }

[[bench]]
name = "parser_benchmark"
harness = false
parser/README.md | 1 (new file)
@@ -0,0 +1 @@
TODO
parser/benches/parser_benchmark.rs | 102 (new file)
@@ -0,0 +1,102 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use fallible_iterator::FallibleIterator;
use pprof::criterion::{Output, PProfProfiler};
use turso_parser::{lexer::Lexer, parser::Parser};
use turso_sqlite3_parser::lexer::{
    sql::{Parser as OldParser, Tokenizer},
    Scanner,
};

fn bench_parser(criterion: &mut Criterion) {
    let queries = [
        "SELECT 1",
        "SELECT * FROM users LIMIT 1",
        "SELECT first_name, count(1) FROM users GROUP BY first_name HAVING count(1) > 1 ORDER BY count(1) LIMIT 1",
    ];

    for query in queries.iter() {
        let mut group = criterion.benchmark_group(format!("Parser `{query}`"));
        let qb = query.as_bytes();

        group.bench_function(BenchmarkId::new("limbo_parser_query", ""), |b| {
            b.iter(|| Parser::new(black_box(qb)).next().unwrap());
        });

        group.bench_function(BenchmarkId::new("limbo_old_parser_query", ""), |b| {
            b.iter(|| {
                OldParser::new(black_box(qb)).next().unwrap().unwrap();
            });
        });

        group.finish();
    }
}

fn bench_parser_insert_batch(criterion: &mut Criterion) {
    for batch_size in [1, 10, 100] {
        let mut values = String::from("INSERT INTO test VALUES ");
        for i in 0..batch_size {
            if i > 0 {
                values.push(',');
            }
            values.push_str(&format!("({}, '{}')", i, format_args!("value_{i}")));
        }

        let mut group = criterion.benchmark_group(format!("Parser insert batch `{values}`"));
        let qb = values.as_bytes();

        group.bench_function(BenchmarkId::new("limbo_parser_insert_batch", ""), |b| {
            b.iter(|| Parser::new(black_box(qb)).next().unwrap());
        });

        group.bench_function(BenchmarkId::new("limbo_old_parser_insert_batch", ""), |b| {
            b.iter(|| {
                OldParser::new(black_box(qb)).next().unwrap().unwrap();
            });
        });

        group.finish();
    }
}

fn bench_lexer(criterion: &mut Criterion) {
    let queries = [
        "SELECT 1",
        "SELECT * FROM users LIMIT 1",
        "SELECT first_name, count(1) FROM users GROUP BY first_name HAVING count(1) > 1 ORDER BY count(1) LIMIT 1",
    ];

    for query in queries.iter() {
        let mut group = criterion.benchmark_group(format!("Lexer `{query}`"));
        let qb = query.as_bytes();

        group.bench_function(BenchmarkId::new("limbo_lexer_query", ""), |b| {
            b.iter(|| {
                for token in Lexer::new(black_box(qb)) {
                    token.unwrap();
                }
            });
        });

        group.bench_function(BenchmarkId::new("limbo_old_lexer_query", ""), |b| {
            b.iter(|| {
                let tokenizer = Tokenizer::new();
                let mut scanner = Scanner::new(black_box(tokenizer));
                loop {
                    if let (_, None, _) = scanner.scan(black_box(qb)).unwrap() {
                        break;
                    }
                }
            });
        });

        group.finish();
    }
}

criterion_group! {
    name = benches;
    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
    targets = bench_parser, bench_parser_insert_batch, bench_lexer
}
criterion_main!(benches);
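For orientation, a minimal usage sketch based only on the API the benchmarks above exercise: `Parser::new` takes a byte slice and yields statements through `next()`, while `Lexer::new` yields fallible tokens when iterated. The concrete token and AST item types live in the suppressed `parser/src/parser.rs` and `parser/src/ast.rs` diffs, so the unwrapped types here are assumptions, not part of this change.

    // Sketch only: grounded in the calls made by parser_benchmark.rs above;
    // the exact item types returned by `next()` are not visible in this diff.
    use turso_parser::{lexer::Lexer, parser::Parser};

    fn sketch() {
        let sql = b"SELECT first_name FROM users LIMIT 1";

        // Tokenize: each item is a fallible token, as in bench_lexer.
        for token in Lexer::new(sql) {
            let _token = token.expect("lexer error");
        }

        // Parse the first statement, as in bench_parser.
        let _stmt = Parser::new(sql).next().expect("expected one parsed statement");
    }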
parser/build.rs | 290 (new file)
@@ -0,0 +1,290 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::PathBuf;

/// Generates a trie-like function with nested match expressions for parsing SQL keywords.
/// Example input: [["ABORT", "TK_ABORT"], ["ACTION", "TK_ACTION"], ["ADD", "TK_ADD"]]
/// A
/// ├─ B
/// │  ├─ O
/// │  │  ├─ R
/// │  │  │  ├─ T -> TK_ABORT
/// ├─ C
/// │  ├─ T
/// │  │  ├─ I
/// │  │  │  ├─ O
/// │  │  │  │  ├─ N -> TK_ACTION
/// ├─ D
/// │  ├─ D -> TK_ADD
fn build_keyword_map(
    writer: &mut impl Write,
    func_name: &str,
    keywords: &[[&'static str; 2]],
) -> Result<(), std::io::Error> {
    assert!(!keywords.is_empty());
    let mut min_len = keywords[0][0].len();
    let mut max_len = keywords[0][0].len();

    struct PathEntry {
        result: Option<&'static str>,
        sub_entries: HashMap<u8, Box<PathEntry>>,
    }

    let mut paths = Box::new(PathEntry {
        result: None,
        sub_entries: HashMap::new(),
    });

    for keyword in keywords {
        let keyword_b = keyword[0].as_bytes();

        if keyword_b.len() < min_len {
            min_len = keyword_b.len();
        }

        if keyword_b.len() > max_len {
            max_len = keyword_b.len();
        }

        let mut current = &mut paths;

        for &b in keyword_b {
            let upper_b = b.to_ascii_uppercase();

            match current.sub_entries.get(&upper_b) {
                Some(_) => {
                    current = current.sub_entries.get_mut(&upper_b).unwrap();
                }
                None => {
                    let new_entry = Box::new(PathEntry {
                        result: None,
                        sub_entries: HashMap::new(),
                    });
                    current.sub_entries.insert(upper_b, new_entry);
                    current = current.sub_entries.get_mut(&upper_b).unwrap();
                }
            }
        }

        assert!(current.result.is_none());
        current.result = Some(keyword[1]);
    }

    fn write_entry(writer: &mut impl Write, entry: &PathEntry) -> Result<(), std::io::Error> {
        if let Some(result) = entry.result {
            writeln!(writer, "if idx == buf.len() {{")?;
            writeln!(writer, "return Some(TokenType::{result});")?;
            writeln!(writer, "}}")?;
        }

        if entry.sub_entries.is_empty() {
            writeln!(writer, "None")?;
            return Ok(());
        }

        writeln!(writer, "if idx >= buf.len() {{")?;
        writeln!(writer, "return None;")?;
        writeln!(writer, "}}")?;

        writeln!(writer, "match buf[idx] {{")?;
        for (&b, sub_entry) in &entry.sub_entries {
            if b.is_ascii_alphabetic() {
                writeln!(writer, "{} | {} => {{", b, b.to_ascii_lowercase())?;
            } else {
                writeln!(writer, "{b} => {{")?;
            }
            writeln!(writer, "idx += 1;")?;
            write_entry(writer, sub_entry)?;
            writeln!(writer, "}}")?;
        }

        writeln!(writer, "_ => None")?;
        writeln!(writer, "}}")?;
        Ok(())
    }

    writeln!(
        writer,
        "pub(crate) const MAX_KEYWORD_LEN: usize = {max_len};"
    )?;
    writeln!(
        writer,
        "pub(crate) const MIN_KEYWORD_LEN: usize = {min_len};"
    )?;
    writeln!(writer, "/// Check if `word` is a keyword")?;
    writeln!(
        writer,
        "pub fn {func_name}(buf: &[u8]) -> Option<TokenType> {{"
    )?;
    writeln!(
        writer,
        "if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {{"
    )?;
    writeln!(writer, "return None;")?;
    writeln!(writer, "}}")?;
    writeln!(writer, "let mut idx = 0;")?;
    write_entry(writer, &paths)?;
    writeln!(writer, "}}")?;
    Ok(())
}

fn main() {
    let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
    let keywords = out_dir.join("keywords.rs");
    let mut keywords = BufWriter::new(File::create(keywords).unwrap());
    build_keyword_map(
        &mut keywords,
        "keyword_token",
        &[
            ["ABORT", "TK_ABORT"],
            ["ACTION", "TK_ACTION"],
            ["ADD", "TK_ADD"],
            ["AFTER", "TK_AFTER"],
            ["ALL", "TK_ALL"],
            ["ALTER", "TK_ALTER"],
            ["ALWAYS", "TK_ALWAYS"],
            ["ANALYZE", "TK_ANALYZE"],
            ["AND", "TK_AND"],
            ["AS", "TK_AS"],
            ["ASC", "TK_ASC"],
            ["ATTACH", "TK_ATTACH"],
            ["AUTOINCREMENT", "TK_AUTOINCR"],
            ["BEFORE", "TK_BEFORE"],
            ["BEGIN", "TK_BEGIN"],
            ["BETWEEN", "TK_BETWEEN"],
            ["BY", "TK_BY"],
            ["CASCADE", "TK_CASCADE"],
            ["CASE", "TK_CASE"],
            ["CAST", "TK_CAST"],
            ["CHECK", "TK_CHECK"],
            ["COLLATE", "TK_COLLATE"],
            ["COLUMN", "TK_COLUMNKW"],
            ["COMMIT", "TK_COMMIT"],
            ["CONFLICT", "TK_CONFLICT"],
            ["CONSTRAINT", "TK_CONSTRAINT"],
            ["CREATE", "TK_CREATE"],
            ["CROSS", "TK_JOIN_KW"],
            ["CURRENT", "TK_CURRENT"],
            ["CURRENT_DATE", "TK_CTIME_KW"],
            ["CURRENT_TIME", "TK_CTIME_KW"],
            ["CURRENT_TIMESTAMP", "TK_CTIME_KW"],
            ["DATABASE", "TK_DATABASE"],
            ["DEFAULT", "TK_DEFAULT"],
            ["DEFERRABLE", "TK_DEFERRABLE"],
            ["DEFERRED", "TK_DEFERRED"],
            ["DELETE", "TK_DELETE"],
            ["DESC", "TK_DESC"],
            ["DETACH", "TK_DETACH"],
            ["DISTINCT", "TK_DISTINCT"],
            ["DO", "TK_DO"],
            ["DROP", "TK_DROP"],
            ["EACH", "TK_EACH"],
            ["ELSE", "TK_ELSE"],
            ["END", "TK_END"],
            ["ESCAPE", "TK_ESCAPE"],
            ["EXCEPT", "TK_EXCEPT"],
            ["EXCLUDE", "TK_EXCLUDE"],
            ["EXCLUSIVE", "TK_EXCLUSIVE"],
            ["EXISTS", "TK_EXISTS"],
            ["EXPLAIN", "TK_EXPLAIN"],
            ["FAIL", "TK_FAIL"],
            ["FILTER", "TK_FILTER"],
            ["FIRST", "TK_FIRST"],
            ["FOLLOWING", "TK_FOLLOWING"],
            ["FOR", "TK_FOR"],
            ["FOREIGN", "TK_FOREIGN"],
            ["FROM", "TK_FROM"],
            ["FULL", "TK_JOIN_KW"],
            ["GENERATED", "TK_GENERATED"],
            ["GLOB", "TK_LIKE_KW"],
            ["GROUP", "TK_GROUP"],
            ["GROUPS", "TK_GROUPS"],
            ["HAVING", "TK_HAVING"],
            ["IF", "TK_IF"],
            ["IGNORE", "TK_IGNORE"],
            ["IMMEDIATE", "TK_IMMEDIATE"],
            ["IN", "TK_IN"],
            ["INDEX", "TK_INDEX"],
            ["INDEXED", "TK_INDEXED"],
            ["INITIALLY", "TK_INITIALLY"],
            ["INNER", "TK_JOIN_KW"],
            ["INSERT", "TK_INSERT"],
            ["INSTEAD", "TK_INSTEAD"],
            ["INTERSECT", "TK_INTERSECT"],
            ["INTO", "TK_INTO"],
            ["IS", "TK_IS"],
            ["ISNULL", "TK_ISNULL"],
            ["JOIN", "TK_JOIN"],
            ["KEY", "TK_KEY"],
            ["LAST", "TK_LAST"],
            ["LEFT", "TK_JOIN_KW"],
            ["LIKE", "TK_LIKE_KW"],
            ["LIMIT", "TK_LIMIT"],
            ["MATCH", "TK_MATCH"],
            ["MATERIALIZED", "TK_MATERIALIZED"],
            ["NATURAL", "TK_JOIN_KW"],
            ["NO", "TK_NO"],
            ["NOT", "TK_NOT"],
            ["NOTHING", "TK_NOTHING"],
            ["NOTNULL", "TK_NOTNULL"],
            ["NULL", "TK_NULL"],
            ["NULLS", "TK_NULLS"],
            ["OF", "TK_OF"],
            ["OFFSET", "TK_OFFSET"],
            ["ON", "TK_ON"],
            ["OR", "TK_OR"],
            ["ORDER", "TK_ORDER"],
            ["OTHERS", "TK_OTHERS"],
            ["OUTER", "TK_JOIN_KW"],
            ["OVER", "TK_OVER"],
            ["PARTITION", "TK_PARTITION"],
            ["PLAN", "TK_PLAN"],
            ["PRAGMA", "TK_PRAGMA"],
            ["PRECEDING", "TK_PRECEDING"],
            ["PRIMARY", "TK_PRIMARY"],
            ["QUERY", "TK_QUERY"],
            ["RAISE", "TK_RAISE"],
            ["RANGE", "TK_RANGE"],
            ["RECURSIVE", "TK_RECURSIVE"],
            ["REFERENCES", "TK_REFERENCES"],
            ["REGEXP", "TK_LIKE_KW"],
            ["REINDEX", "TK_REINDEX"],
            ["RELEASE", "TK_RELEASE"],
            ["RENAME", "TK_RENAME"],
            ["REPLACE", "TK_REPLACE"],
            ["RETURNING", "TK_RETURNING"],
            ["RESTRICT", "TK_RESTRICT"],
            ["RIGHT", "TK_JOIN_KW"],
            ["ROLLBACK", "TK_ROLLBACK"],
            ["ROW", "TK_ROW"],
            ["ROWS", "TK_ROWS"],
            ["SAVEPOINT", "TK_SAVEPOINT"],
            ["SELECT", "TK_SELECT"],
            ["SET", "TK_SET"],
            ["TABLE", "TK_TABLE"],
            ["TEMP", "TK_TEMP"],
            ["TEMPORARY", "TK_TEMP"],
            ["THEN", "TK_THEN"],
            ["TIES", "TK_TIES"],
            ["TO", "TK_TO"],
            ["TRANSACTION", "TK_TRANSACTION"],
            ["TRIGGER", "TK_TRIGGER"],
            ["UNBOUNDED", "TK_UNBOUNDED"],
            ["UNION", "TK_UNION"],
            ["UNIQUE", "TK_UNIQUE"],
            ["UPDATE", "TK_UPDATE"],
            ["USING", "TK_USING"],
            ["VACUUM", "TK_VACUUM"],
            ["VALUES", "TK_VALUES"],
            ["VIEW", "TK_VIEW"],
            ["VIRTUAL", "TK_VIRTUAL"],
            ["WHEN", "TK_WHEN"],
            ["WHERE", "TK_WHERE"],
            ["WINDOW", "TK_WINDOW"],
            ["WITH", "TK_WITH"],
            ["WITHOUT", "TK_WITHOUT"],
        ],
    )
    .unwrap();
}
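To make the generated trie concrete, this is roughly what the build script above would emit for a reduced keyword set of just AS and ASC (hand-expanded and reformatted for readability; the real keywords.rs is written by write_entry into OUT_DIR and covers the full table, and is presumably pulled in by the lexer via include!, which is not shown in this diff):

    // Hand-expanded sketch of the build-script output for only ["AS", "ASC"];
    // the byte values are 'A'/'a', 'S'/'s', 'C'/'c'.
    pub(crate) const MAX_KEYWORD_LEN: usize = 3;
    pub(crate) const MIN_KEYWORD_LEN: usize = 2;
    /// Check if `word` is a keyword
    pub fn keyword_token(buf: &[u8]) -> Option<TokenType> {
        if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {
            return None;
        }
        let mut idx = 0;
        if idx >= buf.len() {
            return None;
        }
        match buf[idx] {
            65 | 97 => {
                idx += 1;
                if idx >= buf.len() {
                    return None;
                }
                match buf[idx] {
                    83 | 115 => {
                        idx += 1;
                        if idx == buf.len() {
                            return Some(TokenType::TK_AS);
                        }
                        if idx >= buf.len() {
                            return None;
                        }
                        match buf[idx] {
                            67 | 99 => {
                                idx += 1;
                                if idx == buf.len() {
                                    return Some(TokenType::TK_ASC);
                                }
                                None
                            }
                            _ => None,
                        }
                    }
                    _ => None,
                }
            }
            _ => None,
        }
    }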
parser/src/ast.rs | 1417 (new file)
File diff suppressed because it is too large
parser/src/error.rs | 93 (new file)
@@ -0,0 +1,93 @@
use std::error;
use std::fmt;

use crate::token::TokenType;

/// SQL lexer and parser errors
#[non_exhaustive]
#[derive(Debug, miette::Diagnostic)]
#[diagnostic()]
pub enum Error {
    /// Lexer error
    UnrecognizedToken(#[label("here")] miette::SourceSpan),
    /// Missing quote or double-quote or backtick
    UnterminatedLiteral(#[label("here")] miette::SourceSpan),
    /// Missing `]`
    UnterminatedBracket(#[label("here")] miette::SourceSpan),
    /// Missing `*/`
    UnterminatedBlockComment(#[label("here")] miette::SourceSpan),
    /// Invalid parameter name
    BadVariableName(#[label("here")] miette::SourceSpan),
    /// Invalid number format
    BadNumber(#[label("here")] miette::SourceSpan),
    // Bad fractional part of a number
    BadFractionalPart(#[label("here")] miette::SourceSpan),
    // Bad exponent part of a number
    BadExponentPart(#[label("here")] miette::SourceSpan),
    /// Invalid or missing sign after `!`
    ExpectedEqualsSign(#[label("here")] miette::SourceSpan),
    /// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
    MalformedHexInteger(#[label("here")] miette::SourceSpan),
    // parse errors
    // Unexpected end of file
    ParseUnexpectedEOF,
    // Unexpected token
    ParseUnexpectedToken {
        #[label("parsed to here")]
        parsed_offset: miette::SourceSpan,

        got: TokenType,
        expected: &'static [TokenType],
    },
    // Custom error message
    Custom(String),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::UnrecognizedToken(pos) => {
                write!(f, "unrecognized token at {pos:?}")
            }
            Self::UnterminatedLiteral(pos) => {
                write!(f, "non-terminated literal at {pos:?}")
            }
            Self::UnterminatedBracket(pos) => {
                write!(f, "non-terminated bracket at {pos:?}")
            }
            Self::UnterminatedBlockComment(pos) => {
                write!(f, "non-terminated block comment at {pos:?}")
            }
            Self::BadVariableName(pos) => write!(f, "bad variable name at {pos:?}"),
            Self::BadNumber(pos) => write!(f, "bad number at {pos:?}"),
            Self::BadFractionalPart(pos) => {
                write!(f, "bad fractional part at {pos:?}")
            }
            Self::BadExponentPart(pos) => {
                write!(f, "bad exponent part at {pos:?}")
            }
            Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {pos:?}"),
            Self::MalformedHexInteger(pos) => {
                write!(f, "malformed hex integer at {pos:?}")
            }
            Self::ParseUnexpectedEOF => {
                write!(f, "unexpected end of file")
            }
            Self::ParseUnexpectedToken {
                parsed_offset,
                got,
                expected,
            } => {
                write!(
                    f,
                    "got unexpected token after parsing to offset {parsed_offset:?}: expected {expected:?}, found {got}",
                )
            }
            Self::Custom(ref s) => {
                write!(f, "custom error: {s}")
            }
        }
    }
}

impl error::Error for Error {}
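Since every lexer variant carries a labeled miette::SourceSpan, a caller can turn these into annotated diagnostics by attaching the original SQL text as source code. A hedged sketch, assuming the usual miette 7 report API rather than anything prescribed by this crate:

    // Sketch only: Error derives miette::Diagnostic above, so wrapping it in a
    // miette::Report and attaching the original SQL lets miette render the span.
    fn report_parse_error(sql: &str, err: turso_parser::error::Error) {
        let report = miette::Report::new(err).with_source_code(sql.to_string());
        // Debug-formatting a miette::Report prints the annotated diagnostic.
        eprintln!("{report:?}");
    }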
parser/src/lexer.rs | 1380 (new file)
File diff suppressed because it is too large
parser/src/lib.rs | 5 (new file)
@@ -0,0 +1,5 @@
pub mod ast;
pub mod error;
pub mod lexer;
pub mod parser;
pub mod token;
parser/src/parser.rs | 11230 (new file)
File diff suppressed because it is too large
parser/src/token.rs | 384 (new file)
@@ -0,0 +1,384 @@
use std::fmt::Display;

/// Token classes
// Generated by lemon (parse.h).
// Renamed manually.
// To be kept in sync.
#[non_exhaustive]
#[allow(non_camel_case_types, missing_docs)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
#[repr(u16)]
pub enum TokenType {
    TK_EOF = 0,
    TK_SEMI = 1,
    TK_EXPLAIN = 2,
    TK_QUERY = 3,
    TK_PLAN = 4,
    TK_BEGIN = 5,
    TK_TRANSACTION = 6,
    TK_DEFERRED = 7,
    TK_IMMEDIATE = 8,
    TK_EXCLUSIVE = 9,
    TK_COMMIT = 10,
    TK_END = 11,
    TK_ROLLBACK = 12,
    TK_SAVEPOINT = 13,
    TK_RELEASE = 14,
    TK_TO = 15,
    TK_TABLE = 16,
    TK_CREATE = 17,
    TK_IF = 18,
    TK_NOT = 19,
    TK_EXISTS = 20,
    TK_TEMP = 21,
    TK_LP = 22,
    TK_RP = 23,
    TK_AS = 24,
    TK_COMMA = 25,
    TK_WITHOUT = 26,
    TK_ABORT = 27,
    TK_ACTION = 28,
    TK_AFTER = 29,
    TK_ANALYZE = 30,
    TK_ASC = 31,
    TK_ATTACH = 32,
    TK_BEFORE = 33,
    TK_BY = 34,
    TK_CASCADE = 35,
    TK_CAST = 36,
    TK_CONFLICT = 37,
    TK_DATABASE = 38,
    TK_DESC = 39,
    TK_DETACH = 40,
    TK_EACH = 41,
    TK_FAIL = 42,
    TK_OR = 43,
    TK_AND = 44,
    TK_IS = 45,
    TK_ISNOT = 46,
    TK_MATCH = 47,
    TK_LIKE_KW = 48,
    TK_BETWEEN = 49,
    TK_IN = 50,
    TK_ISNULL = 51,
    TK_NOTNULL = 52,
    TK_NE = 53,
    TK_EQ = 54,
    TK_GT = 55,
    TK_LE = 56,
    TK_LT = 57,
    TK_GE = 58,
    TK_ESCAPE = 59,
    TK_ID = 60,
    TK_COLUMNKW = 61,
    TK_DO = 62,
    TK_FOR = 63,
    TK_IGNORE = 64,
    TK_INITIALLY = 65,
    TK_INSTEAD = 66,
    TK_NO = 67,
    TK_KEY = 68,
    TK_OF = 69,
    TK_OFFSET = 70,
    TK_PRAGMA = 71,
    TK_RAISE = 72,
    TK_RECURSIVE = 73,
    TK_REPLACE = 74,
    TK_RESTRICT = 75,
    TK_ROW = 76,
    TK_ROWS = 77,
    TK_TRIGGER = 78,
    TK_VACUUM = 79,
    TK_VIEW = 80,
    TK_VIRTUAL = 81,
    TK_WITH = 82,
    TK_NULLS = 83,
    TK_FIRST = 84,
    TK_LAST = 85,
    TK_CURRENT = 86,
    TK_FOLLOWING = 87,
    TK_PARTITION = 88,
    TK_PRECEDING = 89,
    TK_RANGE = 90,
    TK_UNBOUNDED = 91,
    TK_EXCLUDE = 92,
    TK_GROUPS = 93,
    TK_OTHERS = 94,
    TK_TIES = 95,
    TK_GENERATED = 96,
    TK_ALWAYS = 97,
    TK_MATERIALIZED = 98,
    TK_REINDEX = 99,
    TK_RENAME = 100,
    TK_CTIME_KW = 101,
    TK_ANY = 102,
    TK_BITAND = 103,
    TK_BITOR = 104,
    TK_LSHIFT = 105,
    TK_RSHIFT = 106,
    TK_PLUS = 107,
    TK_MINUS = 108,
    TK_STAR = 109,
    TK_SLASH = 110,
    TK_REM = 111,
    TK_CONCAT = 112,
    TK_PTR = 113,
    TK_COLLATE = 114,
    TK_BITNOT = 115,
    TK_ON = 116,
    TK_INDEXED = 117,
    TK_STRING = 118,
    TK_JOIN_KW = 119,
    TK_CONSTRAINT = 120,
    TK_DEFAULT = 121,
    TK_NULL = 122,
    TK_PRIMARY = 123,
    TK_UNIQUE = 124,
    TK_CHECK = 125,
    TK_REFERENCES = 126,
    TK_AUTOINCR = 127,
    TK_INSERT = 128,
    TK_DELETE = 129,
    TK_UPDATE = 130,
    TK_SET = 131,
    TK_DEFERRABLE = 132,
    TK_FOREIGN = 133,
    TK_DROP = 134,
    TK_UNION = 135,
    TK_ALL = 136,
    TK_EXCEPT = 137,
    TK_INTERSECT = 138,
    TK_SELECT = 139,
    TK_VALUES = 140,
    TK_DISTINCT = 141,
    TK_DOT = 142,
    TK_FROM = 143,
    TK_JOIN = 144,
    TK_USING = 145,
    TK_ORDER = 146,
    TK_GROUP = 147,
    TK_HAVING = 148,
    TK_LIMIT = 149,
    TK_WHERE = 150,
    TK_RETURNING = 151,
    TK_INTO = 152,
    TK_NOTHING = 153,
    TK_BLOB = 154,
    TK_FLOAT = 155,
    TK_INTEGER = 156,
    TK_VARIABLE = 157,
    TK_CASE = 158,
    TK_WHEN = 159,
    TK_THEN = 160,
    TK_ELSE = 161,
    TK_INDEX = 162,
    TK_ALTER = 163,
    TK_ADD = 164,
    TK_WINDOW = 165,
    TK_OVER = 166,
    TK_FILTER = 167,
    TK_ILLEGAL = 185,
}

impl Display for TokenType {
    // for debugging purposes
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use TokenType::*;
        let s = match self {
            TK_EOF => "TK_EOF",
            TK_SEMI => "TK_SEMI",
            TK_EXPLAIN => "TK_EXPLAIN",
            TK_QUERY => "TK_QUERY",
            TK_PLAN => "TK_PLAN",
            TK_BEGIN => "TK_BEGIN",
            TK_TRANSACTION => "TK_TRANSACTION",
            TK_DEFERRED => "TK_DEFERRED",
            TK_IMMEDIATE => "TK_IMMEDIATE",
            TK_EXCLUSIVE => "TK_EXCLUSIVE",
            TK_COMMIT => "TK_COMMIT",
            TK_END => "TK_END",
            TK_ROLLBACK => "TK_ROLLBACK",
            TK_SAVEPOINT => "TK_SAVEPOINT",
            TK_RELEASE => "TK_RELEASE",
            TK_TO => "TK_TO",
            TK_TABLE => "TK_TABLE",
            TK_CREATE => "TK_CREATE",
            TK_IF => "TK_IF",
            TK_NOT => "TK_NOT",
            TK_EXISTS => "TK_EXISTS",
            TK_TEMP => "TK_TEMP",
            TK_LP => "TK_LP",
            TK_RP => "TK_RP",
            TK_AS => "TK_AS",
            TK_COMMA => "TK_COMMA",
            TK_WITHOUT => "TK_WITHOUT",
            TK_ABORT => "TK_ABORT",
            TK_ACTION => "TK_ACTION",
            TK_AFTER => "TK_AFTER",
            TK_ANALYZE => "TK_ANALYZE",
            TK_ASC => "TK_ASC",
            TK_ATTACH => "TK_ATTACH",
            TK_BEFORE => "TK_BEFORE",
            TK_BY => "TK_BY",
            TK_CASCADE => "TK_CASCADE",
            TK_CAST => "TK_CAST",
            TK_CONFLICT => "TK_CONFLICT",
            TK_DATABASE => "TK_DATABASE",
            TK_DESC => "TK_DESC",
            TK_DETACH => "TK_DETACH",
            TK_EACH => "TK_EACH",
            TK_FAIL => "TK_FAIL",
            TK_OR => "TK_OR",
            TK_AND => "TK_AND",
            TK_IS => "TK_IS",
            TK_ISNOT => "TK_ISNOT",
            TK_MATCH => "TK_MATCH",
            TK_LIKE_KW => "TK_LIKE_KW",
            TK_BETWEEN => "TK_BETWEEN",
            TK_IN => "TK_IN",
            TK_ISNULL => "TK_ISNULL",
            TK_NOTNULL => "TK_NOTNULL",
            TK_NE => "TK_NE",
            TK_EQ => "TK_EQ",
            TK_GT => "TK_GT",
            TK_LE => "TK_LE",
            TK_LT => "TK_LT",
            TK_GE => "TK_GE",
            TK_ESCAPE => "TK_ESCAPE",
            TK_ID => "TK_ID",
            TK_COLUMNKW => "TK_COLUMNKW",
            TK_DO => "TK_DO",
            TK_FOR => "TK_FOR",
            TK_IGNORE => "TK_IGNORE",
            TK_INITIALLY => "TK_INITIALLY",
            TK_INSTEAD => "TK_INSTEAD",
            TK_NO => "TK_NO",
            TK_KEY => "TK_KEY",
            TK_OF => "TK_OF",
            TK_OFFSET => "TK_OFFSET",
            TK_PRAGMA => "TK_PRAGMA",
            TK_RAISE => "TK_RAISE",
            TK_RECURSIVE => "TK_RECURSIVE",
            TK_REPLACE => "TK_REPLACE",
            TK_RESTRICT => "TK_RESTRICT",
            TK_ROW => "TK_ROW",
            TK_ROWS => "TK_ROWS",
            TK_TRIGGER => "TK_TRIGGER",
            TK_VACUUM => "TK_VACUUM",
            TK_VIEW => "TK_VIEW",
            TK_VIRTUAL => "TK_VIRTUAL",
            TK_WITH => "TK_WITH",
            TK_NULLS => "TK_NULLS",
            TK_FIRST => "TK_FIRST",
            TK_LAST => "TK_LAST",
            TK_CURRENT => "TK_CURRENT",
            TK_FOLLOWING => "TK_FOLLOWING",
            TK_PARTITION => "TK_PARTITION",
            TK_PRECEDING => "TK_PRECEDING",
            TK_RANGE => "TK_RANGE",
            TK_UNBOUNDED => "TK_UNBOUNDED",
            TK_EXCLUDE => "TK_EXCLUDE",
            TK_GROUPS => "TK_GROUPS",
            TK_OTHERS => "TK_OTHERS",
            TK_TIES => "TK_TIES",
            TK_GENERATED => "TK_GENERATED",
            TK_ALWAYS => "TK_ALWAYS",
            TK_MATERIALIZED => "TK_MATERIALIZED",
            TK_REINDEX => "TK_REINDEX",
            TK_RENAME => "TK_RENAME",
            TK_CTIME_KW => "TK_CTIME_KW",
            TK_ANY => "TK_ANY",
            TK_BITAND => "TK_BITAND",
            TK_BITOR => "TK_BITOR",
            TK_LSHIFT => "TK_LSHIFT",
            TK_RSHIFT => "TK_RSHIFT",
            TK_PLUS => "TK_PLUS",
            TK_MINUS => "TK_MINUS",
            TK_STAR => "TK_STAR",
            TK_SLASH => "TK_SLASH",
            TK_REM => "TK_REM",
            TK_CONCAT => "TK_CONCAT",
            TK_PTR => "TK_PTR",
            TK_COLLATE => "TK_COLLATE",
            TK_BITNOT => "TK_BITNOT",
            TK_ON => "TK_ON",
            TK_INDEXED => "TK_INDEXED",
            TK_STRING => "TK_STRING",
            TK_JOIN_KW => "TK_JOIN_KW",
            TK_CONSTRAINT => "TK_CONSTRAINT",
            TK_DEFAULT => "TK_DEFAULT",
            TK_NULL => "TK_NULL",
            TK_PRIMARY => "TK_PRIMARY",
            TK_UNIQUE => "TK_UNIQUE",
            TK_CHECK => "TK_CHECK",
            TK_REFERENCES => "TK_REFERENCES",
            TK_AUTOINCR => "TK_AUTOINCR",
            TK_INSERT => "TK_INSERT",
            TK_DELETE => "TK_DELETE",
            TK_UPDATE => "TK_UPDATE",
            TK_SET => "TK_SET",
            TK_DEFERRABLE => "TK_DEFERRABLE",
            TK_FOREIGN => "TK_FOREIGN",
            TK_DROP => "TK_DROP",
            TK_UNION => "TK_UNION",
            TK_ALL => "TK_ALL",
            TK_EXCEPT => "TK_EXCEPT",
            TK_INTERSECT => "TK_INTERSECT",
            TK_SELECT => "TK_SELECT",
            TK_VALUES => "TK_VALUES",
            TK_DISTINCT => "TK_DISTINCT",
            TK_DOT => "TK_DOT",
            TK_FROM => "TK_FROM",
            TK_JOIN => "TK_JOIN",
            TK_USING => "TK_USING",
            TK_ORDER => "TK_ORDER",
            TK_GROUP => "TK_GROUP",
            TK_HAVING => "TK_HAVING",
            TK_LIMIT => "TK_LIMIT",
            TK_WHERE => "TK_WHERE",
            TK_RETURNING => "TK_RETURNING",
            TK_INTO => "TK_INTO",
            TK_NOTHING => "TK_NOTHING",
            TK_BLOB => "TK_BLOB",
            TK_FLOAT => "TK_FLOAT",
            TK_INTEGER => "TK_INTEGER",
            TK_VARIABLE => "TK_VARIABLE",
            TK_CASE => "TK_CASE",
            TK_WHEN => "TK_WHEN",
            TK_THEN => "TK_THEN",
            TK_ELSE => "TK_ELSE",
            TK_INDEX => "TK_INDEX",
            TK_ALTER => "TK_ALTER",
            TK_ADD => "TK_ADD",
            TK_WINDOW => "TK_WINDOW",
            TK_OVER => "TK_OVER",
            TK_FILTER => "TK_FILTER",
            TK_ILLEGAL => "TK_ILLEGAL",
        };
        write!(f, "{s}")
    }
}

impl TokenType {
    /// If your parsing process expects the next token to be TK_ID, remember to call this function!
    #[inline(always)]
    pub fn fallback_id_if_ok(self) -> Self {
        use TokenType::*;
        match self {
            TK_ABORT | TK_ACTION | TK_AFTER | TK_ANALYZE | TK_ASC | TK_ATTACH | TK_BEFORE
            | TK_BEGIN | TK_BY | TK_CASCADE | TK_CAST | TK_CONFLICT | TK_DATABASE | TK_DEFERRED
            | TK_DESC | TK_DETACH | TK_DO | TK_EACH | TK_END | TK_EXCLUSIVE | TK_EXPLAIN
            | TK_FAIL | TK_FOR | TK_IGNORE | TK_IMMEDIATE | TK_INITIALLY | TK_INSTEAD
            | TK_LIKE_KW | TK_MATCH | TK_NO | TK_PLAN | TK_QUERY | TK_KEY | TK_OF | TK_OFFSET
            | TK_PRAGMA | TK_RAISE | TK_RECURSIVE | TK_RELEASE | TK_REPLACE | TK_RESTRICT
            | TK_ROW | TK_ROWS | TK_ROLLBACK | TK_SAVEPOINT | TK_TEMP | TK_TRIGGER | TK_VACUUM
            | TK_VIEW | TK_VIRTUAL | TK_WITH | TK_NULLS | TK_FIRST | TK_LAST | TK_CURRENT
            | TK_FOLLOWING | TK_PARTITION | TK_PRECEDING | TK_RANGE | TK_UNBOUNDED | TK_EXCLUDE
            | TK_GROUPS | TK_OTHERS | TK_TIES | TK_ALWAYS | TK_MATERIALIZED | TK_REINDEX
            | TK_RENAME | TK_CTIME_KW | TK_IF => TK_ID,
            // | TK_COLUMNKW | TK_UNION | TK_EXCEPT | TK_INTERSECT | TK_GENERATED | TK_WITHOUT
            // see comments in `next_token` of parser
            _ => self,
        }
    }
}
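As the doc comment on fallback_id_if_ok notes, the demotion has to be applied by the caller at every point where an identifier is acceptable. A minimal sketch of that call pattern, with a hypothetical helper name that is not part of this diff:

    // Hypothetical helper illustrating the intended call pattern: non-reserved
    // keywords such as TEMP or KEY are demoted to TK_ID before the check, so
    // e.g. `CREATE TABLE temp (...)` can still treat `temp` as a table name.
    fn token_can_be_identifier(tt: TokenType) -> bool {
        matches!(tt.fallback_id_if_ok(), TokenType::TK_ID)
    }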