Remove vendored parser now that we have our own

This commit is contained in:
PThorpe92
2025-09-29 09:44:38 -04:00
parent 121e8898cd
commit bd89554cad
45 changed files with 0 additions and 17926 deletions

View File

@@ -1,11 +0,0 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "cargo" # See documentation for possible values
directory: "/" # Location of package manifests
schedule:
interval: "weekly"

View File

@@ -1,36 +0,0 @@
name: CI
on:
push:
branches: [master]
pull_request:
branches: [master]
permissions:
contents: read
jobs:
build:
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Build
run: cargo build
- name: Run tests
run: cargo test
direct-minimal-versions:
name: Test min versions
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: hecrj/setup-rust-action@v2
with:
rust-version: nightly
- run: |
cargo update -Z direct-minimal-versions
cargo test

View File

@@ -1,6 +0,0 @@
target
rlemon
*.h
*.out
Cargo.lock
cmake-build-debug

View File

@@ -1,5 +0,0 @@
cmake_minimum_required(VERSION 3.6)
project(rlemon)
set(SOURCE_FILES third_party/lemon/lemon.c)
add_executable(rlemon ${SOURCE_FILES})

View File

@@ -1,47 +0,0 @@
[package]
name = "turso_sqlite3_parser"
version.workspace = true
edition.workspace = true
authors = ["gwenn"]
description = "SQL parser (as understood by SQLite)"
documentation = "http://docs.rs/sqlite3-parser"
repository = "https://github.com/gwenn/lemon-rs"
readme = "README.md"
categories = ["parser-implementations"]
keywords = ["sql", "parser", "scanner", "tokenizer"]
license = "Apache-2.0/MIT"
build = "build.rs" # Lemon preprocessing
[badges]
maintenance = { status = "experimental" }
[features]
# FIXME: specific to one parser, not global
YYTRACKMAXSTACKDEPTH = []
YYNOERRORRECOVERY = []
YYCOVERAGE = []
NDEBUG = []
default = ["YYNOERRORRECOVERY", "NDEBUG"]
serde = ["dep:serde", "indexmap/serde", "bitflags/serde"]
[dependencies]
log = "0.4.22"
memchr = "2.0"
fallible-iterator = { workspace = true }
bitflags = { workspace = true }
indexmap = { workspace = true }
miette = { workspace = true }
strum = { workspace = true }
strum_macros = { workspace = true }
serde = { workspace = true, optional = true, features = ["derive"] }
smallvec = { version = "1.15.1", features = ["const_generics"] }
[dev-dependencies]
env_logger = { workspace = true, default-features = false }
[build-dependencies]
cc = "1.0"
[lints.rust]
dead_code = "allow"
non_snake_case = "allow"

View File

@@ -1,24 +0,0 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org>

View File

@@ -1,79 +0,0 @@
[![Build Status](https://github.com/gwenn/lemon-rs/workflows/CI/badge.svg)](https://github.com/gwenn/lemon-rs/actions)
[![Latest Version](https://img.shields.io/crates/v/sqlite3-parser.svg)](https://crates.io/crates/sqlite3-parser)
[![Docs](https://docs.rs/sqlite3-parser/badge.svg)](https://docs.rs/sqlite3-parser)
[![dependency status](https://deps.rs/repo/github/gwenn/lemon-rs/status.svg)](https://deps.rs/repo/github/gwenn/lemon-rs)
[LEMON parser generator](https://www.sqlite.org/src/doc/trunk/doc/lemon.html) modified to generate Rust code.
Lemon source and SQLite3 grammar were last synced as of July 2024.
## Unsupported
### Unsupported Grammar syntax
* `%token_destructor`: Code to execute to destroy token data
* `%default_destructor`: Code for the default non-terminal destructor
* `%destructor`: Code which executes whenever this symbol is
popped from the stack during error processing
https://www.codeproject.com/Articles/1056460/Generating-a-High-Speed-Parser-Part-Lemon
https://www.sqlite.org/lemon.html
### SQLite
[SQLite lexer](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/tokenize.c) and [SQLite parser](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/parse.y) have been ported from C to Rust.
The parser generates an AST.
Lexer/Parser:
- Keep track of position (line, column).
- Streamable (stop at the end of statement).
- Resumable (restart after the end of statement).
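A minimal usage sketch of the streaming API (the input string is illustrative; the API mirrors `examples/parse.rs` below):

```rust
use fallible_iterator::FallibleIterator;
use turso_sqlite3_parser::lexer::sql::Parser;

fn main() {
    // The parser stops at each statement boundary and can be resumed.
    let mut parser = Parser::new(b"SELECT 1; SELECT 2;");
    while let Some(cmd) = parser.next().expect("parse error") {
        println!("{cmd}"); // each `cmd` is one parsed statement (AST)
    }
}
```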
Lexer and parser have been tested with the following scripts:
* https://github.com/bkiers/sqlite-parser/tree/master/src/test/resources
* https://github.com/codeschool/sqlite-parser/tree/master/test/sql/official-suite which can be updated with script in https://github.com/codeschool/sqlite-parser/tree/master/test/misc
TODO:
- [ ] Check generated AST (reparse/reinject)
- [ ] [If a keyword in double quotes is used in a context where it cannot be resolved to an identifier but where a string literal is allowed, then the token is understood to be a string literal instead of an identifier.](https://sqlite.org/lang_keywords.html)
- [ ] Tests
- [ ] Do not panic while parsing
- [x] CREATE VIRTUAL TABLE args
- [ ] Zero copy (at least tokens)
### Unsupported by Rust
* `#line` directive
## API change
* No `ParseAlloc`/`ParseFree` anymore
## Features not tested
* NDEBUG
* YYNOERRORRECOVERY
* YYERRORSYMBOL
## To be fixed
* RHS values are moved. Maybe this is not a problem if they are always used only once.
Just add a check in lemon...
* `%extra_argument` is not supported.
* Terminal symbols generated by lemon should be dumped in a specified file.
## Raison d'être
* [lemon_rust](https://github.com/rodrigorc/lemon_rust) does the same thing
but with an old version of `lemon`. And it does not seem possible to use `yystack`
as a stack because items may be accessed randomly and the `top+1` item can be used.
* [lalrpop](https://github.com/nikomatsakis/lalrpop) would be the perfect
alternative but it does not support fallback/streaming
(see [this](https://github.com/nikomatsakis/lalrpop/issues/156) issue)
and compilation/generation is slow.
## Minimum supported Rust version (MSRV)
Latest stable Rust version at the time of release. It might compile with older versions.

View File

@@ -1,7 +0,0 @@
When some changes happen in the official SQLite repository,
they can be applied locally:
- $SQLITE/tool/lemon.c => $RLEMON/third_party/lemon.c
- $SQLITE/tool/lempar.c => $RLEMON/third_party/lempar.rs
- $SQLITE/tool/mkkeywordhash.c => $RLEMON/src/dialect/mod.rs
- $SQLITE/src/tokenize.c => $RLEMON/src/lexer/sql/mod.rs
- $SQLITE/src/parse.y => $RLEMON/src/parser/parse.y (and $RLEMON/src/dialect/token.rs, $RLEMON/src/dialect/mod.rs)

View File

@@ -1,154 +0,0 @@
#![cfg(all(test, not(test)))] // never compile this
#![feature(test)]
extern crate test;
use test::Bencher;
use turso_sqlite3_parser::dialect::keyword_token;
static VALUES: [&[u8]; 136] = [
b"ABORT",
b"ACTION",
b"ADD",
b"AFTER",
b"ALL",
b"ALTER",
b"ANALYZE",
b"AND",
b"AS",
b"ASC",
b"ATTACH",
b"AUTOINCREMENT",
b"BEFORE",
b"BEGIN",
b"BETWEEN",
b"BY",
b"CASCADE",
b"CASE",
b"CAST",
b"CHECK",
b"COLLATE",
b"COLUMN",
b"COMMIT",
b"CONFLICT",
b"CONSTRAINT",
b"CREATE",
b"CROSS",
b"CURRENT",
b"CURRENT_DATE",
b"CURRENT_TIME",
b"CURRENT_TIMESTAMP",
b"DATABASE",
b"DEFAULT",
b"DEFERRABLE",
b"DEFERRED",
b"DELETE",
b"DESC",
b"DETACH",
b"DISTINCT",
b"DO",
b"DROP",
b"EACH",
b"ELSE",
b"END",
b"ESCAPE",
b"EXCEPT",
b"EXCLUSIVE",
b"EXISTS",
b"EXPLAIN",
b"FAIL",
b"FILTER",
b"FOLLOWING",
b"FOR",
b"FOREIGN",
b"FROM",
b"FULL",
b"GLOB",
b"GROUP",
b"HAVING",
b"IF",
b"IGNORE",
b"IMMEDIATE",
b"IN",
b"INDEX",
b"INDEXED",
b"INITIALLY",
b"INNER",
b"INSERT",
b"INSTEAD",
b"INTERSECT",
b"INTO",
b"IS",
b"ISNULL",
b"JOIN",
b"KEY",
b"LEFT",
b"LIKE",
b"LIMIT",
b"MATCH",
b"NATURAL",
b"NO",
b"NOT",
b"NOTHING",
b"NOTNULL",
b"NULL",
b"OF",
b"OFFSET",
b"ON",
b"OR",
b"ORDER",
b"OUTER",
b"OVER",
b"PARTITION",
b"PLAN",
b"PRAGMA",
b"PRECEDING",
b"PRIMARY",
b"QUERY",
b"RAISE",
b"RANGE",
b"RECURSIVE",
b"REFERENCES",
b"REGEXP",
b"REINDEX",
b"RELEASE",
b"RENAME",
b"REPLACE",
b"RESTRICT",
b"RIGHT",
b"ROLLBACK",
b"ROW",
b"ROWS",
b"SAVEPOINT",
b"SELECT",
b"SET",
b"TABLE",
b"TEMP",
b"TEMPORARY",
b"THEN",
b"TO",
b"TRANSACTION",
b"TRIGGER",
b"UNBOUNDED",
b"UNION",
b"UNIQUE",
b"UPDATE",
b"USING",
b"VACUUM",
b"VALUES",
b"VIEW",
b"VIRTUAL",
b"WHEN",
b"WHERE",
b"WINDOW",
b"WITH",
b"WITHOUT",
];
#[bench]
fn bench_keyword_token(b: &mut Bencher) {
b.iter(|| {
for value in &VALUES {
assert!(keyword_token(value).is_some())
}
});
}

View File

@@ -1,331 +0,0 @@
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufWriter, Result, Write};
use std::path::Path;
use std::process::Command;
use cc::Build;
/// Generates a trie-like function with nested match expressions for parsing SQL keywords.
/// Example input: `[["ABORT", "TK_ABORT"], ["ACTION", "TK_ACTION"], ["ADD", "TK_ADD"]]`
/// A
/// ├─ B
/// │ ├─ O
/// │ │ ├─ R
/// │ │ │ ├─ T -> TK_ABORT
/// ├─ C
/// │ ├─ T
/// │ │ ├─ I
/// │ │ │ ├─ O
/// │ │ │ │ ├─ N -> TK_ACTION
/// ├─ D
/// │ ├─ D -> TK_ADD
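/// A sketch of the code shape this emits for the example above (not verbatim output):
/// ```ignore
/// match buf[idx] {
///     65 | 97 => { // b'A' | b'a'
///         idx += 1;
///         // ...nested matches; a leaf returns Some(TokenType::TK_ABORT)
///     }
///     _ => None,
/// }
/// ```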
fn build_keyword_map(
writer: &mut impl Write,
func_name: &str,
keywords: &[[&'static str; 2]],
) -> Result<()> {
assert!(!keywords.is_empty());
let mut min_len = keywords[0][0].len();
let mut max_len = keywords[0][0].len();
struct PathEntry {
result: Option<&'static str>,
sub_entries: HashMap<u8, Box<PathEntry>>,
}
let mut paths = Box::new(PathEntry {
result: None,
sub_entries: HashMap::new(),
});
for keyword in keywords {
let keyword_b = keyword[0].as_bytes();
if keyword_b.len() < min_len {
min_len = keyword_b.len();
}
if keyword_b.len() > max_len {
max_len = keyword_b.len();
}
let mut current = &mut paths;
for &b in keyword_b {
let upper_b = b.to_ascii_uppercase();
match current.sub_entries.get(&upper_b) {
Some(_) => {
current = current.sub_entries.get_mut(&upper_b).unwrap();
}
None => {
let new_entry = Box::new(PathEntry {
result: None,
sub_entries: HashMap::new(),
});
current.sub_entries.insert(upper_b, new_entry);
current = current.sub_entries.get_mut(&upper_b).unwrap();
}
}
}
assert!(current.result.is_none());
current.result = Some(keyword[1]);
}
fn write_entry(writer: &mut impl Write, entry: &PathEntry) -> Result<()> {
if let Some(result) = entry.result {
writeln!(writer, "if idx == buf.len() {{")?;
writeln!(writer, "return Some(TokenType::{result});")?;
writeln!(writer, "}}")?;
}
if entry.sub_entries.is_empty() {
writeln!(writer, "None")?;
return Ok(());
}
writeln!(writer, "if idx >= buf.len() {{")?;
writeln!(writer, "return None;")?;
writeln!(writer, "}}")?;
writeln!(writer, "match buf[idx] {{")?;
for (&b, sub_entry) in &entry.sub_entries {
if b.is_ascii_alphabetic() {
writeln!(writer, "{} | {} => {{", b, b.to_ascii_lowercase())?;
} else {
writeln!(writer, "{b} => {{")?;
}
writeln!(writer, "idx += 1;")?;
write_entry(writer, sub_entry)?;
writeln!(writer, "}}")?;
}
writeln!(writer, "_ => None")?;
writeln!(writer, "}}")?;
Ok(())
}
writeln!(
writer,
"pub(crate) const MAX_KEYWORD_LEN: usize = {max_len};"
)?;
writeln!(
writer,
"pub(crate) const MIN_KEYWORD_LEN: usize = {min_len};"
)?;
writeln!(writer, "/// Check if `word` is a keyword")?;
writeln!(
writer,
"pub fn {func_name}(buf: &[u8]) -> Option<TokenType> {{"
)?;
writeln!(
writer,
"if buf.len() < MIN_KEYWORD_LEN || buf.len() > MAX_KEYWORD_LEN {{"
)?;
writeln!(writer, "return None;")?;
writeln!(writer, "}}")?;
writeln!(writer, "let mut idx = 0;")?;
write_entry(writer, &paths)?;
writeln!(writer, "}}")?;
Ok(())
}
fn main() -> Result<()> {
let out_dir = env::var("OUT_DIR").unwrap();
let out_path = Path::new(&out_dir);
let rlemon = out_path.join("rlemon");
let lemon_src_dir = Path::new("third_party").join("lemon");
let rlemon_src = lemon_src_dir.join("lemon.c");
// compile rlemon:
{
assert!(Build::new()
.target(&env::var("HOST").unwrap())
.get_compiler()
.to_command()
.arg("-o")
.arg(rlemon.clone())
.arg(rlemon_src)
.status()?
.success());
}
let sql_parser = "src/parser/parse.y";
// run rlemon / generate parser:
{
assert!(Command::new(rlemon)
.arg("-DSQLITE_ENABLE_UPDATE_DELETE_LIMIT")
.arg("-Tthird_party/lemon/lempar.rs")
.arg(format!("-d{out_dir}"))
.arg(sql_parser)
.status()?
.success());
// TODO ./rlemon -m -Tthird_party/lemon/lempar.rs examples/simple.y
}
let keywords = out_path.join("keywords.rs");
let mut keywords = BufWriter::new(File::create(keywords)?);
build_keyword_map(
&mut keywords,
"keyword_token",
&[
["ABORT", "TK_ABORT"],
["ACTION", "TK_ACTION"],
["ADD", "TK_ADD"],
["AFTER", "TK_AFTER"],
["ALL", "TK_ALL"],
["ALTER", "TK_ALTER"],
["ALWAYS", "TK_ALWAYS"],
["ANALYZE", "TK_ANALYZE"],
["AND", "TK_AND"],
["AS", "TK_AS"],
["ASC", "TK_ASC"],
["ATTACH", "TK_ATTACH"],
["AUTOINCREMENT", "TK_AUTOINCR"],
["BEFORE", "TK_BEFORE"],
["BEGIN", "TK_BEGIN"],
["BETWEEN", "TK_BETWEEN"],
["BY", "TK_BY"],
["CASCADE", "TK_CASCADE"],
["CASE", "TK_CASE"],
["CAST", "TK_CAST"],
["CHECK", "TK_CHECK"],
["COLLATE", "TK_COLLATE"],
["COLUMN", "TK_COLUMNKW"],
["COMMIT", "TK_COMMIT"],
["CONFLICT", "TK_CONFLICT"],
["CONSTRAINT", "TK_CONSTRAINT"],
["CREATE", "TK_CREATE"],
["CROSS", "TK_JOIN_KW"],
["CURRENT", "TK_CURRENT"],
["CURRENT_DATE", "TK_CTIME_KW"],
["CURRENT_TIME", "TK_CTIME_KW"],
["CURRENT_TIMESTAMP", "TK_CTIME_KW"],
["DATABASE", "TK_DATABASE"],
["DEFAULT", "TK_DEFAULT"],
["DEFERRABLE", "TK_DEFERRABLE"],
["DEFERRED", "TK_DEFERRED"],
["DELETE", "TK_DELETE"],
["DESC", "TK_DESC"],
["DETACH", "TK_DETACH"],
["DISTINCT", "TK_DISTINCT"],
["DO", "TK_DO"],
["DROP", "TK_DROP"],
["EACH", "TK_EACH"],
["ELSE", "TK_ELSE"],
["END", "TK_END"],
["ESCAPE", "TK_ESCAPE"],
["EXCEPT", "TK_EXCEPT"],
["EXCLUDE", "TK_EXCLUDE"],
["EXCLUSIVE", "TK_EXCLUSIVE"],
["EXISTS", "TK_EXISTS"],
["EXPLAIN", "TK_EXPLAIN"],
["FAIL", "TK_FAIL"],
["FILTER", "TK_FILTER"],
["FIRST", "TK_FIRST"],
["FOLLOWING", "TK_FOLLOWING"],
["FOR", "TK_FOR"],
["FOREIGN", "TK_FOREIGN"],
["FROM", "TK_FROM"],
["FULL", "TK_JOIN_KW"],
["GENERATED", "TK_GENERATED"],
["GLOB", "TK_LIKE_KW"],
["GROUP", "TK_GROUP"],
["GROUPS", "TK_GROUPS"],
["HAVING", "TK_HAVING"],
["IF", "TK_IF"],
["IGNORE", "TK_IGNORE"],
["IMMEDIATE", "TK_IMMEDIATE"],
["IN", "TK_IN"],
["INDEX", "TK_INDEX"],
["INDEXED", "TK_INDEXED"],
["INITIALLY", "TK_INITIALLY"],
["INNER", "TK_JOIN_KW"],
["INSERT", "TK_INSERT"],
["INSTEAD", "TK_INSTEAD"],
["INTERSECT", "TK_INTERSECT"],
["INTO", "TK_INTO"],
["IS", "TK_IS"],
["ISNULL", "TK_ISNULL"],
["JOIN", "TK_JOIN"],
["KEY", "TK_KEY"],
["LAST", "TK_LAST"],
["LEFT", "TK_JOIN_KW"],
["LIKE", "TK_LIKE_KW"],
["LIMIT", "TK_LIMIT"],
["MATCH", "TK_MATCH"],
["MATERIALIZED", "TK_MATERIALIZED"],
["NATURAL", "TK_JOIN_KW"],
["NO", "TK_NO"],
["NOT", "TK_NOT"],
["NOTHING", "TK_NOTHING"],
["NOTNULL", "TK_NOTNULL"],
["NULL", "TK_NULL"],
["NULLS", "TK_NULLS"],
["OF", "TK_OF"],
["OFFSET", "TK_OFFSET"],
["ON", "TK_ON"],
["OR", "TK_OR"],
["ORDER", "TK_ORDER"],
["OTHERS", "TK_OTHERS"],
["OUTER", "TK_JOIN_KW"],
["OVER", "TK_OVER"],
["PARTITION", "TK_PARTITION"],
["PLAN", "TK_PLAN"],
["PRAGMA", "TK_PRAGMA"],
["PRECEDING", "TK_PRECEDING"],
["PRIMARY", "TK_PRIMARY"],
["QUERY", "TK_QUERY"],
["RAISE", "TK_RAISE"],
["RANGE", "TK_RANGE"],
["RECURSIVE", "TK_RECURSIVE"],
["REFERENCES", "TK_REFERENCES"],
["REGEXP", "TK_LIKE_KW"],
["REINDEX", "TK_REINDEX"],
["RELEASE", "TK_RELEASE"],
["RENAME", "TK_RENAME"],
["REPLACE", "TK_REPLACE"],
["RETURNING", "TK_RETURNING"],
["RESTRICT", "TK_RESTRICT"],
["RIGHT", "TK_JOIN_KW"],
["ROLLBACK", "TK_ROLLBACK"],
["ROW", "TK_ROW"],
["ROWS", "TK_ROWS"],
["SAVEPOINT", "TK_SAVEPOINT"],
["SELECT", "TK_SELECT"],
["SET", "TK_SET"],
["TABLE", "TK_TABLE"],
["TEMP", "TK_TEMP"],
["TEMPORARY", "TK_TEMP"],
["THEN", "TK_THEN"],
["TIES", "TK_TIES"],
["TO", "TK_TO"],
["TRANSACTION", "TK_TRANSACTION"],
["TRIGGER", "TK_TRIGGER"],
["UNBOUNDED", "TK_UNBOUNDED"],
["UNION", "TK_UNION"],
["UNIQUE", "TK_UNIQUE"],
["UPDATE", "TK_UPDATE"],
["USING", "TK_USING"],
["VACUUM", "TK_VACUUM"],
["VALUES", "TK_VALUES"],
["VIEW", "TK_VIEW"],
["VIRTUAL", "TK_VIRTUAL"],
["WHEN", "TK_WHEN"],
["WHERE", "TK_WHERE"],
["WINDOW", "TK_WINDOW"],
["WITH", "TK_WITH"],
["WITHOUT", "TK_WITHOUT"],
],
)?;
println!("cargo:rerun-if-changed=third_party/lemon/lemon.c");
println!("cargo:rerun-if-changed=third_party/lemon/lempar.rs");
println!("cargo:rerun-if-changed=src/parser/parse.y");
// TODO examples/simple.y if test
Ok(())
}

View File

@@ -1,124 +0,0 @@
# Extra consistency checks
- `ALTER TABLE ... RENAME TO ...` when old and new table names are the same => `Stmt::check`
- `ALTER TABLE ... ADD COLUMN ...` with new primary key / unique constraint => `Stmt::check`
- `CREATE TABLE ...`
- with duplicated column name => `ColumnDefinition::add_column`
- with STRICT option and invalid or missing column type(s) => `CreateTableBody::check`
- WITHOUT ROWID and without primary key => `CreateTableBody::check`
- `CREATE VIEW ... (...) ...`
- when the view column count does not match the select column count => `Stmt::check`
- with duplicated columns (same name) => `Stmt::check`
- `DELETE FROM ... ORDER BY ...` with ORDER BY but without LIMIT => `Stmt::check`
- `INSERT INTO ... (...) ...` when the column count does not match the select / values column count => `Stmt::check`
- `INSERT INTO ... (...) DEFAULT VALUES` with columns and DEFAULT VALUES => `Stmt::check`
- `SELECT ... EXCEPT|INTERSECT|UNION SELECT ...` when the SELECTs do not all have the same number of result columns => `SelectBody::push`
- `NATURAL JOIN ...` with ON or USING clause => `FromClause::push`
- `UPDATE ... ORDER BY ...` with ORDER BY but without LIMIT => `Stmt::check`
- `VALUES (...), (...), ...` when the VALUES rows do not all have the same number of terms => `OneSelect::push`
- `WITH ...` with duplicated table name => `CommonTableExpr::add_cte`
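A hedged sketch of how one of these checks surfaces through the parser API (the exact error variant and message may differ):

```rust
use fallible_iterator::FallibleIterator;
use turso_sqlite3_parser::lexer::sql::Parser;

fn main() {
    // `ColumnDefinition::add_column` rejects the duplicated column name `a`.
    let mut parser = Parser::new(b"CREATE TABLE t (a, a);");
    assert!(parser.next().is_err());
}
```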
## TODO
### `CREATE TABLE`
- [x] qualified (different from `temp`) temporary table
```sql
sqlite> ATTACH DATABASE ':memory:' AS mem;
sqlite> CREATE TEMPORARY TABLE mem.x AS SELECT 1;
Parse error: temporary table name must be unqualified
```
```sql
sqlite> CREATE TEMPORARY TABLE temp.x AS SELECT 1;
-- OK
```
- [x] must have at least one non-generated column
```sql
sqlite> CREATE TABLE test(data AS (1));
Parse error: must have at least one non-generated column
```
- [ ] column constraint(s) checks
```sql
sqlite> CREATE TABLE t(a REFERENCES o(a,b));
Parse error: foreign key on a should reference only one column of table o
CREATE TABLE t(a REFERENCES o(a,b));
error here ---^
sqlite> CREATE TABLE t(a PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID;
Parse error: AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY
sqlite> CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID;
Parse error: AUTOINCREMENT not allowed on WITHOUT ROWID tables
```
- [ ] table constraint(s) checks
```sql
sqlite> CREATE TABLE test (a, b, FOREIGN KEY (b) REFERENCES test(a,b));
Parse error: number of columns in foreign key does not match the number of columns in the referenced table
```
```sql
sqlite> create table test (a,b, primary key(a), primary key(b));
Parse error: table "test" has more than one primary key
sqlite> create table test (a primary key, b primary key);
Parse error: table "test" has more than one primary key
sqlite> create table test (a primary key, b, primary key(a));
Parse error: table "test" has more than one primary key
```
### `HAVING`
- [x] HAVING clause on a non-aggregate query (`GroupBy::having`): the grammar already prevents this case (it differs from the official SQLite grammar).
```sql
sqlite> SELECT 1 as i HAVING i > 1;
Parse error: HAVING clause on a non-aggregate query
```
vs
```
[ERROR sqlite3Parser] near HAVING, "Token(None)": syntax error
Err: near HAVING, "None": syntax error at (1, 21) in SELECT 1 as i HAVING i > 1
```
### `SELECT ...`
- [ ] no duplicated column name in `selcollist`/`Select::columns`
```sql
sqlite> SELECT 1 as i, 2 as i;
-- no error (idem for postgres)
```
### `SELECT ... ORDER BY ...`
- [ ] ORDER BY term does not match any column in the result set (`Select::order_by`)
```sql
sqlite> SELECT 1 as i ORDER BY j;
Parse error: no such column: j
SELECT 1 as i ORDER BY j;
^--- error here
```
### `WITH`
- [ ] no duplicated column name in `CommonTableExpr::IndexedColumn`
### DML
```sql
sqlite> CREATE TABLE test (n, m);
sqlite> INSERT INTO test (n, n, m) VALUES (1, 0, 1); -- pgsql KO
sqlite> SELECT * FROM test;
1|1
sqlite> UPDATE test SET n = 1, n = 0; -- pgsql KO
sqlite> SELECT * FROM test;
0|1
```

View File

@@ -1,123 +0,0 @@
%token_type { i32 }
// An extra argument to the constructor for the parser, which is available
// to all actions.
%extra_context {ctx: Context}
%left PLUS MINUS.
%left DIVIDE TIMES.
%include {
use log::{debug, error, log_enabled, Level, LevelFilter, Metadata, Record, SetLoggerError};
pub struct Context {
expr: Option<Expr>,
}
#[derive(Debug)]
pub enum Operator {
Add,
Subtract,
Multiply,
Divide,
}
#[derive(Debug)]
pub enum Expr {
Number(i32),
Binary(Operator, Box<Expr>, Box<Expr>),
}
impl Expr {
fn binary(op: Operator, lhs: Expr, rhs: Expr) -> Expr {
Expr::Binary(op, Box::new(lhs), Box::new(rhs))
}
}
fn main() {
init_logger().unwrap();
let r = Context { expr: None };
let mut p = yyParser::new(r);
p.Parse(TokenType::INTEGER, Some(5));
p.Parse(TokenType::PLUS, None);
p.Parse(TokenType::INTEGER, Some(10));
p.Parse(TokenType::TIMES, None);
p.Parse(TokenType::INTEGER, Some(4));
p.Parse(TokenType::EOF, None);
p.ParseFinalize();
let s = format!("{:?}", p.ctx.expr);
assert_eq!(s, "Some(Binary(Add, Number(5), Binary(Multiply, Number(10), Number(4))))");
let r = Context { expr: None };
let mut p = yyParser::new(r);
p.Parse(TokenType::INTEGER, Some(15));
p.Parse(TokenType::DIVIDE, None);
p.Parse(TokenType::INTEGER, Some(5));
p.Parse(TokenType::EOF, None);
p.ParseFinalize();
let s = format!("{:?}", p.ctx.expr);
assert_eq!(s, "Some(Binary(Divide, Number(15), Number(5)))");
let r = Context { expr: None };
let mut p = yyParser::new(r);
p.Parse(TokenType::INTEGER, Some(50));
p.Parse(TokenType::PLUS, None);
p.Parse(TokenType::INTEGER, Some(125));
p.Parse(TokenType::EOF, None);
p.ParseFinalize();
let s = format!("{:?}", p.ctx.expr);
assert_eq!(s, "Some(Binary(Add, Number(50), Number(125)))");
let r = Context { expr: None };
let mut p = yyParser::new(r);
p.Parse(TokenType::INTEGER, Some(50));
p.Parse(TokenType::TIMES, None);
p.Parse(TokenType::INTEGER, Some(125));
p.Parse(TokenType::PLUS, None);
p.Parse(TokenType::INTEGER, Some(125));
p.Parse(TokenType::EOF, None);
p.ParseFinalize();
let s = format!("{:?}", p.ctx.expr);
assert_eq!(s, "Some(Binary(Add, Binary(Multiply, Number(50), Number(125)), Number(125)))");
}
static LOGGER: Logger = Logger;
struct Logger;
impl log::Log for Logger {
fn enabled(&self, metadata: &Metadata) -> bool {
metadata.level() <= Level::Debug
}
fn log(&self, record: &Record) {
if self.enabled(record.metadata()) {
eprintln!("{} - {}", record.level(), record.args());
}
}
fn flush(&self) {
}
}
fn init_logger() -> Result<(), SetLoggerError> {
log::set_logger(&LOGGER)?;
log::set_max_level(LevelFilter::Debug);
Ok(())
}
}
%syntax_error {
let _ = yymajor;
println!("near token {:?}: syntax error", yyminor);
}
program ::= expr(A). { self.ctx.expr = Some(A); }
%type expr { Expr }
expr(A) ::= expr(B) MINUS expr(C). { A = Expr::binary(Operator::Subtract, B, C); }
expr(A) ::= expr(B) PLUS expr(C). { A = Expr::binary(Operator::Add, B, C); }
expr(A) ::= expr(B) TIMES expr(C). { A = Expr::binary(Operator::Multiply, B, C); }
expr(A) ::= expr(B) DIVIDE expr(C). { A = Expr::binary(Operator::Divide, B, C); }
expr(A) ::= INTEGER(B). { A = Expr::Number(B.unwrap()); }

View File

@@ -1,55 +0,0 @@
use fallible_iterator::FallibleIterator;
use std::env;
use std::fs::read;
use std::panic;
use turso_sqlite3_parser::lexer::sql::Parser;
/// Parse specified files and check all commands.
fn main() {
env_logger::init();
let args = env::args();
for arg in args.skip(1) {
println!("{arg}");
let result = panic::catch_unwind(|| {
let input = read(arg.clone()).unwrap();
let mut parser = Parser::new(&input);
loop {
match parser.next() {
Ok(None) => break,
Err(err) => {
eprintln!("Err: {err} in {arg}");
break;
}
Ok(Some(cmd)) => {
let input = cmd.to_string();
let mut checker = Parser::new(input.as_bytes());
match checker.next() {
Err(err) => {
eprintln!(
"Check Err in {}:{}, {} in\n{}\n{:?}",
arg,
parser.line(),
err,
input,
cmd
);
}
Ok(None) => {
eprintln!("Check Err in {}:{}, {:?}", arg, parser.line(), cmd);
}
Ok(Some(check)) => {
if cmd != check {
eprintln!("{cmd:?}\n<>\n{check:?}");
}
}
}
}
}
}
});
if let Err(e) = result {
eprintln!("Panic: {e:?} in {arg}");
}
}
}

View File

@@ -1,26 +0,0 @@
use std::env;
use fallible_iterator::FallibleIterator;
use turso_sqlite3_parser::lexer::sql::Parser;
/// Parse args.
// RUST_LOG=sqlite3Parser=debug
fn main() {
env_logger::init();
let args = env::args();
for arg in args.skip(1) {
let mut parser = Parser::new(arg.as_bytes());
loop {
match parser.next() {
Ok(None) => break,
Err(err) => {
eprintln!("Err: {err} in {arg}");
break;
}
Ok(Some(cmd)) => {
println!("{cmd}");
}
}
}
}
}

View File

@@ -1,42 +0,0 @@
use fallible_iterator::FallibleIterator;
use std::env;
use std::fs::read;
use std::panic;
#[cfg(not(feature = "YYNOERRORRECOVERY"))]
use turso_sqlite3_parser::lexer::sql::Error;
use turso_sqlite3_parser::lexer::sql::Parser;
/// Parse specified files and print all commands.
fn main() {
env_logger::init();
let args = env::args();
for arg in args.skip(1) {
println!("{arg}");
let result = panic::catch_unwind(|| {
let input = read(arg.clone()).unwrap();
let mut parser = Parser::new(input.as_ref());
loop {
match parser.next() {
Ok(None) => break,
Err(err) => {
eprintln!("Err: {err} in {arg}");
#[cfg(feature = "YYNOERRORRECOVERY")]
break;
#[cfg(not(feature = "YYNOERRORRECOVERY"))]
if let Error::ParserError(..) = err {
} else {
break;
}
}
Ok(Some(cmd)) => {
println!("{cmd}");
}
}
}
});
if let Err(e) = result {
eprintln!("Panic: {e:?} in {arg}");
}
}
}

View File

@@ -1,90 +0,0 @@
use turso_sqlite3_parser::lexer::sql::{TokenType, Tokenizer};
use turso_sqlite3_parser::lexer::Scanner;
use std::env;
use std::fs::read;
use std::str;
/// Tokenize specified files (and do some checks)
fn main() {
use TokenType::*;
let args = env::args();
for arg in args.skip(1) {
let input = read(arg.clone()).unwrap();
let tokenizer = Tokenizer::new();
let mut s = Scanner::new(tokenizer);
loop {
match s.scan(&input) {
Ok((_, None, _)) => break,
Err(err) => {
//eprintln!("{} at line: {}, column: {}", err, s.line(), s.column());
eprintln!("Err: {err} in {arg}");
break;
}
Ok((_, Some((token, token_type)), _)) => match token_type {
TK_TEMP => debug_assert!(
b"TEMP".eq_ignore_ascii_case(token)
|| b"TEMPORARY".eq_ignore_ascii_case(token)
),
TK_EQ => debug_assert!(b"=" == token || b"==" == token),
TK_NE => debug_assert!(b"<>" == token || b"!=" == token),
//TK_STRING => debug_assert!(),
//TK_ID => debug_assert!(),
//TK_VARIABLE => debug_assert!(),
TK_BLOB => debug_assert!(
token.len() % 2 == 0 && token.iter().all(u8::is_ascii_hexdigit)
),
TK_INTEGER => {
if token.len() > 2
&& token[0] == b'0'
&& (token[1] == b'x' || token[1] == b'X')
{
if let Err(err) =
i64::from_str_radix(str::from_utf8(&token[2..]).unwrap(), 16)
{
eprintln!("Err: {err} in {arg}");
}
} else {
/*let raw = str::from_utf8(token).unwrap();
let res = raw.parse::<i64>();
if res.is_err() {
eprintln!("Err: {} in {}", res.unwrap_err(), arg);
}*/
debug_assert!(token.iter().all(u8::is_ascii_digit))
}
}
TK_FLOAT => {
debug_assert!(str::from_utf8(token).unwrap().parse::<f64>().is_ok())
}
TK_CTIME_KW => debug_assert!(
b"CURRENT_DATE".eq_ignore_ascii_case(token)
|| b"CURRENT_TIME".eq_ignore_ascii_case(token)
|| b"CURRENT_TIMESTAMP".eq_ignore_ascii_case(token)
),
TK_JOIN_KW => debug_assert!(
b"CROSS".eq_ignore_ascii_case(token)
|| b"FULL".eq_ignore_ascii_case(token)
|| b"INNER".eq_ignore_ascii_case(token)
|| b"LEFT".eq_ignore_ascii_case(token)
|| b"NATURAL".eq_ignore_ascii_case(token)
|| b"OUTER".eq_ignore_ascii_case(token)
|| b"RIGHT".eq_ignore_ascii_case(token)
),
TK_LIKE_KW => debug_assert!(
b"GLOB".eq_ignore_ascii_case(token)
|| b"LIKE".eq_ignore_ascii_case(token)
|| b"REGEXP".eq_ignore_ascii_case(token)
),
_ => match token_type.as_str() {
Some(str) => {
debug_assert!(str.eq_ignore_ascii_case(str::from_utf8(token).unwrap()))
}
_ => {
println!("'{}', {:?}", str::from_utf8(token).unwrap(), token_type);
}
},
},
}
}
}
}

View File

@@ -1,19 +0,0 @@
[package]
name = "sqlparser_bench"
version = "0.1.0"
authors = ["Dandandan <danielheres@gmail.com>"]
edition = "2018"
[dependencies]
turso_sqlite3_parser = { path = "..", default-features = false, features = [
"YYNOERRORRECOVERY",
"NDEBUG",
] }
fallible-iterator = { workspace = true }
[dev-dependencies]
criterion = { workspace = true }
[[bench]]
name = "sqlparser_bench"
harness = false

View File

@@ -1,32 +0,0 @@
Adapted from https://github.com/ballista-compute/sqlparser-rs/tree/main/sqlparser_bench
## sqlparser-rs
```
sqlparser-rs parsing benchmark/sqlparser::select
time: [9.9697 µs 10.068 µs 10.184 µs]
Found 14 outliers among 100 measurements (14.00%)
5 (5.00%) high mild
9 (9.00%) high severe
sqlparser-rs parsing benchmark/sqlparser::with_select
time: [59.569 µs 60.088 µs 60.743 µs]
Found 9 outliers among 100 measurements (9.00%)
3 (3.00%) high mild
6 (6.00%) high severe
```
## sqlite3-parser
```
sqlparser-rs parsing benchmark/sqlparser::select
time: [6.5488 µs 6.5773 µs 6.6108 µs]
Found 10 outliers among 100 measurements (10.00%)
4 (4.00%) high mild
6 (6.00%) high severe
sqlparser-rs parsing benchmark/sqlparser::with_select
time: [22.182 µs 22.321 µs 22.473 µs]
Found 8 outliers among 100 measurements (8.00%)
1 (1.00%) low mild
3 (3.00%) high mild
4 (4.00%) high severe
```

View File

@@ -1,194 +0,0 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use criterion::{criterion_group, criterion_main, Criterion};
use fallible_iterator::FallibleIterator;
use turso_sqlite3_parser::{dialect::keyword_token, lexer::sql::Parser};
fn basic_queries(c: &mut Criterion) {
let mut group = c.benchmark_group("sqlparser-rs parsing benchmark");
let string = b"SELECT * FROM `table` WHERE 1 = 1";
group.bench_with_input("sqlparser::select", &string, |b, &s| {
b.iter(|| {
let mut parser = Parser::new(s);
assert!(parser.next().unwrap().unwrap().readonly())
});
});
let with_query = b"
WITH derived AS (
SELECT MAX(a) AS max_a,
COUNT(b) AS b_num,
user_id
FROM `TABLE`
GROUP BY user_id
)
SELECT * FROM `table`
LEFT JOIN derived USING (user_id)
";
group.bench_with_input("sqlparser::with_select", &with_query, |b, &s| {
b.iter(|| {
let mut parser = Parser::new(s);
assert!(parser.next().unwrap().unwrap().readonly())
});
});
static VALUES: [&[u8]; 136] = [
b"ABORT",
b"ACTION",
b"ADD",
b"AFTER",
b"ALL",
b"ALTER",
b"ANALYZE",
b"AND",
b"AS",
b"ASC",
b"ATTACH",
b"AUTOINCREMENT",
b"BEFORE",
b"BEGIN",
b"BETWEEN",
b"BY",
b"CASCADE",
b"CASE",
b"CAST",
b"CHECK",
b"COLLATE",
b"COLUMN",
b"COMMIT",
b"CONFLICT",
b"CONSTRAINT",
b"CREATE",
b"CROSS",
b"CURRENT",
b"CURRENT_DATE",
b"CURRENT_TIME",
b"CURRENT_TIMESTAMP",
b"DATABASE",
b"DEFAULT",
b"DEFERRABLE",
b"DEFERRED",
b"DELETE",
b"DESC",
b"DETACH",
b"DISTINCT",
b"DO",
b"DROP",
b"EACH",
b"ELSE",
b"END",
b"ESCAPE",
b"EXCEPT",
b"EXCLUSIVE",
b"EXISTS",
b"EXPLAIN",
b"FAIL",
b"FILTER",
b"FOLLOWING",
b"FOR",
b"FOREIGN",
b"FROM",
b"FULL",
b"GLOB",
b"GROUP",
b"HAVING",
b"IF",
b"IGNORE",
b"IMMEDIATE",
b"IN",
b"INDEX",
b"INDEXED",
b"INITIALLY",
b"INNER",
b"INSERT",
b"INSTEAD",
b"INTERSECT",
b"INTO",
b"IS",
b"ISNULL",
b"JOIN",
b"KEY",
b"LEFT",
b"LIKE",
b"LIMIT",
b"MATCH",
b"NATURAL",
b"NO",
b"NOT",
b"NOTHING",
b"NOTNULL",
b"NULL",
b"OF",
b"OFFSET",
b"ON",
b"OR",
b"ORDER",
b"OUTER",
b"OVER",
b"PARTITION",
b"PLAN",
b"PRAGMA",
b"PRECEDING",
b"PRIMARY",
b"QUERY",
b"RAISE",
b"RANGE",
b"RECURSIVE",
b"REFERENCES",
b"REGEXP",
b"REINDEX",
b"RELEASE",
b"RENAME",
b"REPLACE",
b"RESTRICT",
b"RIGHT",
b"ROLLBACK",
b"ROW",
b"ROWS",
b"SAVEPOINT",
b"SELECT",
b"SET",
b"TABLE",
b"TEMP",
b"TEMPORARY",
b"THEN",
b"TO",
b"TRANSACTION",
b"TRIGGER",
b"UNBOUNDED",
b"UNION",
b"UNIQUE",
b"UPDATE",
b"USING",
b"VACUUM",
b"VALUES",
b"VIEW",
b"VIRTUAL",
b"WHEN",
b"WHERE",
b"WINDOW",
b"WITH",
b"WITHOUT",
];
group.bench_with_input("keyword_token", &VALUES, |b, &s| {
b.iter(|| {
for value in &s {
assert!(keyword_token(value).is_some())
}
});
});
}
criterion_group!(benches, basic_queries);
criterion_main!(benches);

View File

@@ -1,409 +0,0 @@
//! SQLite dialect
use std::fmt::Formatter;
use std::str;
mod token;
pub use token::TokenType;
/// Token value (lexeme)
#[derive(Clone, Copy)]
pub struct Token<'i>(pub usize, pub &'i [u8], pub usize);
pub(crate) fn sentinel(start: usize) -> Token<'static> {
Token(start, b"", start)
}
impl Token<'_> {
/// Access token value
pub fn unwrap(self) -> String {
from_bytes(self.1)
}
}
impl std::fmt::Debug for Token<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("Token").field(&self.1).finish()
}
}
impl TokenType {
// TODO try Cow<&'static, str> (Borrowed<&'static str> for keyword and Owned<String> for below),
// => Syntax error on keyword will be better
// => `from_token` will become unnecessary
pub(crate) fn to_token(self, start: usize, value: &[u8], end: usize) -> Token<'_> {
Token(start, value, end)
}
}
pub(crate) fn from_bytes(bytes: &[u8]) -> String {
unsafe { str::from_utf8_unchecked(bytes).to_owned() }
}
include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
pub(crate) fn is_identifier(name: &str) -> bool {
if name.is_empty() {
return false;
}
let bytes = name.as_bytes();
is_identifier_start(bytes[0])
&& (bytes.len() == 1 || bytes[1..].iter().all(|b| is_identifier_continue(*b)))
}
pub(crate) fn is_identifier_start(b: u8) -> bool {
b.is_ascii_uppercase() || b == b'_' || b.is_ascii_lowercase() || b > b'\x7F'
}
pub(crate) fn is_identifier_continue(b: u8) -> bool {
b == b'$'
|| b.is_ascii_digit()
|| b.is_ascii_uppercase()
|| b == b'_'
|| b.is_ascii_lowercase()
|| b > b'\x7F'
}
// keyword may become an identifier
// see %fallback in parse.y
pub(crate) fn from_token(_ty: u16, value: Token) -> String {
from_bytes(value.1)
}
impl TokenType {
/// Return the associated string (mainly for testing)
pub const fn as_str(&self) -> Option<&'static str> {
use TokenType::*;
match self {
TK_ABORT => Some("ABORT"),
TK_ACTION => Some("ACTION"),
TK_ADD => Some("ADD"),
TK_AFTER => Some("AFTER"),
TK_ALL => Some("ALL"),
TK_ALTER => Some("ALTER"),
TK_ANALYZE => Some("ANALYZE"),
TK_ALWAYS => Some("ALWAYS"),
TK_AND => Some("AND"),
TK_AS => Some("AS"),
TK_ASC => Some("ASC"),
TK_ATTACH => Some("ATTACH"),
TK_AUTOINCR => Some("AUTOINCREMENT"),
TK_BEFORE => Some("BEFORE"),
TK_BEGIN => Some("BEGIN"),
TK_BETWEEN => Some("BETWEEN"),
TK_BY => Some("BY"),
TK_CASCADE => Some("CASCADE"),
TK_CASE => Some("CASE"),
TK_CAST => Some("CAST"),
TK_CHECK => Some("CHECK"),
TK_COLLATE => Some("COLLATE"),
TK_COLUMNKW => Some("COLUMN"),
TK_COMMIT => Some("COMMIT"),
TK_CONFLICT => Some("CONFLICT"),
TK_CONSTRAINT => Some("CONSTRAINT"),
TK_CREATE => Some("CREATE"),
TK_CURRENT => Some("CURRENT"),
TK_DATABASE => Some("DATABASE"),
TK_DEFAULT => Some("DEFAULT"),
TK_DEFERRABLE => Some("DEFERRABLE"),
TK_DEFERRED => Some("DEFERRED"),
TK_DELETE => Some("DELETE"),
TK_DESC => Some("DESC"),
TK_DETACH => Some("DETACH"),
TK_DISTINCT => Some("DISTINCT"),
TK_DO => Some("DO"),
TK_DROP => Some("DROP"),
TK_EACH => Some("EACH"),
TK_ELSE => Some("ELSE"),
TK_END => Some("END"),
TK_ESCAPE => Some("ESCAPE"),
TK_EXCEPT => Some("EXCEPT"),
TK_EXCLUDE => Some("EXCLUDE"),
TK_EXCLUSIVE => Some("EXCLUSIVE"),
TK_EXISTS => Some("EXISTS"),
TK_EXPLAIN => Some("EXPLAIN"),
TK_FAIL => Some("FAIL"),
TK_FILTER => Some("FILTER"),
TK_FIRST => Some("FIRST"),
TK_FOLLOWING => Some("FOLLOWING"),
TK_FOR => Some("FOR"),
TK_FOREIGN => Some("FOREIGN"),
TK_FROM => Some("FROM"),
TK_GENERATED => Some("GENERATED"),
TK_GROUP => Some("GROUP"),
TK_GROUPS => Some("GROUPS"),
TK_HAVING => Some("HAVING"),
TK_IF => Some("IF"),
TK_IGNORE => Some("IGNORE"),
TK_IMMEDIATE => Some("IMMEDIATE"),
TK_IN => Some("IN"),
TK_INDEX => Some("INDEX"),
TK_INDEXED => Some("INDEXED"),
TK_INITIALLY => Some("INITIALLY"),
TK_INSERT => Some("INSERT"),
TK_INSTEAD => Some("INSTEAD"),
TK_INTERSECT => Some("INTERSECT"),
TK_INTO => Some("INTO"),
TK_IS => Some("IS"),
TK_ISNULL => Some("ISNULL"),
TK_JOIN => Some("JOIN"),
TK_KEY => Some("KEY"),
TK_LAST => Some("LAST"),
TK_LIMIT => Some("LIMIT"),
TK_MATCH => Some("MATCH"),
TK_MATERIALIZED => Some("MATERIALIZED"),
TK_NO => Some("NO"),
TK_NOT => Some("NOT"),
TK_NOTHING => Some("NOTHING"),
TK_NOTNULL => Some("NOTNULL"),
TK_NULL => Some("NULL"),
TK_NULLS => Some("NULLS"),
TK_OF => Some("OF"),
TK_OFFSET => Some("OFFSET"),
TK_ON => Some("ON"),
TK_OR => Some("OR"),
TK_ORDER => Some("ORDER"),
TK_OTHERS => Some("OTHERS"),
TK_OVER => Some("OVER"),
TK_PARTITION => Some("PARTITION"),
TK_PLAN => Some("PLAN"),
TK_PRAGMA => Some("PRAGMA"),
TK_PRECEDING => Some("PRECEDING"),
TK_PRIMARY => Some("PRIMARY"),
TK_QUERY => Some("QUERY"),
TK_RAISE => Some("RAISE"),
TK_RANGE => Some("RANGE"),
TK_RECURSIVE => Some("RECURSIVE"),
TK_REFERENCES => Some("REFERENCES"),
TK_REINDEX => Some("REINDEX"),
TK_RELEASE => Some("RELEASE"),
TK_RENAME => Some("RENAME"),
TK_REPLACE => Some("REPLACE"),
TK_RETURNING => Some("RETURNING"),
TK_RESTRICT => Some("RESTRICT"),
TK_ROLLBACK => Some("ROLLBACK"),
TK_ROW => Some("ROW"),
TK_ROWS => Some("ROWS"),
TK_SAVEPOINT => Some("SAVEPOINT"),
TK_SELECT => Some("SELECT"),
TK_SET => Some("SET"),
TK_TABLE => Some("TABLE"),
TK_TEMP => Some("TEMP"), // or TEMPORARY
TK_TIES => Some("TIES"),
TK_THEN => Some("THEN"),
TK_TO => Some("TO"),
TK_TRANSACTION => Some("TRANSACTION"),
TK_TRIGGER => Some("TRIGGER"),
TK_UNBOUNDED => Some("UNBOUNDED"),
TK_UNION => Some("UNION"),
TK_UNIQUE => Some("UNIQUE"),
TK_UPDATE => Some("UPDATE"),
TK_USING => Some("USING"),
TK_VACUUM => Some("VACUUM"),
TK_VALUES => Some("VALUES"),
TK_VIEW => Some("VIEW"),
TK_VIRTUAL => Some("VIRTUAL"),
TK_WHEN => Some("WHEN"),
TK_WHERE => Some("WHERE"),
TK_WINDOW => Some("WINDOW"),
TK_WITH => Some("WITH"),
TK_WITHOUT => Some("WITHOUT"),
TK_BITAND => Some("&"),
TK_BITNOT => Some("~"),
TK_BITOR => Some("|"),
TK_COMMA => Some(","),
TK_CONCAT => Some("||"),
TK_DOT => Some("."),
TK_EQ => Some("="), // or ==
TK_GT => Some(">"),
TK_GE => Some(">="),
TK_LP => Some("("),
TK_LSHIFT => Some("<<"),
TK_LE => Some("<="),
TK_LT => Some("<"),
TK_MINUS => Some("-"),
TK_NE => Some("!="), // or <>
TK_PLUS => Some("+"),
TK_REM => Some("%"),
TK_RP => Some(")"),
TK_RSHIFT => Some(">>"),
TK_SEMI => Some(";"),
TK_SLASH => Some("/"),
TK_STAR => Some("*"),
_ => None,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashMap;
#[test]
fn test_keyword_token() {
let values = HashMap::from([
("ABORT", TokenType::TK_ABORT),
("ACTION", TokenType::TK_ACTION),
("ADD", TokenType::TK_ADD),
("AFTER", TokenType::TK_AFTER),
("ALL", TokenType::TK_ALL),
("ALTER", TokenType::TK_ALTER),
("ALWAYS", TokenType::TK_ALWAYS),
("ANALYZE", TokenType::TK_ANALYZE),
("AND", TokenType::TK_AND),
("AS", TokenType::TK_AS),
("ASC", TokenType::TK_ASC),
("ATTACH", TokenType::TK_ATTACH),
("AUTOINCREMENT", TokenType::TK_AUTOINCR),
("BEFORE", TokenType::TK_BEFORE),
("BEGIN", TokenType::TK_BEGIN),
("BETWEEN", TokenType::TK_BETWEEN),
("BY", TokenType::TK_BY),
("CASCADE", TokenType::TK_CASCADE),
("CASE", TokenType::TK_CASE),
("CAST", TokenType::TK_CAST),
("CHECK", TokenType::TK_CHECK),
("COLLATE", TokenType::TK_COLLATE),
("COLUMN", TokenType::TK_COLUMNKW),
("COMMIT", TokenType::TK_COMMIT),
("CONFLICT", TokenType::TK_CONFLICT),
("CONSTRAINT", TokenType::TK_CONSTRAINT),
("CREATE", TokenType::TK_CREATE),
("CROSS", TokenType::TK_JOIN_KW),
("CURRENT", TokenType::TK_CURRENT),
("CURRENT_DATE", TokenType::TK_CTIME_KW),
("CURRENT_TIME", TokenType::TK_CTIME_KW),
("CURRENT_TIMESTAMP", TokenType::TK_CTIME_KW),
("DATABASE", TokenType::TK_DATABASE),
("DEFAULT", TokenType::TK_DEFAULT),
("DEFERRABLE", TokenType::TK_DEFERRABLE),
("DEFERRED", TokenType::TK_DEFERRED),
("DELETE", TokenType::TK_DELETE),
("DESC", TokenType::TK_DESC),
("DETACH", TokenType::TK_DETACH),
("DISTINCT", TokenType::TK_DISTINCT),
("DO", TokenType::TK_DO),
("DROP", TokenType::TK_DROP),
("EACH", TokenType::TK_EACH),
("ELSE", TokenType::TK_ELSE),
("END", TokenType::TK_END),
("ESCAPE", TokenType::TK_ESCAPE),
("EXCEPT", TokenType::TK_EXCEPT),
("EXCLUDE", TokenType::TK_EXCLUDE),
("EXCLUSIVE", TokenType::TK_EXCLUSIVE),
("EXISTS", TokenType::TK_EXISTS),
("EXPLAIN", TokenType::TK_EXPLAIN),
("FAIL", TokenType::TK_FAIL),
("FILTER", TokenType::TK_FILTER),
("FIRST", TokenType::TK_FIRST),
("FOLLOWING", TokenType::TK_FOLLOWING),
("FOR", TokenType::TK_FOR),
("FOREIGN", TokenType::TK_FOREIGN),
("FROM", TokenType::TK_FROM),
("FULL", TokenType::TK_JOIN_KW),
("GENERATED", TokenType::TK_GENERATED),
("GLOB", TokenType::TK_LIKE_KW),
("GROUP", TokenType::TK_GROUP),
("GROUPS", TokenType::TK_GROUPS),
("HAVING", TokenType::TK_HAVING),
("IF", TokenType::TK_IF),
("IGNORE", TokenType::TK_IGNORE),
("IMMEDIATE", TokenType::TK_IMMEDIATE),
("IN", TokenType::TK_IN),
("INDEX", TokenType::TK_INDEX),
("INDEXED", TokenType::TK_INDEXED),
("INITIALLY", TokenType::TK_INITIALLY),
("INNER", TokenType::TK_JOIN_KW),
("INSERT", TokenType::TK_INSERT),
("INSTEAD", TokenType::TK_INSTEAD),
("INTERSECT", TokenType::TK_INTERSECT),
("INTO", TokenType::TK_INTO),
("IS", TokenType::TK_IS),
("ISNULL", TokenType::TK_ISNULL),
("JOIN", TokenType::TK_JOIN),
("KEY", TokenType::TK_KEY),
("LAST", TokenType::TK_LAST),
("LEFT", TokenType::TK_JOIN_KW),
("LIKE", TokenType::TK_LIKE_KW),
("LIMIT", TokenType::TK_LIMIT),
("MATCH", TokenType::TK_MATCH),
("MATERIALIZED", TokenType::TK_MATERIALIZED),
("NATURAL", TokenType::TK_JOIN_KW),
("NO", TokenType::TK_NO),
("NOT", TokenType::TK_NOT),
("NOTHING", TokenType::TK_NOTHING),
("NOTNULL", TokenType::TK_NOTNULL),
("NULL", TokenType::TK_NULL),
("NULLS", TokenType::TK_NULLS),
("OF", TokenType::TK_OF),
("OFFSET", TokenType::TK_OFFSET),
("ON", TokenType::TK_ON),
("OR", TokenType::TK_OR),
("ORDER", TokenType::TK_ORDER),
("OTHERS", TokenType::TK_OTHERS),
("OUTER", TokenType::TK_JOIN_KW),
("OVER", TokenType::TK_OVER),
("PARTITION", TokenType::TK_PARTITION),
("PLAN", TokenType::TK_PLAN),
("PRAGMA", TokenType::TK_PRAGMA),
("PRECEDING", TokenType::TK_PRECEDING),
("PRIMARY", TokenType::TK_PRIMARY),
("QUERY", TokenType::TK_QUERY),
("RAISE", TokenType::TK_RAISE),
("RANGE", TokenType::TK_RANGE),
("RECURSIVE", TokenType::TK_RECURSIVE),
("REFERENCES", TokenType::TK_REFERENCES),
("REGEXP", TokenType::TK_LIKE_KW),
("REINDEX", TokenType::TK_REINDEX),
("RELEASE", TokenType::TK_RELEASE),
("RENAME", TokenType::TK_RENAME),
("REPLACE", TokenType::TK_REPLACE),
("RETURNING", TokenType::TK_RETURNING),
("RESTRICT", TokenType::TK_RESTRICT),
("RIGHT", TokenType::TK_JOIN_KW),
("ROLLBACK", TokenType::TK_ROLLBACK),
("ROW", TokenType::TK_ROW),
("ROWS", TokenType::TK_ROWS),
("SAVEPOINT", TokenType::TK_SAVEPOINT),
("SELECT", TokenType::TK_SELECT),
("SET", TokenType::TK_SET),
("TABLE", TokenType::TK_TABLE),
("TEMP", TokenType::TK_TEMP),
("TEMPORARY", TokenType::TK_TEMP),
("THEN", TokenType::TK_THEN),
("TIES", TokenType::TK_TIES),
("TO", TokenType::TK_TO),
("TRANSACTION", TokenType::TK_TRANSACTION),
("TRIGGER", TokenType::TK_TRIGGER),
("UNBOUNDED", TokenType::TK_UNBOUNDED),
("UNION", TokenType::TK_UNION),
("UNIQUE", TokenType::TK_UNIQUE),
("UPDATE", TokenType::TK_UPDATE),
("USING", TokenType::TK_USING),
("VACUUM", TokenType::TK_VACUUM),
("VALUES", TokenType::TK_VALUES),
("VIEW", TokenType::TK_VIEW),
("VIRTUAL", TokenType::TK_VIRTUAL),
("WHEN", TokenType::TK_WHEN),
("WHERE", TokenType::TK_WHERE),
("WINDOW", TokenType::TK_WINDOW),
("WITH", TokenType::TK_WITH),
("WITHOUT", TokenType::TK_WITHOUT),
]);
for (key, value) in &values {
assert!(keyword_token(key.as_bytes()).unwrap() == *value);
assert!(
keyword_token(key.as_bytes().to_ascii_lowercase().as_slice()).unwrap() == *value
);
}
assert!(keyword_token(b"").is_none());
assert!(keyword_token(b"wrong").is_none());
assert!(keyword_token(b"super wrong").is_none());
assert!(keyword_token(b"super_wrong").is_none());
assert!(keyword_token(b"aae26e78-3ba7-4627-8f8f-02623302495a").is_none());
assert!(keyword_token("Crème Brulée".as_bytes()).is_none());
assert!(keyword_token("fróm".as_bytes()).is_none());
}
}

View File

@@ -1,181 +0,0 @@
//! All terminal symbols.
/// Token classes
// Generated by lemon (parse.h).
// Renamed manually.
// To be kept in sync.
#[non_exhaustive]
#[allow(non_camel_case_types, missing_docs)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
#[repr(u16)]
pub enum TokenType {
TK_EOF = 0,
TK_SEMI = 1,
TK_EXPLAIN = 2,
TK_QUERY = 3,
TK_PLAN = 4,
TK_BEGIN = 5,
TK_TRANSACTION = 6,
TK_DEFERRED = 7,
TK_IMMEDIATE = 8,
TK_EXCLUSIVE = 9,
TK_COMMIT = 10,
TK_END = 11,
TK_ROLLBACK = 12,
TK_SAVEPOINT = 13,
TK_RELEASE = 14,
TK_TO = 15,
TK_TABLE = 16,
TK_CREATE = 17,
TK_IF = 18,
TK_NOT = 19,
TK_EXISTS = 20,
TK_TEMP = 21,
TK_LP = 22,
TK_RP = 23,
TK_AS = 24,
TK_COMMA = 25,
TK_WITHOUT = 26,
TK_ABORT = 27,
TK_ACTION = 28,
TK_AFTER = 29,
TK_ANALYZE = 30,
TK_ASC = 31,
TK_ATTACH = 32,
TK_BEFORE = 33,
TK_BY = 34,
TK_CASCADE = 35,
TK_CAST = 36,
TK_CONFLICT = 37,
TK_DATABASE = 38,
TK_DESC = 39,
TK_DETACH = 40,
TK_EACH = 41,
TK_FAIL = 42,
TK_OR = 43,
TK_AND = 44,
TK_IS = 45,
TK_ISNOT = 46,
TK_MATCH = 47,
TK_LIKE_KW = 48,
TK_BETWEEN = 49,
TK_IN = 50,
TK_ISNULL = 51,
TK_NOTNULL = 52,
TK_NE = 53,
TK_EQ = 54,
TK_GT = 55,
TK_LE = 56,
TK_LT = 57,
TK_GE = 58,
TK_ESCAPE = 59,
TK_ID = 60,
TK_COLUMNKW = 61,
TK_DO = 62,
TK_FOR = 63,
TK_IGNORE = 64,
TK_INITIALLY = 65,
TK_INSTEAD = 66,
TK_NO = 67,
TK_KEY = 68,
TK_OF = 69,
TK_OFFSET = 70,
TK_PRAGMA = 71,
TK_RAISE = 72,
TK_RECURSIVE = 73,
TK_REPLACE = 74,
TK_RESTRICT = 75,
TK_ROW = 76,
TK_ROWS = 77,
TK_TRIGGER = 78,
TK_VACUUM = 79,
TK_VIEW = 80,
TK_VIRTUAL = 81,
TK_WITH = 82,
TK_NULLS = 83,
TK_FIRST = 84,
TK_LAST = 85,
TK_CURRENT = 86,
TK_FOLLOWING = 87,
TK_PARTITION = 88,
TK_PRECEDING = 89,
TK_RANGE = 90,
TK_UNBOUNDED = 91,
TK_EXCLUDE = 92,
TK_GROUPS = 93,
TK_OTHERS = 94,
TK_TIES = 95,
TK_GENERATED = 96,
TK_ALWAYS = 97,
TK_MATERIALIZED = 98,
TK_REINDEX = 99,
TK_RENAME = 100,
TK_CTIME_KW = 101,
TK_ANY = 102,
TK_BITAND = 103,
TK_BITOR = 104,
TK_LSHIFT = 105,
TK_RSHIFT = 106,
TK_PLUS = 107,
TK_MINUS = 108,
TK_STAR = 109,
TK_SLASH = 110,
TK_REM = 111,
TK_CONCAT = 112,
TK_PTR = 113,
TK_COLLATE = 114,
TK_BITNOT = 115,
TK_ON = 116,
TK_INDEXED = 117,
TK_STRING = 118,
TK_JOIN_KW = 119,
TK_CONSTRAINT = 120,
TK_DEFAULT = 121,
TK_NULL = 122,
TK_PRIMARY = 123,
TK_UNIQUE = 124,
TK_CHECK = 125,
TK_REFERENCES = 126,
TK_AUTOINCR = 127,
TK_INSERT = 128,
TK_DELETE = 129,
TK_UPDATE = 130,
TK_SET = 131,
TK_DEFERRABLE = 132,
TK_FOREIGN = 133,
TK_DROP = 134,
TK_UNION = 135,
TK_ALL = 136,
TK_EXCEPT = 137,
TK_INTERSECT = 138,
TK_SELECT = 139,
TK_VALUES = 140,
TK_DISTINCT = 141,
TK_DOT = 142,
TK_FROM = 143,
TK_JOIN = 144,
TK_USING = 145,
TK_ORDER = 146,
TK_GROUP = 147,
TK_HAVING = 148,
TK_LIMIT = 149,
TK_WHERE = 150,
TK_RETURNING = 151,
TK_INTO = 152,
TK_NOTHING = 153,
TK_BLOB = 154,
TK_FLOAT = 155,
TK_INTEGER = 156,
TK_VARIABLE = 157,
TK_CASE = 158,
TK_WHEN = 159,
TK_THEN = 160,
TK_ELSE = 161,
TK_INDEX = 162,
TK_ALTER = 163,
TK_ADD = 164,
TK_WINDOW = 165,
TK_OVER = 166,
TK_FILTER = 167,
TK_ILLEGAL = 185,
}

View File

@@ -1,6 +0,0 @@
//! Streaming SQLite tokenizer
mod scan;
pub mod sql;
pub use scan::{ScanError, Scanner, Splitter};

View File

@@ -1,173 +0,0 @@
//! Adaptation/port of [Go scanner](http://tip.golang.org/pkg/bufio/#Scanner).
use std::error::Error;
use std::fmt;
/// Error with position
pub trait ScanError: Error + Sized {
/// Update the position where the error occurs
fn position(&mut self, line: u64, column: usize, offset: usize);
}
/// The `(&[u8], TokenType)` is the token.
/// And the `usize` is the number of bytes to consume.
type SplitResult<'input, TokenType, Error> =
Result<(Option<(&'input [u8], TokenType)>, usize), Error>;
/// Split function used to tokenize the input
pub trait Splitter: Sized {
/// Potential error raised
type Error: ScanError;
//type Item: ?Sized;
/// Token generated
type TokenType;
/// The arguments are an initial substring of the remaining unprocessed
/// data.
///
/// If the function returns an error, scanning stops and the error
/// is returned to the client.
///
/// The function is never called with an empty data slice.
fn split<'input>(
&mut self,
data: &'input [u8],
) -> SplitResult<'input, Self::TokenType, Self::Error>;
}
/// Like a `BufReader` but with a growable buffer.
/// Successive calls to the `scan` method will step through the 'tokens'
/// of a file, skipping the bytes between the tokens.
///
/// Scanning stops unrecoverably at EOF, the first I/O error, or a token too
/// large to fit in the buffer. When a scan stops, the reader may have
/// advanced arbitrarily far past the last token.
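///
/// A minimal usage sketch, assuming `tokenizer` implements [`Splitter`] and
/// `input` is a byte slice:
/// ```ignore
/// let mut scanner = Scanner::new(tokenizer);
/// loop {
///     match scanner.scan(input) {
///         Ok((_, None, _)) => break, // end of input
///         Ok((start, Some((token, token_type)), end)) => { /* use token */ }
///         Err(_err) => break,        // the error carries line/column via `ScanError`
///     }
/// }
/// ```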
pub struct Scanner<S: Splitter> {
/// offset in `input`
offset: usize,
/// mark
mark: (usize, u64, usize),
/// The function to tokenize the input.
splitter: S,
/// current line number
line: u64,
/// current column number (byte offset, not char offset)
column: usize,
}
impl<S: Splitter> Scanner<S> {
/// Constructor
pub fn new(splitter: S) -> Self {
Self {
offset: 0,
mark: (0, 0, 0),
splitter,
line: 1,
column: 1,
}
}
/// Current line number
pub fn line(&self) -> u64 {
self.line
}
/// Current column number (byte offset, not char offset)
pub fn column(&self) -> usize {
self.column
}
/// Current byte offset in the source string
pub fn offset(&self) -> usize {
self.offset
}
/// Associated splitter
pub fn splitter(&self) -> &S {
&self.splitter
}
/// Mark current position
pub fn mark(&mut self) {
self.mark = (self.offset, self.line, self.column);
}
/// Reset to mark
pub fn reset_to_mark(&mut self) {
(self.offset, self.line, self.column) = self.mark;
}
/// Reset the scanner such that it behaves as if it had never been used.
pub fn reset(&mut self) {
self.offset = 0;
self.line = 1;
self.column = 1;
}
}
type ScanResult<'input, TokenType, Error> =
Result<(usize, Option<(&'input [u8], TokenType)>, usize), Error>;
impl<S: Splitter> Scanner<S> {
/// Advance the Scanner to next token.
/// Return the token as a byte slice.
/// Return `None` when the end of the input is reached.
/// Return any error that occurs while reading the input.
pub fn scan<'input>(
&mut self,
input: &'input [u8],
) -> ScanResult<'input, S::TokenType, S::Error> {
// Loop until we have a token.
loop {
// See if we can get a token with what we already have.
if self.offset < input.len() {
let data = &input[self.offset..];
match self.splitter.split(data) {
Err(mut e) => {
e.position(self.line, self.column, self.offset);
return Err(e);
}
Ok((None, 0)) => {
// Done
}
Ok((None, amt)) => {
// Ignore/skip this data
self.consume(data, amt);
continue;
}
Ok((tok, amt)) => {
let start = self.offset;
self.consume(data, amt);
return Ok((start, tok, self.offset));
}
}
}
// We cannot generate a token with what we are holding.
// we are done.
return Ok((self.offset, None, self.offset));
}
}
/// Consume `amt` bytes of the buffer.
fn consume(&mut self, data: &[u8], amt: usize) {
debug_assert!(amt <= data.len());
for byte in &data[..amt] {
if *byte == b'\n' {
self.line += 1;
self.column = 1;
} else {
self.column += 1;
}
}
self.offset += amt;
}
}
impl<S: Splitter> fmt::Debug for Scanner<S> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Scanner")
.field("offset", &self.offset)
.field("mark", &self.mark)
.field("line", &self.line)
.field("column", &self.column)
.finish()
}
}

View File

@@ -1,148 +0,0 @@
use std::error;
use std::fmt;
use crate::lexer::scan::ScanError;
use crate::parser::ParserError;
/// SQL lexer and parser errors
#[non_exhaustive]
#[derive(Debug, Clone, miette::Diagnostic)]
#[diagnostic()]
pub enum Error {
/// Lexer error
UnrecognizedToken(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Missing quote or double-quote or backtick
UnterminatedLiteral(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Missing `]`
UnterminatedBracket(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Missing `*/`
UnterminatedBlockComment(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Invalid parameter name
BadVariableName(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Invalid number format
#[diagnostic(help("Invalid digit in `{3}`"))]
BadNumber(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
Option<usize>,
String, // Holds the offending number as a string
),
/// Invalid or missing sign after `!`
ExpectedEqualsSign(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// BLOB literals are string literals containing hexadecimal data and preceded by a single "x" or "X" character.
MalformedBlobLiteral(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
),
/// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
MalformedHexInteger(
Option<(u64, usize)>,
#[label("here")] Option<miette::SourceSpan>,
Option<usize>,
#[help] Option<&'static str>,
),
/// Grammar error
ParserError(
ParserError,
Option<(u64, usize)>,
#[label("syntax error")] Option<miette::SourceSpan>,
),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Self::UnrecognizedToken(pos, _) => {
write!(f, "unrecognized token at {:?}", pos.unwrap())
}
Self::UnterminatedLiteral(pos, _) => {
write!(f, "non-terminated literal at {:?}", pos.unwrap())
}
Self::UnterminatedBracket(pos, _) => {
write!(f, "non-terminated bracket at {:?}", pos.unwrap())
}
Self::UnterminatedBlockComment(pos, _) => {
write!(f, "non-terminated block comment at {:?}", pos.unwrap())
}
Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos.unwrap()),
Self::BadNumber(pos, _, _, _) => write!(f, "bad number at {:?}", pos.unwrap()),
Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos.unwrap()),
Self::MalformedBlobLiteral(pos, _) => {
write!(f, "malformed blob literal at {:?}", pos.unwrap())
}
Self::MalformedHexInteger(pos, _, _, _) => {
write!(f, "malformed hex integer at {:?}", pos.unwrap())
}
Self::ParserError(ref msg, Some(pos), _) => write!(f, "{msg} at {pos:?}"),
Self::ParserError(ref msg, _, _) => write!(f, "{msg}"),
}
}
}
impl error::Error for Error {}
impl From<ParserError> for Error {
fn from(err: ParserError) -> Self {
Self::ParserError(err, None, None)
}
}
impl ScanError for Error {
fn position(&mut self, line: u64, column: usize, offset: usize) {
match *self {
Self::UnrecognizedToken(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::UnterminatedLiteral(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::UnterminatedBracket(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::UnterminatedBlockComment(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::BadVariableName(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::ExpectedEqualsSign(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
Self::MalformedBlobLiteral(ref mut pos, ref mut src) => {
*pos = Some((line, column));
*src = Some((offset).into());
}
// Exact same handling here
Self::MalformedHexInteger(ref mut pos, ref mut src, len, _)
| Self::BadNumber(ref mut pos, ref mut src, len, _) => {
*pos = Some((line, column));
*src = Some((offset, len.unwrap_or(0)).into());
}
Self::ParserError(_, ref mut pos, _) => *pos = Some((line, column)),
}
}
}

View File

@@ -1,762 +0,0 @@
//! Adaptation/port of [`SQLite` tokenizer](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/tokenize.c)
use fallible_iterator::FallibleIterator;
use memchr::memchr;
pub use crate::dialect::TokenType;
use crate::dialect::TokenType::*;
use crate::dialect::{is_identifier_continue, is_identifier_start, keyword_token, sentinel};
use crate::parser::ast::Cmd;
use crate::parser::parse::{yyParser, YYCODETYPE};
use crate::parser::Context;
mod error;
#[cfg(test)]
mod test;
use crate::lexer::scan::ScanError;
use crate::lexer::scan::Splitter;
use crate::lexer::Scanner;
pub use crate::parser::ParserError;
pub use error::Error;
// TODO Extract scanning stuff and move this into the parser crate
// to make it possible to use the tokenizer without depending on the parser...
/// SQL parser
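///
/// A minimal usage sketch (illustrative): the parser yields one `Cmd` per
/// SQL statement through the `FallibleIterator` API.
///
/// ```rust
/// use fallible_iterator::FallibleIterator;
/// use turso_sqlite3_parser::lexer::sql::Parser;
///
/// let mut parser = Parser::new(b"SELECT 1; SELECT 2;");
/// // `next()` returns `Ok(Some(cmd))` for each statement, `Ok(None)` at
/// // end of input, and `Err(..)` on a lexing or parsing error.
/// let mut count = 0;
/// while let Some(_cmd) = parser.next().unwrap() {
///     count += 1;
/// }
/// assert_eq!(count, 2);
/// ```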
pub struct Parser<'input> {
input: &'input [u8],
scanner: Scanner<Tokenizer>,
/// lemon parser
parser: yyParser<'input>,
had_error: bool,
}
impl<'input> Parser<'input> {
/// Constructor
pub fn new(input: &'input [u8]) -> Self {
let lexer = Tokenizer::new();
let scanner = Scanner::new(lexer);
let ctx = Context::new(input);
let parser = yyParser::new(ctx);
Parser {
input,
scanner,
parser,
had_error: false,
}
}
/// Parse new `input`
pub fn reset(&mut self, input: &'input [u8]) {
self.input = input;
self.scanner.reset();
self.had_error = false;
}
/// Current line position in input
pub fn line(&self) -> u64 {
self.scanner.line()
}
/// Current column position in input
pub fn column(&self) -> usize {
self.scanner.column()
}
/// Current byte offset in input
pub fn offset(&self) -> usize {
self.scanner.offset()
}
/// Public API for sqlite3ParserFinalize()
pub fn finalize(&mut self) {
self.parser.sqlite3ParserFinalize();
}
}
/*
** Return the id of the next token in input.
*/
fn get_token(scanner: &mut Scanner<Tokenizer>, input: &[u8]) -> Result<TokenType, Error> {
let mut t = {
let (_, token_type) = match scanner.scan(input)? {
(_, None, _) => {
return Ok(TK_EOF);
}
(_, Some(tuple), _) => tuple,
};
token_type
};
if t == TK_ID
|| t == TK_STRING
|| t == TK_JOIN_KW
|| t == TK_WINDOW
|| t == TK_OVER
|| yyParser::parse_fallback(t as YYCODETYPE) == TK_ID as YYCODETYPE
{
t = TK_ID;
}
Ok(t)
}
/*
** The following three functions are called immediately after the tokenizer
** reads the keywords WINDOW, OVER and FILTER, respectively, to determine
** whether the token should be treated as a keyword or an SQL identifier.
** This cannot be handled by the usual lemon %fallback method, due to
** the ambiguity in some constructions. e.g.
**
** SELECT sum(x) OVER ...
**
** In the above, "OVER" might be a keyword, or it might be an alias for the
** sum(x) expression. If a "%fallback ID OVER" directive were added to
** grammar, then SQLite would always treat "OVER" as an alias, making it
** impossible to call a window-function without a FILTER clause.
**
** WINDOW is treated as a keyword if:
**
** * the following token is an identifier, or a keyword that can fall back
** to being an identifier, and
** * the token after that one is TK_AS.
**
** OVER is a keyword if:
**
** * the previous token was TK_RP, and
** * the next token is either TK_LP or an identifier.
**
** FILTER is a keyword if:
**
** * the previous token was TK_RP, and
** * the next token is TK_LP.
*/
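/*
** Illustrative examples of the rules above (added for clarity, not part of
** the original SQLite sources):
**
**   SELECT sum(x) OVER (ORDER BY y) FROM t
**     -- OVER follows ')' and precedes '(', so it is the keyword TK_OVER.
**
**   SELECT sum(x) OVER FROM t
**     -- the token after OVER is FROM (neither '(' nor an identifier),
**     -- so OVER falls back to TK_ID and names the sum(x) column.
**
**   SELECT x FROM t WINDOW w AS (PARTITION BY y)
**     -- WINDOW is followed by an identifier and then AS, so it is the
**     -- keyword TK_WINDOW.
*/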
fn analyze_window_keyword(
scanner: &mut Scanner<Tokenizer>,
input: &[u8],
) -> Result<TokenType, Error> {
let t = get_token(scanner, input)?;
if t != TK_ID {
return Ok(TK_ID);
};
let t = get_token(scanner, input)?;
if t != TK_AS {
return Ok(TK_ID);
};
Ok(TK_WINDOW)
}
fn analyze_over_keyword(
scanner: &mut Scanner<Tokenizer>,
input: &[u8],
last_token: TokenType,
) -> Result<TokenType, Error> {
if last_token == TK_RP {
let t = get_token(scanner, input)?;
if t == TK_LP || t == TK_ID {
return Ok(TK_OVER);
}
}
Ok(TK_ID)
}
fn analyze_filter_keyword(
scanner: &mut Scanner<Tokenizer>,
input: &[u8],
last_token: TokenType,
) -> Result<TokenType, Error> {
if last_token == TK_RP && get_token(scanner, input)? == TK_LP {
return Ok(TK_FILTER);
}
Ok(TK_ID)
}
macro_rules! try_with_position {
($scanner:expr, $expr:expr) => {
match $expr {
Ok(val) => val,
Err(err) => {
let mut err = Error::from(err);
err.position($scanner.line(), $scanner.column(), $scanner.offset() - 1);
return Err(err);
}
}
};
}
impl FallibleIterator for Parser<'_> {
type Item = Cmd;
type Error = Error;
fn next(&mut self) -> Result<Option<Cmd>, Error> {
//print!("line: {}, column: {}: ", self.scanner.line(), self.scanner.column());
// if we have already encountered an error, return None to signal to the fallible iterator that we are done parsing
if self.had_error {
return Ok(None);
}
self.parser.ctx.reset();
let mut last_token_parsed = TK_EOF;
let mut eof = false;
loop {
let (start, (value, mut token_type), end) = match self.scanner.scan(self.input)? {
(_, None, _) => {
eof = true;
break;
}
(start, Some(tuple), end) => (start, tuple, end),
};
if token_type == TK_ILLEGAL {
// break out of parsing loop and return error
self.parser.sqlite3ParserFinalize();
self.had_error = true;
return Err(Error::UnrecognizedToken(
Some((self.scanner.line(), self.scanner.column())),
Some(start.into()),
));
}
let token = if token_type >= TK_WINDOW {
debug_assert!(
token_type == TK_OVER || token_type == TK_FILTER || token_type == TK_WINDOW
);
self.scanner.mark();
if token_type == TK_WINDOW {
token_type = analyze_window_keyword(&mut self.scanner, self.input)?;
} else if token_type == TK_OVER {
token_type =
analyze_over_keyword(&mut self.scanner, self.input, last_token_parsed)?;
} else if token_type == TK_FILTER {
token_type =
analyze_filter_keyword(&mut self.scanner, self.input, last_token_parsed)?;
}
self.scanner.reset_to_mark();
token_type.to_token(start, value, end)
} else {
token_type.to_token(start, value, end)
};
//println!("({:?}, {:?})", token_type, token);
try_with_position!(self.scanner, self.parser.sqlite3Parser(token_type, token));
last_token_parsed = token_type;
if self.parser.ctx.done() {
//println!();
break;
}
}
if last_token_parsed == TK_EOF {
return Ok(None); // empty input
}
/* Upon reaching the end of input, call the parser two more times
with tokens TK_SEMI and 0, in that order. */
if eof && self.parser.ctx.is_ok() {
if last_token_parsed != TK_SEMI {
try_with_position!(
self.scanner,
self.parser
.sqlite3Parser(TK_SEMI, sentinel(self.input.len()))
);
if self.parser.ctx.error().is_some() {
self.had_error = true;
}
}
try_with_position!(
self.scanner,
self.parser
.sqlite3Parser(TK_EOF, sentinel(self.input.len()))
);
if self.parser.ctx.error().is_some() {
self.had_error = true;
}
}
self.parser.sqlite3ParserFinalize();
if let Some(e) = self.parser.ctx.error() {
let err = Error::ParserError(
e,
Some((self.scanner.line(), self.scanner.column())),
Some((self.offset() - 1).into()),
);
self.had_error = true;
return Err(err);
}
let cmd = self.parser.ctx.cmd();
if let Some(ref cmd) = cmd {
if let Err(e) = cmd.check() {
let err = Error::ParserError(
e,
Some((self.scanner.line(), self.scanner.column())),
Some((self.offset() - 1).into()),
);
self.had_error = true;
return Err(err);
}
}
Ok(cmd)
}
}
/// SQL token
pub type Token<'input> = (&'input [u8], TokenType);
/// SQL lexer
#[derive(Default)]
pub struct Tokenizer {}
impl Tokenizer {
/// Constructor
pub fn new() -> Self {
Self {}
}
}
/// ```rust
/// use turso_sqlite3_parser::lexer::sql::Tokenizer;
/// use turso_sqlite3_parser::lexer::Scanner;
///
/// let tokenizer = Tokenizer::new();
/// let input = b"PRAGMA parser_trace=ON;";
/// let mut s = Scanner::new(tokenizer);
/// let Ok((_, Some((token1, _)), _)) = s.scan(input) else { panic!() };
/// s.scan(input).unwrap();
/// assert!(b"PRAGMA".eq_ignore_ascii_case(token1));
/// ```
impl Splitter for Tokenizer {
type Error = Error;
type TokenType = TokenType;
fn split<'input>(
&mut self,
data: &'input [u8],
) -> Result<(Option<Token<'input>>, usize), Error> {
if data[0].is_ascii_whitespace() {
// eat as much space as possible
return Ok((
None,
match data.iter().skip(1).position(|&b| !b.is_ascii_whitespace()) {
Some(i) => i + 1,
_ => data.len(),
},
));
}
match data[0] {
b'-' => {
if let Some(b) = data.get(1) {
if *b == b'-' {
// eat comment
if let Some(i) = memchr(b'\n', data) {
Ok((None, i + 1))
} else {
Ok((None, data.len()))
}
} else if *b == b'>' {
if let Some(b) = data.get(2) {
if *b == b'>' {
return Ok((Some((&data[..3], TK_PTR)), 3));
}
}
Ok((Some((&data[..2], TK_PTR)), 2))
} else {
Ok((Some((&data[..1], TK_MINUS)), 1))
}
} else {
Ok((Some((&data[..1], TK_MINUS)), 1))
}
}
b'(' => Ok((Some((&data[..1], TK_LP)), 1)),
b')' => Ok((Some((&data[..1], TK_RP)), 1)),
b';' => Ok((Some((&data[..1], TK_SEMI)), 1)),
b'+' => Ok((Some((&data[..1], TK_PLUS)), 1)),
b'*' => Ok((Some((&data[..1], TK_STAR)), 1)),
b'/' => {
if let Some(b) = data.get(1) {
if *b == b'*' {
// eat comment
let mut pb = 0;
let mut end = None;
for (i, b) in data.iter().enumerate().skip(2) {
if *b == b'/' && pb == b'*' {
end = Some(i);
break;
}
pb = *b;
}
if let Some(i) = end {
Ok((None, i + 1))
} else {
Err(Error::UnterminatedBlockComment(None, None))
}
} else {
Ok((Some((&data[..1], TK_SLASH)), 1))
}
} else {
Ok((Some((&data[..1], TK_SLASH)), 1))
}
}
b'%' => Ok((Some((&data[..1], TK_REM)), 1)),
b'=' => {
if let Some(b) = data.get(1) {
Ok(if *b == b'=' {
(Some((&data[..2], TK_EQ)), 2)
} else {
(Some((&data[..1], TK_EQ)), 1)
})
} else {
Ok((Some((&data[..1], TK_EQ)), 1))
}
}
b'<' => {
if let Some(b) = data.get(1) {
Ok(match *b {
b'=' => (Some((&data[..2], TK_LE)), 2),
b'>' => (Some((&data[..2], TK_NE)), 2),
b'<' => (Some((&data[..2], TK_LSHIFT)), 2),
_ => (Some((&data[..1], TK_LT)), 1),
})
} else {
Ok((Some((&data[..1], TK_LT)), 1))
}
}
b'>' => {
if let Some(b) = data.get(1) {
Ok(match *b {
b'=' => (Some((&data[..2], TK_GE)), 2),
b'>' => (Some((&data[..2], TK_RSHIFT)), 2),
_ => (Some((&data[..1], TK_GT)), 1),
})
} else {
Ok((Some((&data[..1], TK_GT)), 1))
}
}
b'!' => {
if let Some(b) = data.get(1) {
if *b == b'=' {
Ok((Some((&data[..2], TK_NE)), 2))
} else {
Err(Error::ExpectedEqualsSign(None, None))
}
} else {
Err(Error::ExpectedEqualsSign(None, None))
}
}
b'|' => {
if let Some(b) = data.get(1) {
Ok(if *b == b'|' {
(Some((&data[..2], TK_CONCAT)), 2)
} else {
(Some((&data[..1], TK_BITOR)), 1)
})
} else {
Ok((Some((&data[..1], TK_BITOR)), 1))
}
}
b',' => Ok((Some((&data[..1], TK_COMMA)), 1)),
b'&' => Ok((Some((&data[..1], TK_BITAND)), 1)),
b'~' => Ok((Some((&data[..1], TK_BITNOT)), 1)),
quote @ (b'`' | b'\'' | b'"') => literal(data, quote),
b'.' => {
if let Some(b) = data.get(1) {
if b.is_ascii_digit() {
fractional_part(data, 0)
} else {
Ok((Some((&data[..1], TK_DOT)), 1))
}
} else {
Ok((Some((&data[..1], TK_DOT)), 1))
}
}
b'0'..=b'9' => number(data),
b'[' => {
if let Some(i) = memchr(b']', data) {
// Keep the original brackets: '[' ... ']'
Ok((Some((&data[0..=i], TK_ID)), i + 1))
} else {
Err(Error::UnterminatedBracket(None, None))
}
}
b'?' => {
match data.iter().skip(1).position(|&b| !b.is_ascii_digit()) {
Some(i) => {
// do not include the '?' in the token
Ok((Some((&data[1..=i], TK_VARIABLE)), i + 1))
}
None => {
if !data[1..].is_empty() && data[1..].iter().all(|ch| *ch == b'0') {
return Err(Error::BadVariableName(None, None));
}
Ok((Some((&data[1..], TK_VARIABLE)), data.len()))
}
}
}
b'$' | b'@' | b'#' | b':' => {
match data
.iter()
.skip(1)
.position(|&b| !is_identifier_continue(b))
{
Some(0) => Err(Error::BadVariableName(None, None)),
Some(i) => {
// '$' is included as part of the name
Ok((Some((&data[..=i], TK_VARIABLE)), i + 1))
}
None => {
if data.len() == 1 {
return Err(Error::BadVariableName(None, None));
}
Ok((Some((data, TK_VARIABLE)), data.len()))
}
}
}
b if is_identifier_start(b) => {
if b == b'x' || b == b'X' {
if let Some(&b'\'') = data.get(1) {
blob_literal(data)
} else {
Ok(self.identifierish(data))
}
} else {
Ok(self.identifierish(data))
}
}
// Return TK_ILLEGAL
_ => handle_unrecognized(data),
}
}
}
fn handle_unrecognized(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
let mut end = 1;
while end < data.len() && !data[end].is_ascii_whitespace() {
end += 1;
}
Ok((Some((&data[..end], TokenType::TK_ILLEGAL)), end))
}
fn literal(data: &[u8], quote: u8) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert_eq!(data[0], quote);
let tt = if quote == b'\'' { TK_STRING } else { TK_ID };
let mut pb = 0;
let mut end = None;
// data[0] == quote => skip(1)
for (i, b) in data.iter().enumerate().skip(1) {
if *b == quote {
if pb == quote {
// escaped quote
pb = 0;
continue;
}
} else if pb == quote {
end = Some(i);
break;
}
pb = *b;
}
if end.is_some() || pb == quote {
let i = match end {
Some(i) => i,
_ => data.len(),
};
// keep original quotes in the token
Ok((Some((&data[0..i], tt)), i))
} else {
Err(Error::UnterminatedLiteral(None, None))
}
}
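// For example (illustrative): the input `'it''s'` is scanned as a single
// TK_STRING token spanning all seven bytes -- the doubled quote is an
// escape, and the surrounding quotes are kept in the token text.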
fn blob_literal(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert!(data[0] == b'x' || data[0] == b'X');
debug_assert_eq!(data[1], b'\'');
let mut end = 2;
let mut valid = true;
while end < data.len() && data[end] != b'\'' {
if !data[end].is_ascii_hexdigit() {
valid = false;
}
end += 1;
}
let total_len = if end < data.len() { end + 1 } else { end };
if !valid || (end - 2) % 2 != 0 || end >= data.len() {
return Ok((Some((&data[..total_len], TokenType::TK_ILLEGAL)), total_len));
}
Ok((Some((&data[2..end], TokenType::TK_BLOB)), total_len))
}
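// For example (illustrative): `x'CAFE'` yields TK_BLOB with the token text
// `CAFE` (prefix and quotes stripped), while `x'F'` (odd number of digits)
// and `x'FG'` (non-hex digit) are both returned as single TK_ILLEGAL tokens.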
fn number(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert!(data[0].is_ascii_digit());
if data[0] == b'0' {
if let Some(b) = data.get(1) {
if *b == b'x' || *b == b'X' {
return hex_integer(data);
}
} else {
return Ok((Some((data, TK_INTEGER)), data.len()));
}
}
if let Some((i, b)) = find_end_of_number(data, 1, u8::is_ascii_digit)? {
if b == b'.' {
return fractional_part(data, i);
} else if b == b'e' || b == b'E' {
return exponential_part(data, i);
} else if is_identifier_start(b) {
return Err(Error::BadNumber(None, None, Some(i + 1), unsafe {
String::from_utf8_unchecked(data[..i + 1].to_vec())
}));
}
Ok((Some((&data[..i], TK_INTEGER)), i))
} else {
Ok((Some((data, TK_INTEGER)), data.len()))
}
}
fn hex_integer(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert_eq!(data[0], b'0');
debug_assert!(data[1] == b'x' || data[1] == b'X');
if let Some((i, b)) = find_end_of_number(data, 2, u8::is_ascii_hexdigit)? {
// Must not be empty (0x alone is invalid)
if i == 2 || is_identifier_start(b) {
let (len, help) = if i == 2 && !is_identifier_start(b) {
(i, "Did you forget to add digits after '0x' or '0X'?")
} else {
(i + 1, "There are some invalid digits after '0x' or '0X'")
};
return Err(Error::MalformedHexInteger(
None,
None,
Some(len), // Length of the malformed hex
Some(help), // Help Message
));
}
Ok((Some((&data[..i], TK_INTEGER)), i))
} else {
// Must not be empty (0x alone is invalid)
if data.len() == 2 {
return Err(Error::MalformedHexInteger(
None,
None,
Some(2), // Length of the malformed hex
Some("Did you forget to add digits after '0x' or '0X'?"), // Help Message
));
}
Ok((Some((data, TK_INTEGER)), data.len()))
}
}
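// For example (illustrative): `0x1A` scans as TK_INTEGER, a bare `0x` fails
// with the "Did you forget to add digits" help message, and `0xG1` fails
// with the "invalid digits" help message.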
fn fractional_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert_eq!(data[i], b'.');
if let Some((i, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
if b == b'e' || b == b'E' {
return exponential_part(data, i);
} else if is_identifier_start(b) {
return Err(Error::BadNumber(None, None, Some(i + 1), unsafe {
String::from_utf8_unchecked(data[..i + 1].to_vec())
}));
}
Ok((Some((&data[..i], TK_FLOAT)), i))
} else {
Ok((Some((data, TK_FLOAT)), data.len()))
}
}
fn exponential_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert!(data[i] == b'e' || data[i] == b'E');
// data[i] == 'e'|'E'
if let Some(b) = data.get(i + 1) {
let i = if *b == b'+' || *b == b'-' { i + 1 } else { i };
if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
if j == i + 1 || is_identifier_start(b) {
let len = if is_identifier_start(b) { j + 1 } else { j };
return Err(Error::BadNumber(None, None, Some(len), unsafe {
String::from_utf8_unchecked(data[..len].to_vec())
}));
}
Ok((Some((&data[..j], TK_FLOAT)), j))
} else {
if data.len() == i + 1 {
return Err(Error::BadNumber(None, None, Some(i + 1), unsafe {
String::from_utf8_unchecked(data[..i + 1].to_vec())
}));
}
Ok((Some((data, TK_FLOAT)), data.len()))
}
} else {
Err(Error::BadNumber(None, None, Some(data.len()), unsafe {
String::from_utf8_unchecked(data.to_vec())
}))
}
}
fn find_end_of_number(
data: &[u8],
i: usize,
test: fn(&u8) -> bool,
) -> Result<Option<(usize, u8)>, Error> {
for (j, &b) in data.iter().enumerate().skip(i) {
if test(&b) {
continue;
} else if b == b'_' {
if j >= 1 && data.get(j - 1).is_some_and(test) && data.get(j + 1).is_some_and(test) {
continue;
}
return Err(Error::BadNumber(None, None, Some(j), unsafe {
String::from_utf8_unchecked(data[..j].to_vec())
}));
} else {
return Ok(Some((j, b)));
}
}
Ok(None)
}
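// For example (illustrative): `1_000` scans as TK_INTEGER and `0xF_F` as a
// hex TK_INTEGER, while `1_` and `1__0` raise BadNumber -- an underscore
// must have a digit on both sides.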
impl Tokenizer {
fn identifierish<'input>(&mut self, data: &'input [u8]) -> (Option<Token<'input>>, usize) {
debug_assert!(is_identifier_start(data[0]));
// data[0] is_identifier_start => skip(1)
let end = data
.iter()
.skip(1)
.position(|&b| !is_identifier_continue(b));
let i = match end {
Some(i) => i + 1,
_ => data.len(),
};
let word = &data[..i];
(Some((word, keyword_token(word).unwrap_or(TK_ID))), i)
}
}
#[cfg(test)]
mod tests {
use super::Tokenizer;
use crate::dialect::TokenType;
use crate::lexer::sql::Error;
use crate::lexer::Scanner;
#[test]
fn fallible_iterator() -> Result<(), Error> {
let tokenizer = Tokenizer::new();
let input = b"PRAGMA parser_trace=ON;";
let mut s = Scanner::new(tokenizer);
expect_token(&mut s, input, b"PRAGMA", TokenType::TK_PRAGMA)?;
expect_token(&mut s, input, b"parser_trace", TokenType::TK_ID)?;
Ok(())
}
#[test]
fn invalid_number_literal() -> Result<(), Error> {
let tokenizer = Tokenizer::new();
let input = b"SELECT 1E;";
let mut s = Scanner::new(tokenizer);
expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?;
let err = s.scan(input).unwrap_err();
assert!(matches!(err, Error::BadNumber(_, _, _, _)));
Ok(())
}
fn expect_token(
s: &mut Scanner<Tokenizer>,
input: &[u8],
token: &[u8],
token_type: TokenType,
) -> Result<(), Error> {
let (t, tt) = s.scan(input)?.1.unwrap();
assert_eq!(token, t);
assert_eq!(token_type, tt);
Ok(())
}
}

View File

@@ -1,375 +0,0 @@
use fallible_iterator::FallibleIterator;
use super::{Error, Parser};
use crate::parser::ast::fmt::ToTokens;
use crate::parser::{
ast::{Cmd, ParameterInfo, Stmt},
ParserError,
};
#[test]
fn count_placeholders() {
let ast = parse_cmd(b"SELECT ? WHERE 1 = ?");
let mut info = ParameterInfo::default();
ast.to_tokens(&mut info).unwrap();
assert_eq!(info.count, 2);
}
#[test]
fn count_numbered_placeholders() {
let ast = parse_cmd(b"SELECT ?1 WHERE 1 = ?2 AND 0 = ?1");
let mut info = ParameterInfo::default();
ast.to_tokens(&mut info).unwrap();
assert_eq!(info.count, 2);
}
#[test]
fn count_unused_placeholders() {
let ast = parse_cmd(b"SELECT ?1 WHERE 1 = ?3");
let mut info = ParameterInfo::default();
ast.to_tokens(&mut info).unwrap();
assert_eq!(info.count, 3);
}
#[test]
fn count_named_placeholders() {
let ast = parse_cmd(b"SELECT :x, :y WHERE 1 = :y");
let mut info = ParameterInfo::default();
ast.to_tokens(&mut info).unwrap();
assert_eq!(info.count, 2);
assert_eq!(info.names.len(), 2);
assert!(info.names.contains(":x"));
assert!(info.names.contains(":y"));
}
#[test]
fn duplicate_column() {
expect_parser_err_msg(
b"CREATE TABLE t (x TEXT, x TEXT)",
"duplicate column name: x",
);
expect_parser_err_msg(
b"CREATE TABLE t (x TEXT, \"x\" TEXT)",
"duplicate column name: \"x\"",
);
expect_parser_err_msg(
b"CREATE TABLE t (x TEXT, `x` TEXT)",
"duplicate column name: `x`",
);
}
#[test]
fn create_table_without_column() {
expect_parser_err(
b"CREATE TABLE t ()",
ParserError::SyntaxError(")".to_owned()),
);
}
#[test]
fn vtab_args() -> Result<(), Error> {
let sql = b"CREATE VIRTUAL TABLE mail USING fts3(
subject VARCHAR(256) NOT NULL,
body TEXT CHECK(length(body)<10240)
);";
let r = parse_cmd(sql);
let Cmd::Stmt(Stmt::CreateVirtualTable(create_virtual_table)) = r else {
panic!("unexpected AST")
};
assert_eq!(create_virtual_table.tbl_name.name, "mail");
assert_eq!(create_virtual_table.module_name.as_str(), "fts3");
let args = create_virtual_table.args.as_ref().unwrap();
assert_eq!(args.len(), 2);
assert_eq!(args[0], "subject VARCHAR(256) NOT NULL");
assert_eq!(args[1], "body TEXT CHECK(length(body)<10240)");
Ok(())
}
#[test]
fn only_semicolons_no_statements() {
let sqls = ["", ";", ";;;"];
for sql in &sqls {
let r = parse(sql.as_bytes());
assert_eq!(r.unwrap(), None);
}
}
#[test]
fn extra_semicolons_between_statements() {
let sqls = [
"SELECT 1; SELECT 2",
"SELECT 1; SELECT 2;",
"; SELECT 1; SELECT 2",
";; SELECT 1;; SELECT 2;;",
];
for sql in &sqls {
let mut parser = Parser::new(sql.as_bytes());
assert!(matches!(
parser.next().unwrap(),
Some(Cmd::Stmt(Stmt::Select { .. }))
));
assert!(matches!(
parser.next().unwrap(),
Some(Cmd::Stmt(Stmt::Select { .. }))
));
assert_eq!(parser.next().unwrap(), None);
}
}
#[test]
fn extra_comments_between_statements() {
let sqls = [
"-- abc\nSELECT 1; --def\nSELECT 2 -- ghj",
"/* abc */ SELECT 1; /* def */ SELECT 2; /* ghj */",
"/* abc */; SELECT 1 /* def */; SELECT 2 /* ghj */",
"/* abc */;; SELECT 1;/* def */; SELECT 2; /* ghj */; /* klm */",
];
for sql in &sqls {
let mut parser = Parser::new(sql.as_bytes());
assert!(matches!(
parser.next().unwrap(),
Some(Cmd::Stmt(Stmt::Select { .. }))
));
assert!(matches!(
parser.next().unwrap(),
Some(Cmd::Stmt(Stmt::Select { .. }))
));
assert_eq!(parser.next().unwrap(), None);
}
}
#[test]
fn insert_mismatch_count() {
expect_parser_err_msg(b"INSERT INTO t (a, b) VALUES (1)", "1 values for 2 columns");
}
#[test]
fn insert_default_values() {
expect_parser_err_msg(
b"INSERT INTO t (a) DEFAULT VALUES",
"0 values for 1 columns",
);
}
#[test]
fn create_view_mismatch_count() {
expect_parser_err_msg(
b"CREATE VIEW v (c1, c2) AS SELECT 1",
"expected 2 columns for v but got 1",
);
}
#[test]
fn create_view_duplicate_column_name() {
expect_parser_err_msg(
b"CREATE VIEW v (c1, c1) AS SELECT 1, 2",
"duplicate column name: c1",
);
}
#[test]
fn create_table_without_rowid_missing_pk() {
expect_parser_err_msg(
b"CREATE TABLE t (c1) WITHOUT ROWID",
"PRIMARY KEY missing on table t",
);
}
#[test]
fn create_temporary_table_with_qualified_name() {
expect_parser_err_msg(
b"CREATE TEMPORARY TABLE mem.x AS SELECT 1",
"temporary table name must be unqualified",
);
parse_cmd(b"CREATE TEMPORARY TABLE temp.x AS SELECT 1");
}
#[test]
fn create_table_with_only_generated_column() {
expect_parser_err_msg(
b"CREATE TABLE test (data AS (1))",
"must have at least one non-generated column",
);
}
#[test]
fn create_strict_table_missing_datatype() {
expect_parser_err_msg(b"CREATE TABLE t (c1) STRICT", "missing datatype for t.c1");
}
#[test]
fn create_strict_table_unknown_datatype() {
expect_parser_err_msg(
b"CREATE TABLE t (c1 BOOL) STRICT",
"unknown datatype for t.c1: \"BOOL\"",
);
}
#[test]
fn foreign_key_on_column() {
expect_parser_err_msg(
b"CREATE TABLE t (a REFERENCES o(a,b))",
"foreign key on a should reference only one column of table o",
);
}
#[test]
fn create_strict_table_generated_column() {
parse_cmd(
b"CREATE TABLE IF NOT EXISTS transactions (
debit REAL,
credit REAL,
amount REAL GENERATED ALWAYS AS (ifnull(credit, 0.0) -ifnull(debit, 0.0))
) STRICT;",
);
}
#[test]
fn selects_compound_mismatch_columns_count() {
expect_parser_err_msg(
b"SELECT 1 UNION SELECT 1, 2",
"SELECTs to the left and right of UNION do not have the same number of result columns",
);
}
#[test]
fn delete_order_by_without_limit() {
expect_parser_err_msg(
b"DELETE FROM t ORDER BY x",
"ORDER BY without LIMIT on DELETE",
);
}
#[test]
fn update_order_by_without_limit() {
expect_parser_err_msg(
b"UPDATE t SET x = 1 ORDER BY x",
"ORDER BY without LIMIT on UPDATE",
);
}
#[test]
fn values_mismatch_columns_count() {
expect_parser_err_msg(
b"INSERT INTO t VALUES (1), (1,2)",
"all VALUES must have the same number of terms",
);
}
#[test]
fn column_specified_more_than_once() {
expect_parser_err_msg(
b"INSERT INTO t (n, n, m) VALUES (1, 0, 2)",
"column \"n\" specified more than once",
)
}
#[test]
fn alter_add_column_primary_key() {
expect_parser_err_msg(
b"ALTER TABLE t ADD COLUMN c PRIMARY KEY",
"Cannot add a PRIMARY KEY column",
);
}
#[test]
fn alter_add_column_unique() {
expect_parser_err_msg(
b"ALTER TABLE t ADD COLUMN c UNIQUE",
"Cannot add a UNIQUE column",
);
}
#[test]
fn natural_join_on() {
expect_parser_err_msg(
b"SELECT x FROM t NATURAL JOIN t USING (x)",
"a NATURAL join may not have an ON or USING clause",
);
expect_parser_err_msg(
b"SELECT x FROM t NATURAL JOIN t ON t.x = t.x",
"a NATURAL join may not have an ON or USING clause",
);
}
#[test]
fn missing_join_clause() {
expect_parser_err_msg(
b"SELECT a FROM tt ON b",
"a JOIN clause is required before ON",
);
}
#[test]
fn cast_without_typename() {
parse_cmd(b"SELECT CAST(a AS ) FROM t");
}
#[test]
fn unknown_table_option() {
expect_parser_err_msg(b"CREATE TABLE t (x)o", "unknown table option: o");
expect_parser_err_msg(b"CREATE TABLE t (x) WITHOUT o", "unknown table option: o");
}
#[test]
fn qualified_table_name_within_triggers() {
expect_parser_err_msg(
b"CREATE TRIGGER tr1 AFTER INSERT ON t1 BEGIN
DELETE FROM main.t2;
END;",
"qualified table names are not allowed on INSERT, UPDATE, and DELETE statements \
within triggers",
);
}
#[test]
fn select_from_error_stops_at_first_error() {
let mut parser = Parser::new(b"SELECT FROM foo;");
// First next() call should return the first syntax error
let err = parser.next().unwrap_err();
assert!(matches!(err, Error::ParserError(_, _, _)));
// Second next() call should return Ok(None) since parsing should have stopped
assert_eq!(parser.next().unwrap(), None);
// Third next() call should also return Ok(None)
assert_eq!(parser.next().unwrap(), None);
}
#[test]
fn indexed_by_clause_within_triggers() {
expect_parser_err_msg(
b"CREATE TRIGGER main.t16err5 AFTER INSERT ON tA BEGIN
UPDATE t16 INDEXED BY t16a SET rowid=rowid+1 WHERE a=1;
END;",
"the INDEXED BY clause is not allowed on UPDATE or DELETE statements \
within triggers",
);
expect_parser_err_msg(
b"CREATE TRIGGER main.t16err6 AFTER INSERT ON tA BEGIN
DELETE FROM t16 NOT INDEXED WHERE a=123;
END;",
"the NOT INDEXED clause is not allowed on UPDATE or DELETE statements \
within triggers",
);
}
fn expect_parser_err_msg(input: &[u8], error_msg: &str) {
expect_parser_err(input, ParserError::Custom(error_msg.to_owned()))
}
fn expect_parser_err(input: &[u8], err: ParserError) {
let r = parse(input);
if let Error::ParserError(e, _, _) = r.unwrap_err() {
assert_eq!(e, err);
} else {
panic!("unexpected error type")
};
}
fn parse_cmd(input: &[u8]) -> Cmd {
parse(input).unwrap().unwrap()
}
fn parse(input: &[u8]) -> Result<Option<Cmd>, Error> {
let mut parser = Parser::new(input);
parser.next()
}

View File

@@ -1,9 +0,0 @@
//! SQLite3 syntax lexer and parser
#![warn(missing_docs)]
pub mod dialect;
// In Lemon, the tokenizer calls the parser.
pub mod lexer;
mod parser;
pub use parser::ast;
pub mod to_sql_string;

View File

@@ -1,358 +0,0 @@
//! Check for additional syntax error
use crate::ast::*;
use crate::custom_err;
use std::fmt::{Display, Formatter};
impl Cmd {
/// Statement accessor
pub fn stmt(&self) -> &Stmt {
match self {
Self::Explain(stmt) => stmt,
Self::ExplainQueryPlan(stmt) => stmt,
Self::Stmt(stmt) => stmt,
}
}
/// Like `sqlite3_column_count` but more limited
pub fn column_count(&self) -> ColumnCount {
match self {
Self::Explain(_) => ColumnCount::Fixed(8),
Self::ExplainQueryPlan(_) => ColumnCount::Fixed(4),
Self::Stmt(stmt) => stmt.column_count(),
}
}
/// Like `sqlite3_stmt_isexplain`
pub fn is_explain(&self) -> bool {
matches!(self, Self::Explain(_) | Self::ExplainQueryPlan(_))
}
/// Like `sqlite3_stmt_readonly`
pub fn readonly(&self) -> bool {
self.stmt().readonly()
}
/// check for extra rules
pub fn check(&self) -> Result<(), ParserError> {
self.stmt().check()
}
}
/// Column count
pub enum ColumnCount {
/// With `SELECT *` / PRAGMA
Dynamic,
/// Constant count
Fixed(usize),
/// No column
None,
}
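// For example (illustrative): `SELECT 1, 2` has ColumnCount::Fixed(2),
// `SELECT * FROM t` is ColumnCount::Dynamic, and a DELETE without a
// RETURNING clause reports ColumnCount::None.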
impl ColumnCount {
fn incr(&mut self) {
if let Self::Fixed(n) = self {
*n += 1;
}
}
}
impl Stmt {
/// Like `sqlite3_column_count` but more limited
pub fn column_count(&self) -> ColumnCount {
match self {
Self::Delete(delete) => {
let Delete { returning, .. } = &**delete;
match returning {
Some(returning) => column_count(returning),
None => ColumnCount::None,
}
}
Self::Insert(insert) => {
let Insert { returning, .. } = &**insert;
match returning {
Some(returning) => column_count(returning),
None => ColumnCount::None,
}
}
Self::Pragma(..) => ColumnCount::Dynamic,
Self::Select(s) => s.column_count(),
Self::Update(update) => {
let Update { returning, .. } = &**update;
match returning {
Some(returning) => column_count(returning),
None => ColumnCount::None,
}
}
_ => ColumnCount::None,
}
}
/// Like `sqlite3_stmt_readonly`
pub fn readonly(&self) -> bool {
match self {
Self::Attach { .. } => true,
Self::Begin(..) => true,
Self::Commit(..) => true,
Self::Detach(..) => true,
Self::Pragma(..) => true, // TODO check all
Self::Reindex { .. } => true,
Self::Release(..) => true,
Self::Rollback { .. } => true,
Self::Savepoint(..) => true,
Self::Select(..) => true,
_ => false,
}
}
/// check for extra rules
pub fn check(&self) -> Result<(), ParserError> {
match self {
Self::AlterTable(alter_table) => {
let (_, body) = &**alter_table;
if let AlterTableBody::AddColumn(cd) = body {
for c in cd {
if let ColumnConstraint::PrimaryKey { .. } = c {
return Err(custom_err!("Cannot add a PRIMARY KEY column"));
}
if let ColumnConstraint::Unique(..) = c {
return Err(custom_err!("Cannot add a UNIQUE column"));
}
}
}
Ok(())
}
Self::CreateTable {
temporary,
tbl_name,
body,
..
} => {
if *temporary {
if let Some(ref db_name) = tbl_name.db_name {
if db_name != "TEMP" {
return Err(custom_err!("temporary table name must be unqualified"));
}
}
}
body.check(tbl_name)
}
Self::CreateView {
view_name,
columns: Some(columns),
select,
..
} => {
// The SQLite3 engine renames duplicate view columns; this parser rejects them:
for (i, c) in columns.iter().enumerate() {
for o in &columns[i + 1..] {
if c.col_name == o.col_name {
return Err(custom_err!("duplicate column name: {}", c.col_name,));
}
}
}
// SQLite3 engine raises this error later (not while parsing):
match select.column_count() {
ColumnCount::Fixed(n) if n != columns.len() => Err(custom_err!(
"expected {} columns for {} but got {}",
columns.len(),
view_name,
n
)),
_ => Ok(()),
}
}
Self::Delete(delete) => {
let Delete {
order_by, limit, ..
} = &**delete;
if order_by.is_some() && limit.is_none() {
return Err(custom_err!("ORDER BY without LIMIT on DELETE"));
}
Ok(())
}
Self::Insert(insert) => {
let Insert { columns, body, .. } = &**insert;
if columns.is_none() {
return Ok(());
}
let columns = columns.as_ref().unwrap();
match body {
InsertBody::Select(select, ..) => match select.body.select.column_count() {
ColumnCount::Fixed(n) if n != columns.len() => {
Err(custom_err!("{} values for {} columns", n, columns.len()))
}
_ => Ok(()),
},
InsertBody::DefaultValues => {
Err(custom_err!("0 values for {} columns", columns.len()))
}
}
}
Self::Update(update) => {
let Update {
order_by, limit, ..
} = &**update;
if order_by.is_some() && limit.is_none() {
return Err(custom_err!("ORDER BY without LIMIT on UPDATE"));
}
Ok(())
}
_ => Ok(()),
}
}
}
impl CreateTableBody {
/// check for extra rules
pub fn check(&self, tbl_name: &QualifiedName) -> Result<(), ParserError> {
if let Self::ColumnsAndConstraints {
columns,
constraints: _,
options,
} = self
{
let mut generated_count = 0;
for c in columns.values() {
if c.col_name == "rowid" {
return Err(custom_err!("cannot use reserved word: ROWID"));
}
for cs in &c.constraints {
if let ColumnConstraint::Generated { .. } = cs.constraint {
generated_count += 1;
}
}
}
if generated_count == columns.len() {
return Err(custom_err!("must have at least one non-generated column"));
}
if options.contains(TableOptions::STRICT) {
for c in columns.values() {
match &c.col_type {
Some(Type { name, .. }) => {
// The datatype must be one of the following: INT, INTEGER, REAL, TEXT, BLOB, ANY
if !(name.eq_ignore_ascii_case("INT")
|| name.eq_ignore_ascii_case("INTEGER")
|| name.eq_ignore_ascii_case("REAL")
|| name.eq_ignore_ascii_case("TEXT")
|| name.eq_ignore_ascii_case("BLOB")
|| name.eq_ignore_ascii_case("ANY"))
{
return Err(custom_err!(
"unknown datatype for {}.{}: \"{}\"",
tbl_name,
c.col_name,
name
));
}
}
_ => {
// Every column definition must specify a datatype for that column. The freedom to specify a column without a datatype is removed.
return Err(custom_err!(
"missing datatype for {}.{}",
tbl_name,
c.col_name
));
}
}
}
}
if options.contains(TableOptions::WITHOUT_ROWID) && !self.has_primary_key() {
return Err(custom_err!("PRIMARY KEY missing on table {}", tbl_name,));
}
}
Ok(())
}
/// explicit primary key constraint ?
pub fn has_primary_key(&self) -> bool {
if let Self::ColumnsAndConstraints {
columns,
constraints,
..
} = self
{
for col in columns.values() {
for c in col {
if let ColumnConstraint::PrimaryKey { .. } = c {
return true;
}
}
}
if let Some(constraints) = constraints {
for c in constraints {
if let TableConstraint::PrimaryKey { .. } = c.constraint {
return true;
}
}
}
}
false
}
}
impl<'a> IntoIterator for &'a ColumnDefinition {
type Item = &'a ColumnConstraint;
type IntoIter = std::iter::Map<
std::slice::Iter<'a, NamedColumnConstraint>,
fn(&'a NamedColumnConstraint) -> &'a ColumnConstraint,
>;
fn into_iter(self) -> Self::IntoIter {
self.constraints.iter().map(|nc| &nc.constraint)
}
}
impl Select {
/// Like `sqlite3_column_count` but more limited
pub fn column_count(&self) -> ColumnCount {
self.body.select.column_count()
}
}
impl OneSelect {
/// Like `sqlite3_column_count` but more limited
pub fn column_count(&self) -> ColumnCount {
match self {
Self::Select(select) => {
let SelectInner { columns, .. } = &**select;
column_count(columns)
}
Self::Values(values) => {
assert!(!values.is_empty()); // TODO Validate
ColumnCount::Fixed(values[0].len())
}
}
}
/// Check all VALUES have the same number of terms
pub fn push(values: &mut Vec<Vec<Expr>>, v: Vec<Expr>) -> Result<(), ParserError> {
if values[0].len() != v.len() {
return Err(custom_err!("all VALUES must have the same number of terms"));
}
values.push(v);
Ok(())
}
}
impl Display for QualifiedName {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.to_fmt(f)
}
}
impl ResultColumn {
fn column_count(&self) -> ColumnCount {
match self {
Self::Expr(..) => ColumnCount::Fixed(1),
_ => ColumnCount::Dynamic,
}
}
}
fn column_count(cols: &[ResultColumn]) -> ColumnCount {
assert!(!cols.is_empty());
let mut count = ColumnCount::Fixed(0);
for col in cols {
match col.column_count() {
ColumnCount::Fixed(_) => count.incr(),
_ => return ColumnCount::Dynamic,
}
}
count
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,154 +0,0 @@
//! SQLite parser
pub mod ast;
pub mod parse {
#![expect(unused_braces)]
#![expect(clippy::if_same_then_else)]
#![expect(clippy::absurd_extreme_comparisons)] // FIXME
#![expect(clippy::needless_return)]
#![expect(clippy::upper_case_acronyms)]
#![expect(clippy::manual_range_patterns)]
include!(concat!(env!("OUT_DIR"), "/parse.rs"));
}
use crate::dialect::Token;
use ast::{Cmd, ExplainKind, Name, Stmt};
/// Parser error
#[derive(Debug, Clone, PartialEq)]
pub enum ParserError {
/// Syntax error
SyntaxError(String),
/// Unexpected EOF
UnexpectedEof,
/// Custom error
Custom(String),
}
impl std::fmt::Display for ParserError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::SyntaxError(s) => {
write!(f, "near \"{s}\": syntax error")
}
Self::UnexpectedEof => f.write_str("unexpected end of input"),
Self::Custom(s) => f.write_str(s),
}
}
}
impl std::error::Error for ParserError {}
/// Custom error constructor
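///
/// For example (illustrative):
///
/// ```text
/// custom_err!("{} values for {} columns", 1, 2)
/// // => ParserError::Custom("1 values for 2 columns".to_owned())
/// ```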
#[macro_export]
macro_rules! custom_err {
($msg:literal $(,)?) => {
$crate::parser::ParserError::Custom($msg.to_owned())
};
($err:expr $(,)?) => {
$crate::parser::ParserError::Custom(format!($err))
};
($fmt:expr, $($arg:tt)*) => {
$crate::parser::ParserError::Custom(format!($fmt, $($arg)*))
};
}
/// Parser context
pub struct Context<'input> {
input: &'input [u8],
explain: Option<ExplainKind>,
stmt: Option<Stmt>,
constraint_name: Option<Name>, // transient
module_arg: Option<(usize, usize)>, // Complete text of a module argument
module_args: Option<Vec<String>>, // CREATE VIRTUAL TABLE args
done: bool,
error: Option<ParserError>,
}
impl<'input> Context<'input> {
pub fn new(input: &'input [u8]) -> Self {
Context {
input,
explain: None,
stmt: None,
constraint_name: None,
module_arg: None,
module_args: None,
done: false,
error: None,
}
}
/// Consume parsed command
pub fn cmd(&mut self) -> Option<Cmd> {
if let Some(stmt) = self.stmt.take() {
match self.explain.take() {
Some(ExplainKind::Explain) => Some(Cmd::Explain(stmt)),
Some(ExplainKind::QueryPlan) => Some(Cmd::ExplainQueryPlan(stmt)),
None => Some(Cmd::Stmt(stmt)),
}
} else {
None
}
}
fn constraint_name(&mut self) -> Option<Name> {
self.constraint_name.take()
}
fn no_constraint_name(&self) -> bool {
self.constraint_name.is_none()
}
fn vtab_arg_init(&mut self) {
self.add_module_arg();
self.module_arg = None;
}
fn vtab_arg_extend(&mut self, any: Token) {
if let Some((_, ref mut n)) = self.module_arg {
*n = any.2
} else {
self.module_arg = Some((any.0, any.2))
}
}
fn add_module_arg(&mut self) {
if let Some((start, end)) = self.module_arg.take() {
if let Ok(arg) = std::str::from_utf8(&self.input[start..end]) {
self.module_args.get_or_insert(vec![]).push(arg.to_owned());
} // FIXME error handling
}
}
fn module_args(&mut self) -> Option<Vec<String>> {
self.add_module_arg();
self.module_args.take()
}
/// This routine is called after a single SQL statement has been parsed.
fn sqlite3_finish_coding(&mut self) {
self.done = true;
}
/// Return `true` if the parser has completed, either successfully or with an error.
pub fn done(&self) -> bool {
self.done || self.error.is_some()
}
pub fn is_ok(&self) -> bool {
self.error.is_none()
}
/// Consume error generated by parser
pub fn error(&mut self) -> Option<ParserError> {
self.error.take()
}
pub fn reset(&mut self) {
self.explain = None;
self.stmt = None;
self.constraint_name = None;
self.module_arg = None;
self.module_args = None;
self.done = false;
self.error = None;
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,2 +0,0 @@
#[cfg(test)]
mod tests {}

View File

@@ -1,37 +0,0 @@
//! ToSqlString trait definition and implementations
mod expr;
mod stmt;
use crate::ast::TableInternalId;
/// Context to be used in ToSqlString
pub trait ToSqlContext {
/// Given an id, get the table name
///
/// Currently not considering aliases
fn get_table_name(&self, id: TableInternalId) -> &str;
/// Given a table id and a column index, get the column name
fn get_column_name(&self, table_id: TableInternalId, col_idx: usize) -> String;
}
#[cfg(test)]
mod tests {
use super::ToSqlContext;
struct TestContext;
impl ToSqlContext for TestContext {
fn get_column_name(
&self,
_table_id: crate::ast::TableInternalId,
_col_idx: usize,
) -> String {
"placeholder_column".to_string()
}
fn get_table_name(&self, _id: crate::ast::TableInternalId) -> &str {
"placeholder_table"
}
}
}

View File

@@ -1,74 +0,0 @@
#[cfg(test)]
mod tests {
use crate::to_sql_string_test;
to_sql_string_test!(
test_alter_table_rename,
"ALTER TABLE t RENAME TO new_table_name"
);
to_sql_string_test!(
test_alter_table_add_column,
"ALTER TABLE t ADD COLUMN c INTEGER"
);
to_sql_string_test!(
test_alter_table_add_column_with_default,
"ALTER TABLE t ADD COLUMN c TEXT DEFAULT 'value'"
);
to_sql_string_test!(
test_alter_table_add_column_not_null_default,
"ALTER TABLE t ADD COLUMN c REAL NOT NULL DEFAULT 0.0"
);
to_sql_string_test!(
test_alter_table_add_column_unique,
"ALTER TABLE t ADD COLUMN c TEXT UNIQUE",
ignore = "ParserError = Cannot add a UNIQUE column"
);
to_sql_string_test!(
test_alter_table_rename_column,
"ALTER TABLE t RENAME COLUMN old_name TO new_name"
);
to_sql_string_test!(test_alter_table_drop_column, "ALTER TABLE t DROP COLUMN c");
to_sql_string_test!(
test_alter_table_add_column_check,
"ALTER TABLE t ADD COLUMN c INTEGER CHECK (c > 0)"
);
to_sql_string_test!(
test_alter_table_add_column_foreign_key,
"ALTER TABLE t ADD COLUMN c INTEGER REFERENCES t2 (id) ON DELETE CASCADE"
);
to_sql_string_test!(
test_alter_table_add_column_collate,
"ALTER TABLE t ADD COLUMN c TEXT COLLATE NOCASE"
);
to_sql_string_test!(
test_alter_table_add_column_primary_key,
"ALTER TABLE t ADD COLUMN c INTEGER PRIMARY KEY",
ignore = "ParserError = Cannot add a PRIMARY KEY column"
);
to_sql_string_test!(
test_alter_table_add_column_primary_key_autoincrement,
"ALTER TABLE t ADD COLUMN c INTEGER PRIMARY KEY AUTOINCREMENT",
ignore = "ParserError = Cannot add a PRIMARY KEY column"
);
to_sql_string_test!(
test_alter_table_add_generated_column,
"ALTER TABLE t ADD COLUMN c_generated AS (a + b) STORED"
);
to_sql_string_test!(
test_alter_table_add_column_schema,
"ALTER TABLE schema_name.t ADD COLUMN c INTEGER"
);
}

View File

@@ -1,121 +0,0 @@
#[cfg(test)]
mod tests {
use crate::to_sql_string_test;
to_sql_string_test!(
test_create_table_simple,
"CREATE TABLE t (a INTEGER, b TEXT)"
);
to_sql_string_test!(
test_create_table_primary_key,
"CREATE TABLE t (id INTEGER PRIMARY KEY, name TEXT)"
);
to_sql_string_test!(
test_create_table_multi_primary_key,
"CREATE TABLE t (a INTEGER, b TEXT, PRIMARY KEY (a, b))"
);
to_sql_string_test!(
test_create_table_data_types,
"CREATE TABLE t (a INTEGER, b TEXT, c REAL, d BLOB, e NUMERIC)"
);
to_sql_string_test!(
test_create_table_foreign_key,
"CREATE TABLE t2 (id INTEGER PRIMARY KEY, t_id INTEGER, FOREIGN KEY (t_id) REFERENCES t (id))"
);
to_sql_string_test!(
test_create_table_foreign_key_cascade,
"CREATE TABLE t2 (id INTEGER PRIMARY KEY, t_id INTEGER, FOREIGN KEY (t_id) REFERENCES t (id) ON DELETE CASCADE)"
);
to_sql_string_test!(
test_create_table_unique,
"CREATE TABLE t (a INTEGER UNIQUE, b TEXT)"
);
to_sql_string_test!(
test_create_table_not_null,
"CREATE TABLE t (a INTEGER NOT NULL, b TEXT)"
);
to_sql_string_test!(
test_create_table_check,
"CREATE TABLE t (a INTEGER CHECK (a > 0), b TEXT)"
);
to_sql_string_test!(
test_create_table_default,
"CREATE TABLE t (a INTEGER DEFAULT 0, b TEXT)"
);
to_sql_string_test!(
test_create_table_multiple_constraints,
"CREATE TABLE t (a INTEGER NOT NULL UNIQUE, b TEXT DEFAULT 'default')"
);
to_sql_string_test!(
test_create_table_generated_column,
"CREATE TABLE t (a INTEGER, b INTEGER, c INTEGER AS (a + b))"
);
to_sql_string_test!(
test_create_table_generated_stored,
"CREATE TABLE t (a INTEGER, b INTEGER, c INTEGER AS (a + b) STORED)"
);
to_sql_string_test!(
test_create_table_generated_virtual,
"CREATE TABLE t (a INTEGER, b INTEGER, c INTEGER AS (a + b) VIRTUAL)"
);
to_sql_string_test!(
test_create_table_quoted_columns,
"CREATE TABLE t (\"select\" INTEGER, \"from\" TEXT)"
);
to_sql_string_test!(
test_create_table_quoted_table,
"CREATE TABLE \"my table\" (a INTEGER)"
);
to_sql_string_test!(
test_create_table_if_not_exists,
"CREATE TABLE IF NOT EXISTS t (a INTEGER)"
);
to_sql_string_test!(test_create_temp_table, "CREATE TEMP TABLE t (a INTEGER)");
to_sql_string_test!(
test_create_table_without_rowid,
"CREATE TABLE t (a INTEGER PRIMARY KEY, b TEXT) WITHOUT ROWID"
);
to_sql_string_test!(
test_create_table_named_primary_key,
"CREATE TABLE t (a INTEGER CONSTRAINT pk_a PRIMARY KEY)"
);
to_sql_string_test!(
test_create_table_named_unique,
"CREATE TABLE t (a INTEGER, CONSTRAINT unique_a UNIQUE (a))"
);
to_sql_string_test!(
test_create_table_named_foreign_key,
"CREATE TABLE t2 (id INTEGER, t_id INTEGER, CONSTRAINT fk_t FOREIGN KEY (t_id) REFERENCES t (id))"
);
to_sql_string_test!(
test_create_table_complex,
"CREATE TABLE t (id INTEGER PRIMARY KEY, a INTEGER NOT NULL, b TEXT DEFAULT 'default', c INTEGER AS (a * 2), CONSTRAINT unique_a UNIQUE (a))"
);
to_sql_string_test!(
test_create_table_multiple_foreign_keys,
"CREATE TABLE t3 (id INTEGER PRIMARY KEY, t1_id INTEGER, t2_id INTEGER, FOREIGN KEY (t1_id) REFERENCES t1 (id), FOREIGN KEY (t2_id) REFERENCES t2 (id))"
);
}

View File

@@ -1,59 +0,0 @@
#[cfg(test)]
mod tests {
use crate::to_sql_string_test;
to_sql_string_test!(
test_log_employee_insert,
"CREATE TRIGGER log_employee_insert
AFTER INSERT ON employees
FOR EACH ROW
BEGIN
INSERT INTO employee_log (action, employee_id, timestamp)
VALUES ('INSERT', NEW.id, CURRENT_TIMESTAMP);
END"
);
to_sql_string_test!(
test_log_salary_update,
"CREATE TRIGGER log_salary_update
AFTER UPDATE OF salary ON employees
FOR EACH ROW
BEGIN
INSERT INTO employee_log (action, employee_id, old_value, new_value, timestamp)
VALUES ('UPDATE', OLD.id, OLD.salary, NEW.salary, CURRENT_TIMESTAMP);
END"
);
to_sql_string_test!(
test_log_employee_delete,
"CREATE TRIGGER log_employee_delete
AFTER DELETE ON employees
FOR EACH ROW
BEGIN
INSERT INTO employee_log (action, employee_id, timestamp)
VALUES ('DELETE', OLD.id, CURRENT_TIMESTAMP);
END"
);
to_sql_string_test!(
test_check_salary_insert,
"CREATE TRIGGER check_salary_insert
BEFORE INSERT ON employees
FOR EACH ROW
WHEN NEW.salary < 0
BEGIN
SELECT RAISE (FAIL, 'Salary cannot be negative');
END"
);
to_sql_string_test!(
test_insert_employee_dept,
"CREATE TRIGGER insert_employee_dept
INSTEAD OF INSERT ON employee_dept
FOR EACH ROW
BEGIN
INSERT INTO departments (name) SELECT NEW.department WHERE NOT EXISTS (SELECT 1 FROM departments WHERE name = NEW.department);
INSERT INTO employees (name, department_id) VALUES (NEW.name, (SELECT id FROM departments WHERE name = NEW.department));
END"
);
}

View File

@@ -1,64 +0,0 @@
#[cfg(test)]
mod tests {
use crate::to_sql_string_test;
to_sql_string_test!(
test_create_virtual_table_fts5_basic,
"CREATE VIRTUAL TABLE docs USING fts5 (title, content)"
);
to_sql_string_test!(
test_create_virtual_table_fts5_tokenizer,
"CREATE VIRTUAL TABLE docs USING fts5 (title, content, tokenize = 'porter')"
);
to_sql_string_test!(
test_create_virtual_table_fts5_unindexed,
"CREATE VIRTUAL TABLE docs USING fts5 (title, content, metadata UNINDEXED)"
);
to_sql_string_test!(
test_create_virtual_table_fts5_prefix,
"CREATE VIRTUAL TABLE docs USING fts5 (title, content, tokenize = 'unicode61', prefix = '2 4')"
);
to_sql_string_test!(
test_create_virtual_table_fts5_contentless,
"CREATE VIRTUAL TABLE docs USING fts5 (title, content, content = '')"
);
to_sql_string_test!(
test_create_virtual_table_fts5_external_content,
"CREATE VIRTUAL TABLE docs_fts USING fts5 (title, content, content = 'documents')"
);
to_sql_string_test!(
test_create_virtual_table_rtree,
"CREATE VIRTUAL TABLE geo USING rtree (id, min_x, max_x, min_y, max_y)"
);
to_sql_string_test!(
test_create_virtual_table_rtree_aux,
"CREATE VIRTUAL TABLE geo USING rtree (id, min_x, max_x, min_y, max_y, +name TEXT, +category INTEGER)"
);
to_sql_string_test!(
test_create_virtual_table_if_not_exists,
"CREATE VIRTUAL TABLE IF NOT EXISTS docs USING fts5 (title, content)"
);
to_sql_string_test!(
test_create_virtual_table_fts4,
"CREATE VIRTUAL TABLE docs USING fts4 (title, content, matchinfo = 'fts3')"
);
to_sql_string_test!(
test_create_virtual_table_fts5_detail,
"CREATE VIRTUAL TABLE docs USING fts5 (title, body TEXT, detail = 'none')"
);
to_sql_string_test!(
test_create_virtual_table_schema,
"CREATE VIRTUAL TABLE main.docs USING fts5 (title, content)"
);
}

View File

@@ -1,82 +0,0 @@
#[cfg(test)]
mod tests {
use crate::to_sql_string_test;
// Basic DELETE from a single table
to_sql_string_test!(test_delete_all, "DELETE FROM employees");
// DELETE with a simple WHERE clause
to_sql_string_test!(test_delete_with_where, "DELETE FROM employees WHERE id = 1");
// DELETE with multiple WHERE conditions
to_sql_string_test!(
test_delete_with_multi_where,
"DELETE FROM employees WHERE salary < 50000 AND department_id = 3"
);
// DELETE with IN clause
to_sql_string_test!(
test_delete_with_in,
"DELETE FROM employees WHERE id IN (1, 2, 3)"
);
// DELETE with subquery in WHERE
to_sql_string_test!(
test_delete_with_subquery,
"DELETE FROM employees WHERE department_id IN (SELECT id FROM departments WHERE name = 'Sales')"
);
// DELETE with EXISTS clause
to_sql_string_test!(
test_delete_with_exists,
"DELETE FROM employees WHERE EXISTS (SELECT 1 FROM orders WHERE orders.employee_id = employees.id AND orders.status = 'pending')"
);
// DELETE with RETURNING clause
to_sql_string_test!(
test_delete_with_returning,
"DELETE FROM employees WHERE salary < 30000 RETURNING id, name"
);
// DELETE with LIMIT clause
to_sql_string_test!(
test_delete_with_limit,
"DELETE FROM employees WHERE salary < 40000 LIMIT 5"
);
// DELETE with ORDER BY and LIMIT
to_sql_string_test!(
test_delete_with_order_by_limit,
"DELETE FROM employees WHERE salary < 40000 ORDER BY id DESC LIMIT 5"
);
// DELETE from schema-qualified table
to_sql_string_test!(
test_delete_schema_qualified,
"DELETE FROM main.employees WHERE id = 1"
);
// DELETE with BETWEEN clause
to_sql_string_test!(
test_delete_with_between,
"DELETE FROM employees WHERE salary BETWEEN 30000 AND 50000"
);
// DELETE with NULL check
to_sql_string_test!(
test_delete_with_null,
"DELETE FROM employees WHERE department_id IS NULL"
);
// DELETE with LIKE clause
to_sql_string_test!(
test_delete_with_like,
"DELETE FROM employees WHERE name LIKE 'J%'"
);
// DELETE with complex expression in WHERE
to_sql_string_test!(
test_delete_with_complex_expression,
"DELETE FROM employees WHERE (salary * 1.1) > 60000 AND department_id != 1"
);
}

View File

@@ -1,94 +0,0 @@
#[cfg(test)]
mod tests {
use crate::to_sql_string_test;
// Basic INSERT with all columns
to_sql_string_test!(
test_insert_basic,
"INSERT INTO employees (id, name, salary) VALUES (1, 'John Doe', 50000)"
);
// INSERT with multiple rows
to_sql_string_test!(
test_insert_multiple_rows,
"INSERT INTO employees (id, name, salary) VALUES (1, 'John Doe', 50000), (2, 'Jane Smith', 60000)"
);
// INSERT with specific columns
to_sql_string_test!(
test_insert_specific_columns,
"INSERT INTO employees (name, salary) VALUES ('Alice Brown', 55000)"
);
// INSERT with DEFAULT VALUES
to_sql_string_test!(
test_insert_default_values,
"INSERT INTO employees DEFAULT VALUES"
);
// INSERT with SELECT subquery
to_sql_string_test!(
test_insert_select_subquery,
"INSERT INTO employees (id, name, salary) SELECT id, name, salary FROM temp_employees WHERE salary > 40000"
);
// INSERT with ON CONFLICT IGNORE
to_sql_string_test!(
test_insert_on_conflict_ignore,
"INSERT INTO employees (id, name, salary) VALUES (1, 'John Doe', 50000) ON CONFLICT (id) DO NOTHING"
);
// INSERT with ON CONFLICT REPLACE
to_sql_string_test!(
test_insert_on_conflict_replace,
"INSERT INTO employees (id, name, salary) VALUES (1, 'John Doe', 50000) ON CONFLICT (id) DO UPDATE SET name = excluded.name, salary = excluded.salary"
);
// INSERT with RETURNING clause
to_sql_string_test!(
test_insert_with_returning,
"INSERT INTO employees (id, name, salary) VALUES (1, 'John Doe', 50000) RETURNING id, name"
);
// INSERT with NULL values
to_sql_string_test!(
test_insert_with_null,
"INSERT INTO employees (id, name, salary, department_id) VALUES (1, 'John Doe', NULL, NULL)"
);
// INSERT with expression in VALUES
to_sql_string_test!(
test_insert_with_expression,
"INSERT INTO employees (id, name, salary) VALUES (1, 'John Doe', 50000 * 1.1)"
);
// INSERT into schema-qualified table
to_sql_string_test!(
test_insert_schema_qualified,
"INSERT INTO main.employees (id, name, salary) VALUES (1, 'John Doe', 50000)"
);
// INSERT with subquery and JOIN
to_sql_string_test!(
test_insert_subquery_join,
"INSERT INTO employees (id, name, department_id) SELECT e.id, e.name, d.id FROM temp_employees e JOIN departments d ON e.dept_name = d.name"
);
// INSERT with all columns from SELECT
to_sql_string_test!(
test_insert_all_columns_select,
"INSERT INTO employees SELECT * FROM temp_employees"
);
// INSERT with ON CONFLICT and WHERE clause
to_sql_string_test!(
test_insert_on_conflict_where,
"INSERT INTO employees (id, name, salary) VALUES (1, 'John Doe', 50000) ON CONFLICT (id) DO UPDATE SET salary = excluded.salary WHERE excluded.salary > employees.salary"
);
// INSERT with quoted column names (reserved words)
to_sql_string_test!(
test_insert_quoted_columns,
"INSERT INTO employees (\"select\", \"from\") VALUES (1, 'data')"
);
}

View File

@@ -1,229 +0,0 @@
mod alter_table;
mod create_table;
mod create_trigger;
mod create_virtual_table;
mod delete;
mod insert;
mod select;
mod update;
#[cfg(test)]
mod tests {
use crate::to_sql_string::ToSqlContext;
#[macro_export]
/// Create a test that first parses the input, then converts the parsed AST back to a string and compares it with the original input
macro_rules! to_sql_string_test {
($test_name:ident, $input:expr) => {
#[test]
fn $test_name() {
use $crate::parser::ast::fmt::ToTokens;
let context = $crate::to_sql_string::stmt::tests::TestContext;
let input = $input.split_whitespace().collect::<Vec<&str>>().join(" ");
let mut parser = $crate::lexer::sql::Parser::new(input.as_bytes());
let cmd = fallible_iterator::FallibleIterator::next(&mut parser)
.unwrap()
.unwrap();
assert_eq!(
input,
cmd.stmt().format_with_context(&context).unwrap().replace('\n', " "),
);
}
};
($test_name:ident, $input:expr, $($attribute:meta),*) => {
#[test]
$(#[$attribute])*
fn $test_name() {
use $crate::parser::ast::fmt::ToTokens;
let context = $crate::to_sql_string::stmt::tests::TestContext;
let input = $input.split_whitespace().collect::<Vec<&str>>().join(" ");
let mut parser = $crate::lexer::sql::Parser::new(input.as_bytes());
let cmd = fallible_iterator::FallibleIterator::next(&mut parser)
.unwrap()
.unwrap();
assert_eq!(
input,
cmd.stmt().format_with_context(&context).unwrap().replace('\n', " "),
);
}
}
}
pub(crate) struct TestContext;
// Placeholder context for compilation.
// A real context is only necessary when parsing inside turso_core or in the simulator.
impl ToSqlContext for TestContext {
fn get_column_name(
&self,
_table_id: crate::ast::TableInternalId,
_col_idx: usize,
) -> String {
todo!()
}
fn get_table_name(&self, _id: crate::ast::TableInternalId) -> &str {
todo!()
}
}
to_sql_string_test!(test_analyze, "ANALYZE");
to_sql_string_test!(
test_analyze_table,
"ANALYZE table",
ignore = "parser can't parse table name"
);
to_sql_string_test!(
test_analyze_schema_table,
"ANALYZE schema.table",
ignore = "parser can't parse schema.table name"
);
to_sql_string_test!(test_attach, "ATTACH './test.db' AS test_db");
to_sql_string_test!(test_transaction, "BEGIN");
to_sql_string_test!(test_transaction_deferred, "BEGIN DEFERRED");
to_sql_string_test!(test_transaction_immediate, "BEGIN IMMEDIATE");
to_sql_string_test!(test_transaction_exclusive, "BEGIN EXCLUSIVE");
to_sql_string_test!(test_commit, "COMMIT");
// Test a simple index on a single column
to_sql_string_test!(
test_create_index_simple,
"CREATE INDEX idx_name ON employees (last_name)"
);
// Test a unique index to enforce uniqueness on a column
to_sql_string_test!(
test_create_unique_index,
"CREATE UNIQUE INDEX idx_unique_email ON users (email)"
);
// Test a multi-column index
to_sql_string_test!(
test_create_index_multi_column,
"CREATE INDEX idx_name_salary ON employees (last_name, salary)"
);
// Test a partial index with a WHERE clause
to_sql_string_test!(
test_create_partial_index,
"CREATE INDEX idx_active_users ON users (username) WHERE active = true"
);
// Test an index on an expression
to_sql_string_test!(
test_create_index_on_expression,
"CREATE INDEX idx_upper_name ON employees (UPPER (last_name))"
);
// Test an index with descending order
to_sql_string_test!(
test_create_index_descending,
"CREATE INDEX idx_salary_desc ON employees (salary DESC)"
);
// Test an index with mixed ascending and descending orders on multiple columns
to_sql_string_test!(
test_create_index_mixed_order,
"CREATE INDEX idx_name_asc_salary_desc ON employees (last_name ASC, salary DESC)"
);
// Test 1: View with DISTINCT keyword
to_sql_string_test!(
test_create_view_distinct,
"CREATE VIEW view_distinct AS SELECT DISTINCT name FROM employees"
);
// Test 2: View with LIMIT clause
to_sql_string_test!(
test_create_view_limit,
"CREATE VIEW view_limit AS SELECT id, name FROM employees LIMIT 10"
);
// Test 3: View with CASE expression
to_sql_string_test!(
test_create_view_case,
"CREATE VIEW view_case AS SELECT name, CASE WHEN salary > 70000 THEN 'High' ELSE 'Low' END AS salary_level FROM employees"
);
// Test 4: View with LEFT JOIN
to_sql_string_test!(
test_create_view_left_join,
"CREATE VIEW view_left_join AS SELECT e.name, d.name AS department FROM employees e LEFT OUTER JOIN departments d ON e.department_id = d.id"
);
// Test 5: View with HAVING clause
to_sql_string_test!(
test_create_view_having,
"CREATE VIEW view_having AS SELECT department_id, AVG (salary) AS avg_salary FROM employees GROUP BY department_id HAVING AVG (salary) > 55000"
);
// Test 6: View with CTE (Common Table Expression)
to_sql_string_test!(
test_create_view_cte,
"CREATE VIEW view_cte AS WITH high_earners AS (SELECT * FROM employees WHERE salary > 80000) SELECT id, name FROM high_earners"
);
// Test 7: View with multiple conditions in WHERE
to_sql_string_test!(
test_create_view_multi_where,
"CREATE VIEW view_multi_where AS SELECT id, name FROM employees WHERE salary > 50000 AND department_id = 3"
);
// Test 8: View with NULL handling
to_sql_string_test!(
test_create_view_null,
"CREATE VIEW view_null AS SELECT name, COALESCE (salary, 0) AS salary FROM employees"
);
// Test 9: View with subquery in WHERE clause
to_sql_string_test!(
test_create_view_subquery_where,
"CREATE VIEW view_subquery_where AS SELECT name FROM employees WHERE department_id IN (SELECT id FROM departments WHERE name = 'Sales')"
);
// Test 10: View with arithmetic expression
to_sql_string_test!(
test_create_view_arithmetic,
"CREATE VIEW view_arithmetic AS SELECT name, salary * 1.1 AS adjusted_salary FROM employees"
);
to_sql_string_test!(test_detach, "DETACH 'x.db'");
to_sql_string_test!(test_drop_index, "DROP INDEX schema_name.test_index");
to_sql_string_test!(test_drop_table, "DROP TABLE schema_name.test_table");
to_sql_string_test!(test_drop_trigger, "DROP TRIGGER schema_name.test_trigger");
to_sql_string_test!(test_drop_view, "DROP VIEW schema_name.test_view");
to_sql_string_test!(test_pragma_equals, "PRAGMA schema_name.Pragma_name = 1");
to_sql_string_test!(test_pragma_call, "PRAGMA schema_name.Pragma_name_2 (1)");
to_sql_string_test!(test_reindex, "REINDEX schema_name.test_table");
to_sql_string_test!(test_reindex_2, "REINDEX");
to_sql_string_test!(test_release, "RELEASE savepoint_name");
to_sql_string_test!(test_rollback, "ROLLBACK");
to_sql_string_test!(test_rollback_2, "ROLLBACK TO savepoint_name");
to_sql_string_test!(test_savepoint, "SAVEPOINT savepoint_name");
to_sql_string_test!(test_vacuum, "VACUUM");
to_sql_string_test!(test_vacuum_2, "VACUUM schema_name");
to_sql_string_test!(test_vacuum_3, "VACUUM schema_name INTO test.db");
}

View File

@@ -1,138 +0,0 @@
#[cfg(test)]
mod tests {
use crate::to_sql_string_test;
to_sql_string_test!(test_select_basic, "SELECT 1");
to_sql_string_test!(test_select_table, "SELECT * FROM t");
to_sql_string_test!(test_select_table_2, "SELECT a FROM t");
to_sql_string_test!(test_select_multiple_columns, "SELECT a, b, c FROM t");
to_sql_string_test!(test_select_with_alias, "SELECT a AS col1 FROM t");
to_sql_string_test!(test_select_with_table_alias, "SELECT t1.a FROM t AS t1");
to_sql_string_test!(test_select_with_where, "SELECT a FROM t WHERE b = 1");
to_sql_string_test!(
test_select_with_multiple_conditions,
"SELECT a FROM t WHERE b = 1 AND c > 2"
);
to_sql_string_test!(test_select_with_order_by, "SELECT a FROM t ORDER BY a DESC");
to_sql_string_test!(test_select_with_limit, "SELECT a FROM t LIMIT 10");
to_sql_string_test!(test_select_with_offset, "SELECT a FROM t LIMIT 10 OFFSET 5");
to_sql_string_test!(
test_select_with_join,
"SELECT a FROM t JOIN t2 ON t.b = t2.b"
);
to_sql_string_test!(
test_select_with_group_by,
"SELECT a, COUNT (*) FROM t GROUP BY a"
);
to_sql_string_test!(
test_select_with_having,
"SELECT a, COUNT (*) FROM t GROUP BY a HAVING COUNT (*) > 1"
);
to_sql_string_test!(test_select_with_distinct, "SELECT DISTINCT a FROM t");
to_sql_string_test!(test_select_with_function, "SELECT COUNT (a) FROM t");
to_sql_string_test!(
test_select_with_subquery,
"SELECT a FROM (SELECT b FROM t) AS sub"
);
to_sql_string_test!(
test_select_nested_subquery,
"SELECT a FROM (SELECT b FROM (SELECT c FROM t WHERE c > 10) AS sub1 WHERE b < 20) AS sub2"
);
to_sql_string_test!(
test_select_multiple_joins,
"SELECT t1.a, t2.b, t3.c FROM t1 JOIN t2 ON t1.id = t2.id LEFT OUTER JOIN t3 ON t2.id = t3.id"
);
to_sql_string_test!(
test_select_with_cte,
"WITH cte AS (SELECT a FROM t WHERE b = 1) SELECT a FROM cte WHERE a > 10"
);
to_sql_string_test!(
test_select_with_window_function,
"SELECT a, ROW_NUMBER () OVER (PARTITION BY b ORDER BY c DESC) AS rn FROM t"
);
to_sql_string_test!(
test_select_with_complex_where,
"SELECT a FROM t WHERE b IN (1, 2, 3) AND c BETWEEN 10 AND 20 OR d IS NULL"
);
to_sql_string_test!(
test_select_with_case,
"SELECT CASE WHEN a > 0 THEN 'positive' ELSE 'non-positive' END AS result FROM t"
);
to_sql_string_test!(test_select_with_aggregate_and_join, "SELECT t1.a, COUNT (t2.b) FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.id GROUP BY t1.a HAVING COUNT (t2.b) > 5");
to_sql_string_test!(test_select_with_multiple_ctes, "WITH cte1 AS (SELECT a FROM t WHERE b = 1), cte2 AS (SELECT c FROM t2 WHERE d = 2) SELECT cte1.a, cte2.c FROM cte1 JOIN cte2 ON cte1.a = cte2.c");
to_sql_string_test!(
test_select_with_union,
"SELECT a FROM t1 UNION SELECT b FROM t2"
);
to_sql_string_test!(
test_select_with_union_all,
"SELECT a FROM t1 UNION ALL SELECT b FROM t2"
);
to_sql_string_test!(
test_select_with_exists,
"SELECT a FROM t WHERE EXISTS (SELECT 1 FROM t2 WHERE t2.b = t.a)"
);
to_sql_string_test!(
test_select_with_correlated_subquery,
"SELECT a, (SELECT COUNT (*) FROM t2 WHERE t2.b = t.a) AS count_b FROM t"
);
to_sql_string_test!(
test_select_with_complex_order_by,
"SELECT a, b FROM t ORDER BY CASE WHEN a IS NULL THEN 1 ELSE 0 END, b ASC, c DESC"
);
to_sql_string_test!(
test_select_with_full_outer_join,
"SELECT t1.a, t2.b FROM t1 FULL OUTER JOIN t2 ON t1.id = t2.id",
ignore = "OUTER JOIN is incorrectly parsed in parser"
);
to_sql_string_test!(test_select_with_aggregate_window, "SELECT a, SUM (b) OVER (PARTITION BY c ORDER BY d ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS running_sum FROM t");
to_sql_string_test!(
test_select_with_exclude,
"SELECT
c.name,
o.order_id,
o.order_amount,
SUM (o.order_amount) OVER (PARTITION BY c.id
ORDER BY o.order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
EXCLUDE CURRENT ROW) AS running_total_excluding_current
FROM customers c
JOIN orders o ON c.id = o.customer_id
WHERE EXISTS (SELECT 1
FROM orders o2
WHERE o2.customer_id = c.id
AND o2.order_amount > 1000)"
);
}

View File

@@ -1,94 +0,0 @@
#[cfg(test)]
mod tests {
use crate::to_sql_string_test;
// Basic UPDATE with a single column
to_sql_string_test!(
test_update_single_column,
"UPDATE employees SET salary = 55000"
);
// UPDATE with multiple columns
to_sql_string_test!(
test_update_multiple_columns,
"UPDATE employees SET salary = 60000, name = 'John Smith'"
);
// UPDATE with a WHERE clause
to_sql_string_test!(
test_update_with_where,
"UPDATE employees SET salary = 60000 WHERE id = 1"
);
// UPDATE with multiple WHERE conditions
to_sql_string_test!(
test_update_with_multi_where,
"UPDATE employees SET salary = 65000 WHERE department_id = 3 AND salary < 50000"
);
// UPDATE with a subquery in SET
to_sql_string_test!(
test_update_with_subquery_set,
"UPDATE employees SET department_id = (SELECT id FROM departments WHERE name = 'Sales') WHERE id = 1"
);
// UPDATE with a subquery in WHERE
to_sql_string_test!(
test_update_with_subquery_where,
"UPDATE employees SET salary = 70000 WHERE department_id IN (SELECT id FROM departments WHERE name = 'Marketing')"
);
// UPDATE with EXISTS clause
to_sql_string_test!(
test_update_with_exists,
"UPDATE employees SET salary = 75000 WHERE EXISTS (SELECT 1 FROM orders WHERE orders.employee_id = employees.id AND orders.status = 'pending')"
);
// UPDATE with FROM clause (join-like behavior)
to_sql_string_test!(
test_update_with_from,
"UPDATE employees SET salary = 80000 FROM departments WHERE employees.department_id = departments.id AND departments.name = 'Engineering'"
);
// UPDATE with RETURNING clause
to_sql_string_test!(
test_update_with_returning,
"UPDATE employees SET salary = 60000 WHERE id = 1 RETURNING id, name, salary"
);
// UPDATE with expression in SET
to_sql_string_test!(
test_update_with_expression,
"UPDATE employees SET salary = salary * 1.1 WHERE department_id = 2"
);
// UPDATE with NULL value
to_sql_string_test!(
test_update_with_null,
"UPDATE employees SET department_id = NULL WHERE id = 1"
);
// UPDATE with schema-qualified table
to_sql_string_test!(
test_update_schema_qualified,
"UPDATE main.employees SET salary = 65000 WHERE id = 1"
);
// UPDATE with CASE expression
to_sql_string_test!(
test_update_with_case,
"UPDATE employees SET salary = CASE WHEN salary < 50000 THEN 55000 ELSE salary * 1.05 END WHERE department_id = 3"
);
// UPDATE with LIKE clause in WHERE
to_sql_string_test!(
test_update_with_like,
"UPDATE employees SET name = 'Updated' WHERE name LIKE 'J%'"
);
// UPDATE with ON CONFLICT (upsert-like behavior)
to_sql_string_test!(
test_update_with_on_conflict,
"INSERT INTO employees (id, name, salary) VALUES (1, 'John Doe', 50000) ON CONFLICT (id) DO UPDATE SET name = excluded.name, salary = excluded.salary"
);
}

File diff suppressed because it is too large

View File

@@ -1,919 +0,0 @@
/*
** 2000-05-29
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Driver template for the LEMON parser generator.
**
** The "lemon" program processes an LALR(1) input grammar file, then uses
** this template to construct a parser. The "lemon" program inserts text
** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
** interstitial "-" characters) contained in this template is changed into
** the value of the %name directive from the grammar. Otherwise, the content
** of this template is copied straight through into the generated parser
** source file.
**
** The following is the concatenation of all %include directives from the
** input grammar file:
*/
/************ Begin %include sections from the grammar ************************/
%%
/**************** End of %include directives **********************************/
/* These constants specify the various numeric values for terminal symbols.
***************** Begin token definitions *************************************/
%%
/**************** End token definitions ***************************************/
/* The next section is a series of control #defines that adjust
** various aspects of the generated parser.
** YYCODETYPE is the data type used to store the integer codes
** that represent terminal and non-terminal symbols.
** "unsigned char" is used if there are fewer than
** 256 symbols. Larger types otherwise.
** YYNOCODE is a number of type YYCODETYPE that is not used for
** any terminal or nonterminal symbol.
** YYFALLBACK If defined, this indicates that one or more tokens
** (also known as: "terminal symbols") have fall-back
** values which should be used if the original symbol
** would not parse. This permits keywords to sometimes
** be used as identifiers, for example.
** YYACTIONTYPE is the data type used for "action codes" - numbers
** that indicate what to do in response to the next
** token.
** ParseTOKENTYPE is the data type used for the minor type of terminal
** symbols. Background: A "minor type" is a semantic
** value associated with a terminal or non-terminal
** symbol. For example, for an "ID" terminal symbol,
** the minor type might be the name of the identifier.
** Each non-terminal can have a different minor type.
** Terminal symbols all have the same minor type, though.
** This macro defines the minor type for terminal
** symbols.
** YYMINORTYPE is the data type used for all minor types.
** This is typically a union of many types, one of
** which is ParseTOKENTYPE. The entry in the union
** for terminal symbols is called "yy0".
** YYSTACKDEPTH is the maximum depth of the parser's stack. If
** zero, the stack is dynamically sized (this Rust port uses a SmallVec)
** YYERRORSYMBOL is the code number of the error symbol. If not
** defined, no error processing is performed.
** YYNSTATE the combined number of states.
** YYNRULE the number of rules in the grammar
** YYNTOKEN Number of terminal symbols
** YY_MAX_SHIFT Maximum value for shift actions
** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
** YY_ERROR_ACTION The yy_action[] code for syntax error
** YY_ACCEPT_ACTION The yy_action[] code for accept
** YY_NO_ACTION The yy_action[] code for no-op
** YY_MIN_REDUCE Minimum value for reduce actions
** YY_MAX_REDUCE Maximum value for reduce actions
*/
/************* Begin control #defines *****************************************/
%%
/************* End control #defines *******************************************/
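/* Illustrative sketch only -- NOT generated output. For a grammar of
** SQLite's size, the %% block above might expand to definitions roughly
** like the following (the names are real, every value is hypothetical):
**
**   type YYCODETYPE = u16;              // more than 255 grammar symbols
**   type YYACTIONTYPE = u16;
**   const YYNOCODE: YYCODETYPE = 319;   // one past the last symbol code
**   const YYSTACKDEPTH: usize = 100;    // inline capacity of the SmallVec stack
**   const YYFALLBACK: bool = true;      // the grammar uses %fallback
*/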
/* Next are the tables used to determine what action to take based on the
** current state and lookahead token. These tables are used to implement
** functions that take a state number and lookahead value and return an
** action integer.
**
** Suppose the action integer is N. Then the action is determined as
** follows
**
** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead
** token onto the stack and goto state N.
**
** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE.
**
** N == YY_ERROR_ACTION A syntax error has occurred.
**
** N == YY_ACCEPT_ACTION The parser accepts its input.
**
** N == YY_NO_ACTION No such action. Denotes unused
** slots in the yy_action[] table.
**
** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE
** and YY_MAX_REDUCE
**
** The action table is constructed as a single large table named yy_action[].
** Given state S and lookahead X, the action is computed as either:
**
** (A) N = yy_action[ yy_shift_ofst[S] + X ]
** (B) N = yy_default[S]
**
** The (A) formula is preferred. The (B) formula is used instead if
** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X.
**
** The formulas above are for computing the action when the lookahead is
** a terminal symbol. If the lookahead is a non-terminal (as occurs after
** a reduce action) then the yy_reduce_ofst[] array is used in place of
** the yy_shift_ofst[] array.
**
** The following are the tables generated in this section:
**
** yy_action[] A single table containing all actions.
** yy_lookahead[] A table containing the lookahead for each entry in
** yy_action. Used to detect hash collisions.
** yy_shift_ofst[] For each state, the offset into yy_action for
** shifting terminals.
** yy_reduce_ofst[] For each state, the offset into yy_action for
** shifting non-terminals after a reduce.
** yy_default[] Default action for each state.
**
*********** Begin parsing tables **********************************************/
%%
/********** End of lemon-generated parsing tables *****************************/
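/* A minimal sketch (not part of the template) of how formulas (A) and (B)
** above consult the generated tables; yy_find_shift_action below is the
** real implementation, with the fallback and wildcard handling added:
**
**   fn shift_action(s: YYACTIONTYPE, x: YYCODETYPE) -> YYACTIONTYPE {
**       let i = yy_shift_ofst[s as usize] as usize + x as usize;
**       if yy_lookahead[i] == x {
**           yy_action[i]            // formula (A): direct table hit
**       } else {
**           yy_default[s as usize]  // formula (B): collision, use default
**       }
**   }
*/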
/* The next table maps tokens (terminal symbols) into fallback tokens.
** If a construct like the following:
**
** %fallback ID X Y Z.
**
** appears in the grammar, then ID becomes a fallback token for X, Y,
** and Z. Whenever one of the tokens X, Y, or Z is input to the parser
** but it does not parse, the type of the token is changed to ID and
** the parse is retried before an error is thrown.
**
** This feature can be used, for example, to cause some keywords in a language
** to revert to identifiers if the keyword does not apply in the context where
** it appears.
*/
%%
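/* For example, a (hypothetical) declaration such as
**
**   %fallback ID ABORT AFTER.
**
** yields yyFallback[ABORT] == ID and yyFallback[AFTER] == ID, while
** yyFallback[ID] == 0 marks the end of the chain, so the fallback loop in
** yy_find_shift_action is guaranteed to terminate. */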
/* The following structure represents a single element of the
** parser's stack. Information stored includes:
**
** + The state number for the parser at this level of the stack.
**
** + The value of the token stored at this level of the stack.
** (In other words, the "major" token.)
**
** + The semantic value stored at this level of the stack. This is
** the information used by the action routines in the grammar.
** It is sometimes called the "minor" token.
**
** After the "shift" half of a SHIFTREDUCE action, the stateno field
** actually contains the reduce action for the second half of the
** SHIFTREDUCE.
*/
#[expect(non_camel_case_types)]
#[derive(Default)]
pub struct yyStackEntry<'i> {
stateno: YYACTIONTYPE, /* The state-number, or reduce action in SHIFTREDUCE */
major: YYCODETYPE, /* The major token value. This is the code
** number for the token at this stack level */
minor: YYMINORTYPE<'i>, /* The user-supplied minor token value. This
** is the value of the token */
}
/* The state of the parser is completely contained in an instance of
** the following structure */
#[expect(non_camel_case_types)]
pub struct yyParser<'input> {
yyidx: usize, /* Index to top element of the stack */
#[cfg(feature = "YYTRACKMAXSTACKDEPTH")]
yyhwm: usize, /* High-water mark of the stack */
//#[cfg(not(feature = "YYNOERRORRECOVERY"))]
yyerrcnt: i32, /* Shifts remaining before leaving error-recovery mode */
%% /* A place to hold %extra_context */
yystack: smallvec::SmallVec<[yyStackEntry<'input>; YYSTACKDEPTH]>, /* The parser's stack */
}
use std::cmp::Ordering;
use std::ops::Neg;
impl<'input> yyParser<'input> {
#[inline]
fn shift(&self, shift: i8) -> usize {
assert!(shift <= 1);
match shift.cmp(&0) {
Ordering::Equal => self.yyidx,
Ordering::Greater => self.yyidx + shift as usize,
Ordering::Less => self.yyidx.checked_sub(shift.neg() as usize).unwrap(),
}
}
#[inline]
fn yyidx_shift(&mut self, shift: i8) {
match shift.cmp(&0) {
Ordering::Greater => self.yyidx += shift as usize,
Ordering::Less => self.yyidx = self.yyidx.checked_sub(shift.neg() as usize).unwrap(),
Ordering::Equal => {}
}
}
#[inline]
fn yy_move(&mut self, shift: i8) -> yyStackEntry<'input> {
let idx = self.shift(shift);
// TODO: the compiler lowers `std::mem::take` to two `memcpy`s, but
// `yyStackEntry` is 208 bytes, so the copies may not be worth it.
#[cfg(not(target_family = "wasm"))]
assert_eq!(std::mem::size_of::<yyStackEntry>(), 208);
std::mem::take(&mut self.yystack[idx])
}
#[inline]
fn push(&mut self, entry: yyStackEntry<'input>) {
if self.yyidx == self.yystack.len() {
self.yystack.push(entry);
} else {
self.yystack[self.yyidx] = entry;
}
}
}
use std::ops::{Index, IndexMut};
impl<'input> Index<i8> for yyParser<'input> {
type Output = yyStackEntry<'input>;
#[inline]
fn index(&self, shift: i8) -> &yyStackEntry<'input> {
let idx = self.shift(shift);
&self.yystack[idx]
}
}
impl<'input> IndexMut<i8> for yyParser<'input> {
#[inline]
fn index_mut(&mut self, shift: i8) -> &mut yyStackEntry<'input> {
let idx = self.shift(shift);
&mut self.yystack[idx]
}
}
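/* These Index/IndexMut impls address stack slots relative to the current
** top, mirroring the pointer arithmetic of the C template: `self[0]` is the
** top entry and `self[-1]` the one below it. A hedged usage sketch:
**
**   let top_state = self[0].stateno;   // state on top of the stack
**   let below_major = self[-1].major;  // major token one slot down
*/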
#[cfg(not(feature = "NDEBUG"))]
use log::{debug, log_enabled, Level::Debug};
static TARGET: &str = "Parse";
/* For tracing shifts, the names of all terminals and nonterminals
** are required. The following table supplies these names */
#[cfg(any(feature = "YYCOVERAGE", not(feature = "NDEBUG")))]
%%
/* For tracing reduce actions, the names of all rules are required.
*/
#[cfg(not(feature = "NDEBUG"))]
#[rustfmt::skip]
#[expect(non_upper_case_globals)]
static yyRuleName: [&str; YYNRULE] = [
%%
];
/*
** Try to increase the size of the parser stack. Return the number
** of errors. Return 0 on success.
*/
impl yyParser<'_> {
#[inline]
fn yy_grow_stack_if_needed(&mut self) -> bool {
false
}
#[inline]
fn yy_grow_stack_for_push(&mut self) -> bool {
// yystack is not prefilled with zero values like in C, so push a
// default entry whenever the index is about to run past the vector's end.
if self.yyidx == self.yystack.len() {
self.yystack.push(yyStackEntry::default());
} else if self.yyidx + 1 == self.yystack.len() {
self.yystack.push(yyStackEntry::default());
}
false
}
}
/* Initialize a new parser.
*/
impl yyParser<'_> {
pub fn new(
%% /* Optional %extra_context parameter */
) -> yyParser {
yyParser {
yyidx: 0,
#[cfg(feature = "YYTRACKMAXSTACKDEPTH")]
yyhwm: 0,
yystack: smallvec::smallvec![yyStackEntry::default()],
//#[cfg(not(feature = "YYNOERRORRECOVERY"))]
yyerrcnt: -1,
%% /* Optional %extra_context store */
}
}
}
/*
** Pop the parser's stack once.
*/
impl yyParser<'_> {
#[inline]
fn yy_pop_parser_stack(&mut self) {
use std::mem::take;
let _yytos = take(&mut self.yystack[self.yyidx]);
self.yyidx = self.yyidx.checked_sub(1).unwrap();
//assert_eq!(self.yyidx+1, self.yystack.len());
#[cfg(not(feature = "NDEBUG"))]
{
debug!(
target: TARGET,
"Popping {}", yyTokenName[_yytos.major as usize]
);
}
}
}
/*
** Clear all secondary memory allocations from the parser
*/
impl yyParser<'_> {
#[expect(non_snake_case)]
#[inline]
pub fn ParseFinalize(&mut self) {
while self.yyidx > 0 {
self.yy_pop_parser_stack();
}
// TODO check all elements remaining in yystack are yyinit()
}
}
/*
** Return the peak depth of the stack for a parser.
*/
#[cfg(feature = "YYTRACKMAXSTACKDEPTH")]
impl yyParser<'_> {
#[expect(non_snake_case)]
#[inline]
pub fn ParseStackPeak(&self) -> usize {
self.yyhwm
}
#[inline]
fn yyhwm_incr(&mut self) {
if self.yyidx > self.yyhwm {
self.yyhwm += 1;
assert_eq!(self.yyhwm, self.yyidx);
}
}
}
#[cfg(not(feature = "YYTRACKMAXSTACKDEPTH"))]
impl yyParser<'_> {
#[inline]
fn yyhwm_incr(&mut self) {}
}
/* This array of booleans keeps track of the parser statement
** coverage. The element yycoverage[X][Y] is set when the parser
** is in state X and has a lookahead token Y. In a well-tested
** system, every element of this matrix should end up being set.
*/
// #[cfg(feature = "YYCOVERAGE")]
// static yycoverage: [[bool; YYNTOKEN]; YYNSTATE] = [];
/*
** Write into out a description of every state/lookahead combination that
**
** (1) has not been used by the parser, and
** (2) is not a syntax error.
**
** Return the number of missed state/lookahead combinations.
*/
#[cfg(feature = "YYCOVERAGE")]
fn ParseCoverage(/*FILE *out*/) -> i32 {
//int stateno, iLookAhead, i;
let nMissed = 0;
/*for(stateno=0; stateno<YYNSTATE; stateno++){
i = yy_shift_ofst[stateno];
for(iLookAhead=0; iLookAhead<YYNTOKEN; iLookAhead++){
if( yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
if( yycoverage[stateno][iLookAhead]==0 ) nMissed++;
if( out ){
fprintf(out,"State %d lookahead %s %s\n", stateno,
yyTokenName[iLookAhead],
yycoverage[stateno][iLookAhead] ? "ok" : "missed");
}
}
}*/
nMissed
}
/*
** Find the appropriate action for a parser given the terminal
** look-ahead token iLookAhead.
*/
#[expect(non_snake_case)]
fn yy_find_shift_action(
mut iLookAhead: YYCODETYPE, /* The look-ahead token */
stateno: YYACTIONTYPE, /* Current state number */
) -> YYACTIONTYPE {
if stateno > YY_MAX_SHIFT {
return stateno;
}
assert!(stateno <= YY_SHIFT_COUNT);
#[cfg(feature = "YYCOVERAGE")]
{
//yycoverage[stateno][iLookAhead] = true;
}
loop {
let mut i = yy_shift_ofst[stateno as usize] as usize;
assert!(i <= YY_ACTTAB_COUNT!());
assert!(i + usize::from(YYNTOKEN) <= yy_lookahead.len());
assert_ne!(iLookAhead, YYNOCODE);
assert!((iLookAhead as YYACTIONTYPE) < YYNTOKEN);
i += iLookAhead as usize;
if yy_lookahead[i] != iLookAhead {
if YYFALLBACK {
let iFallback = yyFallback[iLookAhead as usize]; /* Fallback token */
if iFallback != 0 {
#[cfg(not(feature = "NDEBUG"))]
{
debug!(
target: TARGET,
"FALLBACK {} => {}",
yyTokenName[iLookAhead as usize],
yyTokenName[iFallback as usize]
);
}
assert_eq!(yyFallback[iFallback as usize], 0); /* Fallback loop must terminate */
iLookAhead = iFallback;
continue;
}
}
if YYWILDCARD > 0 {
let j = i - iLookAhead as usize + YYWILDCARD as usize;
if yy_lookahead[j] == YYWILDCARD && iLookAhead > 0 {
#[cfg(not(feature = "NDEBUG"))]
{
debug!(
target: TARGET,
"WILDCARD {} => {}",
yyTokenName[iLookAhead as usize],
yyTokenName[YYWILDCARD as usize]
);
}
return yy_action[j];
}
} /* YYWILDCARD */
return yy_default[stateno as usize];
} else {
return yy_action[i];
}
}
}
/*
** Find the appropriate action for a parser given the non-terminal
** look-ahead token iLookAhead.
*/
#[expect(non_snake_case)]
fn yy_find_reduce_action(
stateno: YYACTIONTYPE, /* Current state number */
iLookAhead: YYCODETYPE, /* The look-ahead token */
) -> YYACTIONTYPE {
if YYERRORSYMBOL > 0 {
if stateno > YY_REDUCE_COUNT {
return yy_default[stateno as usize];
}
} else {
assert!(stateno <= YY_REDUCE_COUNT);
}
let mut i: i32 = yy_reduce_ofst[stateno as usize].into();
assert_ne!(iLookAhead, YYNOCODE);
i += i32::from(iLookAhead);
if YYERRORSYMBOL > 0 {
if !(0..YY_ACTTAB_COUNT!()).contains(&i) || yy_lookahead[i as usize] != iLookAhead {
return yy_default[stateno as usize];
}
} else {
assert!((0..YY_ACTTAB_COUNT!()).contains(&i));
assert_eq!(yy_lookahead[i as usize], iLookAhead);
}
yy_action[i as usize]
}
/******** Begin %stack_overflow code ******************************************
%%
******** End %stack_overflow code ********************************************/
/*
** Print tracing information for a SHIFT action
*/
impl yyParser<'_> {
#[expect(non_snake_case)]
#[cfg(feature = "NDEBUG")]
#[inline]
fn yyTraceShift(&self, _: YYACTIONTYPE, _: &str) {}
#[expect(non_snake_case)]
#[cfg(not(feature = "NDEBUG"))]
fn yyTraceShift(&self, yyNewState: YYACTIONTYPE, zTag: &str) {
let yytos = &self[0];
if yyNewState < YYNSTATE {
debug!(
target: TARGET,
"{} '{}', go to state {}", zTag, yyTokenName[yytos.major as usize], yyNewState
);
} else {
debug!(
target: TARGET,
"{} '{}', pending reduce {:?}",
zTag,
yyTokenName[yytos.major as usize],
yyNewState.checked_sub(YY_MIN_REDUCE)
);
}
}
}
/*
** Perform a shift action.
*/
impl<'input> yyParser<'input> {
#[expect(non_snake_case)]
fn yy_shift(
&mut self,
mut yyNewState: YYACTIONTYPE, /* The new state to shift in */
yyMajor: YYCODETYPE, /* The major token to shift in */
yyMinor: ParseTOKENTYPE<'input>, /* The minor token to shift in */
) {
self.yyidx_shift(1);
self.yyhwm_incr();
if self.yy_grow_stack_if_needed() {
return;
}
if yyNewState > YY_MAX_SHIFT {
yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE;
}
let yytos = yyStackEntry {
stateno: yyNewState,
major: yyMajor,
minor: YYMINORTYPE::yy0(yyMinor),
};
self.push(yytos);
self.yyTraceShift(yyNewState, "Shift");
}
}
/* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side
** of that rule */
#[expect(non_upper_case_globals)]
static yyRuleInfoLhs: [YYCODETYPE; YYNRULE] = [
%%
];
/* For rule J, yyRuleInfoNRhs[J] contains the negative of the number
** of symbols on the right-hand side of that rule. */
#[expect(non_upper_case_globals)]
static yyRuleInfoNRhs: [i8; YYNRULE] = [
%%
];
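/* Worked example (rule and symbol codes hypothetical): for a rule
**
**   expr ::= expr PLUS expr.
**
** yyRuleInfoLhs[J] holds the nonterminal code for `expr`, and
** yyRuleInfoNRhs[J] is -3: the reduce pops the three right-hand-side
** symbols off the stack before pushing the left-hand side. */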
/*
** Perform a reduce action and the shift that must immediately
** follow the reduce.
**
** The yy_look_ahead and yy_lookahead_token parameters provide reduce actions
** access to the lookahead token (if any). yy_look_ahead will be YYNOCODE
** if the lookahead token has already been consumed. As this procedure is
** only called from one place, optimizing compilers will in-line it, which
** means that the extra parameters have no performance impact.
*/
impl yyParser<'_> {
fn yy_reduce(
&mut self,
yyruleno: YYACTIONTYPE, /* Number of the rule by which to reduce */
yy_look_ahead: YYCODETYPE, /* Lookahead token, or YYNOCODE if none */
yy_lookahead_token: &ParseTOKENTYPE, /* Value of the lookahead token */
) -> Result<YYACTIONTYPE, ParseError> {
let _ = yy_look_ahead;
let _ = yy_lookahead_token;
let yylhsminor: YYMINORTYPE<'_>;
match yyruleno {
/* Beginning here are the reduction cases. A typical example
** follows:
** case 0:
** #line <lineno> <grammarfile>
** { ... } // User supplied code
** #line <lineno> <thisfile>
** break;
*/
/********** Begin reduce actions **********************************************/
%%
/********** End reduce actions ************************************************/
};
let yygoto: YYCODETYPE = yyRuleInfoLhs[yyruleno as usize]; /* The next state */
let yysize: i8 = yyRuleInfoNRhs[yyruleno as usize]; /* Amount to pop the stack */
let yyact: YYACTIONTYPE = yy_find_reduce_action(self[yysize].stateno, yygoto); /* The next action */
/* There are no SHIFTREDUCE actions on nonterminals because the table
** generator has simplified them to pure REDUCE actions. */
assert!(!(yyact > YY_MAX_SHIFT && yyact <= YY_MAX_SHIFTREDUCE));
/* It is not possible for a REDUCE to be followed by an error */
assert_ne!(yyact, YY_ERROR_ACTION);
self.yyidx_shift(yysize + 1);
{
let yymsp = &mut self[0];
yymsp.stateno = yyact;
yymsp.major = yygoto;
}
self.yyTraceShift(yyact, "... then shift");
Ok(yyact)
}
}
/*
** The following code executes when the parse fails
*/
impl yyParser<'_> {
#[cfg(not(feature = "YYNOERRORRECOVERY"))]
fn yy_parse_failed(&mut self) {
#[cfg(not(feature = "NDEBUG"))]
{
log::error!(target: TARGET, "Fail!");
}
while self.yyidx > 0 {
self.yy_pop_parser_stack();
}
/* Here code is inserted which will be executed whenever the
** parser fails */
/************ Begin %parse_failure code ***************************************/
%%
/************ End %parse_failure code *****************************************/
}
#[cfg(feature = "YYNOERRORRECOVERY")]
fn yy_parse_failed(&mut self) {}
}
/*
** The following code executes when a syntax error first occurs.
*/
impl yyParser<'_> {
fn yy_syntax_error(
&mut self,
yymajor: YYCODETYPE, /* The major type of the error token */
yyminor: &ParseTOKENTYPE, /* The minor type of the error token */
) {
/************ Begin %syntax_error code ****************************************/
%%
/************ End %syntax_error code ******************************************/
}
}
/*
** The following is executed when the parser accepts
*/
impl yyParser<'_> {
fn yy_accept(&mut self) {
#[cfg(not(feature = "NDEBUG"))]
{
debug!(target: TARGET, "Accept!");
}
if cfg!(not(feature = "YYNOERRORRECOVERY")) {
self.yyerrcnt = -1;
}
assert_eq!(self.yyidx, 0);
/* Here code is inserted which will be executed whenever the
** parser accepts */
/*********** Begin %parse_accept code *****************************************/
%%
/*********** End %parse_accept code *******************************************/
}
}
/* The main parser program.
** The first argument is a pointer to a structure obtained from
** "ParseAlloc" which describes the current state of the parser.
** The second argument is the major token number. The third is
** the minor token. The fourth optional argument is whatever the
** user wants (and specified in the grammar) and is available for
** use by the action routines.
**
** Inputs:
** <ul>
** <li> A pointer to the parser (an opaque structure.)
** <li> The major token number.
** <li> The minor token number.
** <li> An optional argument of a grammar-specified type.
** </ul>
**
** Outputs:
** None.
*/
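/* A hedged driver sketch (names hypothetical; the real driver lives in this
** crate's lexer module): the tokenizer feeds each token to Parse() and ends
** with the EOF token so that any pending reduces run to completion:
**
**   let mut p = yyParser::new(ctx);
**   while let Some(token) = tokenizer.next()? {
**       p.Parse(token.token_type, token)?;
**   }
**   p.Parse(TokenType::TK_EOF, eof_token)?;
**   p.ParseFinalize();
*/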
impl<'input> yyParser<'input> {
#[expect(non_snake_case)]
pub fn Parse(
&mut self,
yymajor: TokenType, /* The major token code number */
yyminor: ParseTOKENTYPE<'input>, /* The value for the token */
) -> Result<(), ParseError> {
let mut yymajor = yymajor as YYCODETYPE;
//#[cfg(all(not(feature = "YYERRORSYMBOL"), not(feature = "YYNOERRORRECOVERY")))]
let mut yyendofinput: bool = false; /* True if we are at the end of input */
//#[cfg(feature = "YYERRORSYMBOL")]
let mut yyerrorhit: bool = false; /* True if yymajor has invoked an error */
//assert_ne!( self[0], null );
if YYERRORSYMBOL == 0 && cfg!(not(feature = "YYNOERRORRECOVERY")) {
yyendofinput = yymajor == 0;
}
let mut yyact: YYACTIONTYPE = self[0].stateno; /* The parser action. */
#[cfg(not(feature = "NDEBUG"))]
{
if yyact < YY_MIN_REDUCE {
debug!(
target: TARGET,
"Input '{}' in state {}", yyTokenName[yymajor as usize], yyact
);
} else {
debug!(
target: TARGET,
"Input '{}' with pending reduce {}",
yyTokenName[yymajor as usize],
yyact - YY_MIN_REDUCE
);
}
}
loop {
assert_eq!(yyact, self[0].stateno);
yyact = yy_find_shift_action(yymajor, yyact);
if yyact >= YY_MIN_REDUCE {
let yyruleno = yyact - YY_MIN_REDUCE; /* Reduce by this rule */
#[cfg(not(feature = "NDEBUG"))]
{
assert!((yyruleno as usize) < yyRuleName.len());
let yysize = yyRuleInfoNRhs[yyruleno as usize];
let action = if yyruleno < YYNRULE_WITH_ACTION {
""
} else {
" without external action"
};
if yysize != 0 {
debug!(
target: TARGET,
"Reduce {} [{}]{}, pop back to state {}.",
yyruleno,
yyRuleName[yyruleno as usize],
action,
self[yysize].stateno
);
} else {
debug!(
target: TARGET,
"Reduce {} [{}]{}.", yyruleno, yyRuleName[yyruleno as usize], action
);
}
}
/* Check that the stack is large enough to grow by a single entry
** if the RHS of the rule is empty. This ensures that there is room
** enough on the stack to push the LHS value */
if yyRuleInfoNRhs[yyruleno as usize] == 0 {
self.yyhwm_incr();
if self.yy_grow_stack_for_push() {
break;
}
}
yyact = self.yy_reduce(yyruleno, yymajor, &yyminor)?;
} else if yyact <= YY_MAX_SHIFTREDUCE {
self.yy_shift(yyact, yymajor, yyminor);
if cfg!(not(feature = "YYNOERRORRECOVERY")) {
self.yyerrcnt -= 1;
}
break;
} else if yyact == YY_ACCEPT_ACTION {
self.yyidx_shift(-1);
self.yy_accept();
return Ok(());
} else {
assert_eq!(yyact, YY_ERROR_ACTION);
#[cfg(not(feature = "NDEBUG"))]
{
debug!(target: TARGET, "Syntax Error!");
}
if YYERRORSYMBOL > 0 {
/* A syntax error has occurred.
** The response to an error depends upon whether or not the
** grammar defines an error token "ERROR".
**
** This is what we do if the grammar does define ERROR:
**
** * Call the %syntax_error function.
**
** * Begin popping the stack until we enter a state where
** it is legal to shift the error symbol, then shift
** the error symbol.
**
** * Set the error count to three.
**
** * Begin accepting and shifting new tokens. No new error
** processing will occur until three tokens have been
** shifted successfully.
**
*/
if self.yyerrcnt < 0 {
self.yy_syntax_error(yymajor, &yyminor);
}
let yymx = self[0].major;
if yymx == YYERRORSYMBOL || yyerrorhit {
#[cfg(not(feature = "NDEBUG"))]
{
debug!(
target: TARGET,
"Discard input token {}", yyTokenName[yymajor as usize]
);
}
yymajor = YYNOCODE;
} else {
while self.yyidx > 0 {
yyact = yy_find_reduce_action(self[0].stateno, YYERRORSYMBOL);
if yyact <= YY_MAX_SHIFTREDUCE {
break;
}
self.yy_pop_parser_stack();
}
if self.yyidx <= 0 || yymajor == 0 {
self.yy_parse_failed();
if cfg!(not(feature = "YYNOERRORRECOVERY")) {
self.yyerrcnt = -1;
}
yymajor = YYNOCODE;
} else if yymx != YYERRORSYMBOL {
self.yy_shift(yyact, YYERRORSYMBOL, yyminor);
}
}
self.yyerrcnt = 3;
yyerrorhit = true;
if yymajor == YYNOCODE {
break;
}
yyact = self[0].stateno;
} else if cfg!(feature = "YYNOERRORRECOVERY") {
/* If the YYNOERRORRECOVERY macro is defined, then do not attempt to
** do any kind of error recovery. Instead, simply invoke the syntax
** error routine and continue going as if nothing had happened.
**
** Applications can set this macro (for example inside %include) if
** they intend to abandon the parse upon the first syntax error seen.
*/
self.yy_syntax_error(yymajor, &yyminor);
break;
} else {
/* YYERRORSYMBOL is not defined */
/* This is what we do if the grammar does not define ERROR:
**
** * Report an error message, and throw away the input token.
**
** * If the input token is $, then fail the parse.
**
** As before, subsequent error messages are suppressed until
** three input tokens have been successfully shifted.
*/
if self.yyerrcnt <= 0 {
self.yy_syntax_error(yymajor, &yyminor);
}
self.yyerrcnt = 3;
if yyendofinput {
self.yy_parse_failed();
if cfg!(not(feature = "YYNOERRORRECOVERY")) {
self.yyerrcnt = -1;
}
}
break;
}
}
if self.yyidx <= 0 {
break;
}
}
#[cfg(not(feature = "NDEBUG"))]
{
if log_enabled!(target: TARGET, Debug) {
let msg = self.yystack[1..=self.yyidx]
.iter()
.map(|entry| yyTokenName[entry.major as usize])
.collect::<Vec<&str>>()
.join(" ");
debug!(target: TARGET, "Return. Stack=[{}]", msg);
}
}
Ok(())
}
/*
** Return the fallback token corresponding to canonical token iToken, or
** 0 if iToken has no fallback.
*/
#[inline]
pub fn parse_fallback(i_token: YYCODETYPE) -> YYCODETYPE {
if YYFALLBACK {
return yyFallback[i_token as usize];
}
0
}
}
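/* Usage sketch (hedged): a tokenizer can consult parse_fallback() to decide
** whether a keyword may double as an identifier at the current position:
**
**   if yyParser::parse_fallback(tt as YYCODETYPE) != 0 {
**       // this keyword falls back to ID and may be used as a plain name
**   }
*/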