vendor sqlite3-parser (lemon-rs)

Author: jussisaurio
Date: 2024-11-16 13:22:26 +02:00
Parent: 8efeb16b82
Commit: 3cc9d9d79f
38 changed files with 15973 additions and 37 deletions

Cargo.lock (generated)

@@ -2022,11 +2022,10 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]] [[package]]
name = "sqlite3-parser" name = "sqlite3-parser"
version = "0.13.0" version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb5307dad6cb84730ce8bdefde56ff4cf95fe516972d52e2bbdc8a8cd8f2520b"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.6.0",
"cc", "cc",
"env_logger 0.11.5",
"fallible-iterator 0.3.0", "fallible-iterator 0.3.0",
"indexmap", "indexmap",
"log", "log",


@@ -40,7 +40,7 @@ libc = "0.2.155"
log = "0.4.20" log = "0.4.20"
nix = { version = "0.29.0", features = ["fs"] } nix = { version = "0.29.0", features = ["fs"] }
sieve-cache = "0.1.4" sieve-cache = "0.1.4"
sqlite3-parser = "0.13.0" sqlite3-parser = { path = "../vendored/sqlite3-parser" }
thiserror = "1.0.61" thiserror = "1.0.61"
getrandom = { version = "0.2.15", features = ["js"] } getrandom = { version = "0.2.15", features = ["js"] }
regex = "1.10.5" regex = "1.10.5"


@@ -17,6 +17,21 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "anstream" name = "anstream"
version = "0.5.0" version = "0.5.0"
@@ -89,12 +104,6 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "anyhow"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca"
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.1.0" version = "1.1.0"
@@ -119,6 +128,15 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.16.0" version = "3.16.0"
@@ -127,9 +145,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]] [[package]]
name = "byteorder" name = "byteorder"
version = "1.4.3" version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]] [[package]]
name = "cc" name = "cc"
@@ -158,6 +176,20 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18758054972164c3264f7c8386f5fc6da6114cb46b619fd365d4e3b2dc3ae487" checksum = "18758054972164c3264f7c8386f5fc6da6114cb46b619fd365d4e3b2dc3ae487"
[[package]]
name = "chrono"
version = "0.4.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-targets 0.52.0",
]
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.4.2" version = "4.4.2"
@@ -213,6 +245,21 @@ dependencies = [
"crossbeam-utils", "crossbeam-utils",
] ]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "cpufeatures"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ca741a962e1b0bff6d724a1a0958b686406e853bb14061f218562e1896f95e6"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "crc32fast" name = "crc32fast"
version = "1.3.2" version = "1.3.2"
@@ -238,6 +285,32 @@ version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
]
[[package]]
name = "downcast"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1"
[[package]] [[package]]
name = "env_filter" name = "env_filter"
version = "0.1.0" version = "0.1.0"
@@ -283,6 +356,12 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "fast-float"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.0.27" version = "1.0.27"
@@ -293,6 +372,22 @@ dependencies = [
"miniz_oxide", "miniz_oxide",
] ]
[[package]]
name = "fragile"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.2.15" version = "0.2.15"
@@ -308,9 +403,9 @@ dependencies = [
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.14.3" version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3"
[[package]] [[package]]
name = "hdrhistogram" name = "hdrhistogram"
@@ -338,6 +433,12 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]] [[package]]
name = "humantime" name = "humantime"
version = "2.1.0" version = "2.1.0"
@@ -345,13 +446,37 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]] [[package]]
name = "indexmap" name = "iana-time-zone"
version = "2.1.0" version = "0.1.61"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "indexmap"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
dependencies = [ dependencies = [
"equivalent", "equivalent",
"hashbrown", "hashbrown",
"serde",
] ]
[[package]] [[package]]
@@ -364,6 +489,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "itoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]] [[package]]
name = "js-sys" name = "js-sys"
version = "0.3.69" version = "0.3.69"
@@ -373,6 +504,31 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "jsonb"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bbc6531bb5e8401565ee91098fbdf3a78bca480256075ea4230a042b87c3a8c"
dependencies = [
"byteorder",
"fast-float",
"itoa",
"nom",
"ordered-float",
"rand",
"ryu",
"serde_json",
]
[[package]]
name = "julian_day_converter"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b8b1d2decaeec65c1d9729098450800ffb40767dcab1b9be3e6e7eb21c6e7a5"
dependencies = [
"chrono",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.155" version = "0.2.155"
@@ -401,20 +557,29 @@ dependencies = [
[[package]] [[package]]
name = "limbo_core" name = "limbo_core"
version = "0.0.1" version = "0.0.4"
dependencies = [ dependencies = [
"anyhow",
"cfg_block", "cfg_block",
"chrono",
"fallible-iterator", "fallible-iterator",
"getrandom", "getrandom",
"hex",
"indexmap",
"io-uring", "io-uring",
"jsonb",
"julian_day_converter",
"libc", "libc",
"log", "log",
"mimalloc", "mimalloc",
"mockall",
"nix", "nix",
"pest",
"pest_derive",
"polling", "polling",
"rand",
"regex", "regex",
"rustix", "rustix",
"serde",
"sieve-cache", "sieve-cache",
"sqlite3-parser", "sqlite3-parser",
"thiserror", "thiserror",
@@ -428,9 +593,9 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.20" version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]] [[package]]
name = "memchr" name = "memchr"
@@ -462,6 +627,32 @@ dependencies = [
"adler", "adler",
] ]
[[package]]
name = "mockall"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4c28b3fb6d753d28c20e826cd46ee611fda1cf3cde03a443a974043247c065a"
dependencies = [
"cfg-if",
"downcast",
"fragile",
"mockall_derive",
"predicates",
"predicates-tree",
]
[[package]]
name = "mockall_derive"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "341014e7f530314e9a1fdbc7400b244efea7122662c96bfa248c31da5bfb2020"
dependencies = [
"cfg-if",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "nix" name = "nix"
version = "0.29.0" version = "0.29.0"
@@ -499,6 +690,60 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "ordered-float"
version = "4.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c65ee1f9701bf938026630b455d5315f490640234259037edb259798b3bcf85e"
dependencies = [
"num-traits",
]
[[package]]
name = "pest"
version = "2.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442"
dependencies = [
"memchr",
"thiserror",
"ucd-trie",
]
[[package]]
name = "pest_derive"
version = "2.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d214365f632b123a47fd913301e14c946c61d1c183ee245fa76eb752e59a02dd"
dependencies = [
"pest",
"pest_generator",
]
[[package]]
name = "pest_generator"
version = "2.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb55586734301717aea2ac313f50b2eb8f60d2fc3dc01d190eefa2e625f60c4e"
dependencies = [
"pest",
"pest_meta",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pest_meta"
version = "2.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b75da2a70cf4d9cb76833c990ac9cd3923c9a8905a8929789ce347c84564d03d"
dependencies = [
"once_cell",
"pest",
"sha2",
]
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.11.2" version = "0.11.2"
@@ -559,6 +804,41 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "ppv-lite86"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
dependencies = [
"zerocopy",
]
[[package]]
name = "predicates"
version = "3.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97"
dependencies = [
"anstyle",
"predicates-core",
]
[[package]]
name = "predicates-core"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae8177bee8e75d6846599c6b9ff679ed51e882816914eec639944d7c9aa11931"
[[package]]
name = "predicates-tree"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41b740d195ed3166cd147c8047ec98db0e22ec019eb8eeb76d343b795304fb13"
dependencies = [
"predicates-core",
"termtree",
]
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.78" version = "1.0.78"
@@ -583,6 +863,18 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [ dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core", "rand_core",
] ]
@@ -591,6 +883,9 @@ name = "rand_core"
version = "0.6.4" version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]] [[package]]
name = "regex" name = "regex"
@@ -634,6 +929,56 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "ryu"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "serde"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.132"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03"
dependencies = [
"indexmap",
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "sha2"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]] [[package]]
name = "sieve-cache" name = "sieve-cache"
version = "0.1.4" version = "0.1.4"
@@ -646,17 +991,9 @@ version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "smallvec"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
[[package]] [[package]]
name = "sqlite3-parser" name = "sqlite3-parser"
version = "0.11.0" version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b64003a3617746eb65b39e6dc422139a2f99cfd54683fc973f4763eb786e0c1"
dependencies = [ dependencies = [
"bitflags 2.4.0", "bitflags 2.4.0",
"cc", "cc",
@@ -667,7 +1004,6 @@ dependencies = [
"phf", "phf",
"phf_codegen", "phf_codegen",
"phf_shared", "phf_shared",
"smallvec",
"uncased", "uncased",
] ]
@@ -688,6 +1024,12 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "termtree"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.63" version = "1.0.63"
@@ -724,6 +1066,18 @@ version = "0.1.32"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ucd-trie"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]] [[package]]
name = "uncased" name = "uncased"
version = "0.9.10" version = "0.9.10"
@@ -811,6 +1165,15 @@ version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
[[package]]
name = "windows-core"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
dependencies = [
"windows-targets 0.52.0",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"
@@ -942,3 +1305,24 @@ name = "windows_x86_64_msvc"
version = "0.52.0" version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"byteorder",
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]


@@ -4,7 +4,7 @@
use clap::Parser; use clap::Parser;
use hdrhistogram::Histogram; use hdrhistogram::Histogram;
use limbo_core::{Database, IO, PlatformIO}; use limbo_core::{Database, PlatformIO, IO};
use std::ops::{Coroutine, CoroutineState}; use std::ops::{Coroutine, CoroutineState};
use std::pin::Pin; use std::pin::Pin;
use std::sync::Arc; use std::sync::Arc;
@@ -24,7 +24,8 @@ fn main() {
for i in 0..opts.count { for i in 0..opts.count {
let mut recorder = hist.recorder(); let mut recorder = hist.recorder();
let io = io.clone(); let io = io.clone();
let tenant = #[coroutine] move || { let tenant = #[coroutine]
move || {
let database = format!("database{}.db", i); let database = format!("database{}.db", i);
let db = Database::open_file(io.clone(), &database).unwrap(); let db = Database::open_file(io.clone(), &database).unwrap();
let conn = db.connect(); let conn = db.connect();
@@ -45,7 +46,7 @@ fn main() {
} }
} }
assert!(count == 100); assert!(count == 100);
recorder.record(now.elapsed().as_nanos() as u64).unwrap(); recorder.record(now.elapsed().as_nanos() as u64).unwrap();
} }
yield; yield;
} }
@@ -63,10 +64,9 @@ fn main() {
let _ = tenants.remove(i); let _ = tenants.remove(i);
completed += 1; completed += 1;
} }
CoroutineState::Yielded(_) => { CoroutineState::Yielded(_) => {}
} }
} }
},
None => { None => {
continue; continue;
} }


@@ -0,0 +1,11 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
  - package-ecosystem: "cargo" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
      interval: "weekly"


@@ -0,0 +1,36 @@
name: CI
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]
permissions:
  contents: read
jobs:
  build:
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - name: Build
        run: cargo build
      - name: Run tests
        run: cargo test
  direct-minimal-versions:
    name: Test min versions
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: hecrj/setup-rust-action@v2
        with:
          rust-version: nightly
      - run: |
          cargo update -Z direct-minimal-versions
          cargo test

vendored/sqlite3-parser/.gitignore (new file)

@@ -0,0 +1,6 @@
target
rlemon
*.h
*.out
Cargo.lock
cmake-build-debug


@@ -0,0 +1,5 @@
cmake_minimum_required(VERSION 3.6)
project(rlemon)
set(SOURCE_FILES third_party/lemon/lemon.c)
add_executable(rlemon ${SOURCE_FILES})


@@ -0,0 +1,42 @@
[package]
name = "sqlite3-parser"
version = "0.13.0"
edition = "2021"
authors = ["gwenn"]
description = "SQL parser (as understood by SQLite)"
documentation = "http://docs.rs/sqlite3-parser"
repository = "https://github.com/gwenn/lemon-rs"
readme = "README.md"
categories = ["parser-implementations"]
keywords = ["sql", "parser", "scanner", "tokenizer"]
license = "Apache-2.0/MIT"
build = "build.rs" # Lemon preprocessing
[badges]
maintenance = { status = "experimental" }
[features]
# FIXME: specific to one parser, not global
YYTRACKMAXSTACKDEPTH = []
YYNOERRORRECOVERY = []
YYCOVERAGE = []
NDEBUG = []
default = ["YYNOERRORRECOVERY", "NDEBUG"]
[dependencies]
phf = { version = "0.11", features = ["uncased"] }
log = "0.4.22"
memchr = "2.0"
fallible-iterator = "0.3"
bitflags = "2.0"
uncased = "0.9.10"
indexmap = "2.0"
[dev-dependencies]
env_logger = { version = "0.11", default-features = false }
[build-dependencies]
cc = "1.0"
phf_shared = { version = "0.11", features = ["uncased"] }
phf_codegen = "0.11"
uncased = "0.9.10"


@@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org>


@@ -0,0 +1,79 @@
[![Build Status](https://github.com/gwenn/lemon-rs/workflows/CI/badge.svg)](https://github.com/gwenn/lemon-rs/actions)
[![Latest Version](https://img.shields.io/crates/v/sqlite3-parser.svg)](https://crates.io/crates/sqlite3-parser)
[![Docs](https://docs.rs/sqlite3-parser/badge.svg)](https://docs.rs/sqlite3-parser)
[![dependency status](https://deps.rs/repo/github/gwenn/lemon-rs/status.svg)](https://deps.rs/repo/github/gwenn/lemon-rs)
[LEMON parser generator](https://www.sqlite.org/src/doc/trunk/doc/lemon.html) modified to generate Rust code.
Lemon source and SQLite3 grammar were last synced as of July 2024.
## Unsupported
### Unsupported Grammar syntax
* `%token_destructor`: Code to execute to destroy token data
* `%default_destructor`: Code for the default non-terminal destructor
* `%destructor`: Code which executes whenever this symbol is
popped from the stack during error processing
https://www.codeproject.com/Articles/1056460/Generating-a-High-Speed-Parser-Part-Lemon
https://www.sqlite.org/lemon.html
### SQLite
[SQLite lexer](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/tokenize.c) and [SQLite parser](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/parse.y) have been ported from C to Rust.
The parser generates an AST.
Lexer/Parser:
- Keep track of position (line, column).
- Streamable (stop at the end of statement).
- Resumable (restart after the end of statement).
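For illustration, a minimal sketch of streaming statements out of a byte buffer, mirroring the bundled example programs further below (`Parser` and `FallibleIterator` are the actual API; the SQL text is just a placeholder):

```rust
use fallible_iterator::FallibleIterator;
use sqlite3_parser::lexer::sql::Parser;

fn main() {
    // One buffer may hold several statements; the parser stops at each statement end.
    let sql = b"SELECT 1; CREATE TABLE t (a, b);";
    let mut parser = Parser::new(sql);
    loop {
        match parser.next() {
            Ok(Some(cmd)) => println!("{cmd}"), // `cmd` is the AST of one statement
            Ok(None) => break,                  // end of input
            Err(err) => {
                eprintln!("Err: {err}");
                break;
            }
        }
    }
}
```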
Lexer and parser have been tested with the following scripts:
* https://github.com/bkiers/sqlite-parser/tree/master/src/test/resources
* https://github.com/codeschool/sqlite-parser/tree/master/test/sql/official-suite which can be updated with script in https://github.com/codeschool/sqlite-parser/tree/master/test/misc
TODO:
- [ ] Check generated AST (reparse/reinject)
- [ ] [If a keyword in double quotes is used in a context where it cannot be resolved to an identifier but where a string literal is allowed, then the token is understood to be a string literal instead of an identifier.](https://sqlite.org/lang_keywords.html)
- [ ] Tests
- [ ] Do not panic while parsing
- [x] CREATE VIRTUAL TABLE args
- [ ] Zero copy (at least tokens)
### Unsupported by Rust
* `#line` directive
## API change
* No `ParseAlloc`/`ParseFree` anymore
## Features not tested
* NDEBUG
* YYNOERRORRECOVERY
* YYERRORSYMBOL
## To be fixed
* RHS values are moved. Maybe this is not a problem if they are always used only once.
Just add a check in lemon...
* `%extra_argument` is not supported.
* Terminal symbols generated by lemon should be dumped in a specified file.
## Raison d'être
* [lemon_rust](https://github.com/rodrigorc/lemon_rust) does the same thing
but with an old version of `lemon`. And it seems not possible to use `yystack`
as a stack because items may be accessed randomly and the `top+1` item can be used.
* [lalrpop](https://github.com/nikomatsakis/lalrpop) would be the perfect
alternative but it does not support fallback/streaming
(see [this](https://github.com/nikomatsakis/lalrpop/issues/156) issue)
and compilation/generation is slow.
## Minimum supported Rust version (MSRV)
Latest stable Rust version at the time of release. It might compile with older versions.


@@ -0,0 +1,7 @@
When some changes happen in the official SQLite repository,
they can be applied locally:
- $SQLITE/tool/lemon.c => $RLEMON/third_party/lemon.c
- $SQLITE/tool/lempar.c => $RLEMON/third_party/lempar.rs
- $SQLITE/tool/mkkeywordhash.c => $RLEMON/src/dialect/mod.rs
- $SQLITE/src/tokenize.c => $RLEMON/src/lexer/sql/mod.rs
- $SQLITE/src/parse.y => $RLEMON/src/parser/parse.y (and $RLEMON/src/dialect/token.rs, $RLEMON/src/dialect/mod.rs)


@@ -0,0 +1,154 @@
#![cfg(all(test, not(test)))] // never compile this
#![feature(test)]
extern crate test;
use sqlite3_parser::dialect::keyword_token;
use test::Bencher;
static VALUES: [&[u8]; 136] = [
b"ABORT",
b"ACTION",
b"ADD",
b"AFTER",
b"ALL",
b"ALTER",
b"ANALYZE",
b"AND",
b"AS",
b"ASC",
b"ATTACH",
b"AUTOINCREMENT",
b"BEFORE",
b"BEGIN",
b"BETWEEN",
b"BY",
b"CASCADE",
b"CASE",
b"CAST",
b"CHECK",
b"COLLATE",
b"COLUMN",
b"COMMIT",
b"CONFLICT",
b"CONSTRAINT",
b"CREATE",
b"CROSS",
b"CURRENT",
b"CURRENT_DATE",
b"CURRENT_TIME",
b"CURRENT_TIMESTAMP",
b"DATABASE",
b"DEFAULT",
b"DEFERRABLE",
b"DEFERRED",
b"DELETE",
b"DESC",
b"DETACH",
b"DISTINCT",
b"DO",
b"DROP",
b"EACH",
b"ELSE",
b"END",
b"ESCAPE",
b"EXCEPT",
b"EXCLUSIVE",
b"EXISTS",
b"EXPLAIN",
b"FAIL",
b"FILTER",
b"FOLLOWING",
b"FOR",
b"FOREIGN",
b"FROM",
b"FULL",
b"GLOB",
b"GROUP",
b"HAVING",
b"IF",
b"IGNORE",
b"IMMEDIATE",
b"IN",
b"INDEX",
b"INDEXED",
b"INITIALLY",
b"INNER",
b"INSERT",
b"INSTEAD",
b"INTERSECT",
b"INTO",
b"IS",
b"ISNULL",
b"JOIN",
b"KEY",
b"LEFT",
b"LIKE",
b"LIMIT",
b"MATCH",
b"NATURAL",
b"NO",
b"NOT",
b"NOTHING",
b"NOTNULL",
b"NULL",
b"OF",
b"OFFSET",
b"ON",
b"OR",
b"ORDER",
b"OUTER",
b"OVER",
b"PARTITION",
b"PLAN",
b"PRAGMA",
b"PRECEDING",
b"PRIMARY",
b"QUERY",
b"RAISE",
b"RANGE",
b"RECURSIVE",
b"REFERENCES",
b"REGEXP",
b"REINDEX",
b"RELEASE",
b"RENAME",
b"REPLACE",
b"RESTRICT",
b"RIGHT",
b"ROLLBACK",
b"ROW",
b"ROWS",
b"SAVEPOINT",
b"SELECT",
b"SET",
b"TABLE",
b"TEMP",
b"TEMPORARY",
b"THEN",
b"TO",
b"TRANSACTION",
b"TRIGGER",
b"UNBOUNDED",
b"UNION",
b"UNIQUE",
b"UPDATE",
b"USING",
b"VACUUM",
b"VALUES",
b"VIEW",
b"VIRTUAL",
b"WHEN",
b"WHERE",
b"WINDOW",
b"WITH",
b"WITHOUT",
];
#[bench]
fn bench_keyword_token(b: &mut Bencher) {
b.iter(|| {
for value in &VALUES {
assert!(keyword_token(value).is_some())
}
});
}


@@ -0,0 +1,211 @@
use std::env;
use std::fs::File;
use std::io::{BufWriter, Result, Write};
use std::path::Path;
use std::process::Command;
use cc::Build;
use uncased::UncasedStr;
fn main() -> Result<()> {
let out_dir = env::var("OUT_DIR").unwrap();
let out_path = Path::new(&out_dir);
let rlemon = out_path.join("rlemon");
let lemon_src_dir = Path::new("third_party").join("lemon");
let rlemon_src = lemon_src_dir.join("lemon.c");
// compile rlemon:
{
assert!(Build::new()
.target(&env::var("HOST").unwrap())
.get_compiler()
.to_command()
.arg("-o")
.arg(rlemon.clone())
.arg(rlemon_src)
.status()?
.success());
}
let sql_parser = "src/parser/parse.y";
// run rlemon / generate parser:
{
assert!(Command::new(rlemon)
.arg("-DSQLITE_ENABLE_UPDATE_DELETE_LIMIT")
.arg("-Tthird_party/lemon/lempar.rs")
.arg(format!("-d{out_dir}"))
.arg(sql_parser)
.status()?
.success());
// TODO ./rlemon -m -Tthird_party/lemon/lempar.rs examples/simple.y
}
let keywords = out_path.join("keywords.rs");
let mut keywords = BufWriter::new(File::create(keywords)?);
write!(
&mut keywords,
"static KEYWORDS: ::phf::Map<&'static UncasedStr, TokenType> = \n{};",
phf_codegen::Map::new()
.entry(UncasedStr::new("ABORT"), "TokenType::TK_ABORT")
.entry(UncasedStr::new("ACTION"), "TokenType::TK_ACTION")
.entry(UncasedStr::new("ADD"), "TokenType::TK_ADD")
.entry(UncasedStr::new("AFTER"), "TokenType::TK_AFTER")
.entry(UncasedStr::new("ALL"), "TokenType::TK_ALL")
.entry(UncasedStr::new("ALTER"), "TokenType::TK_ALTER")
.entry(UncasedStr::new("ALWAYS"), "TokenType::TK_ALWAYS")
.entry(UncasedStr::new("ANALYZE"), "TokenType::TK_ANALYZE")
.entry(UncasedStr::new("AND"), "TokenType::TK_AND")
.entry(UncasedStr::new("AS"), "TokenType::TK_AS")
.entry(UncasedStr::new("ASC"), "TokenType::TK_ASC")
.entry(UncasedStr::new("ATTACH"), "TokenType::TK_ATTACH")
.entry(UncasedStr::new("AUTOINCREMENT"), "TokenType::TK_AUTOINCR")
.entry(UncasedStr::new("BEFORE"), "TokenType::TK_BEFORE")
.entry(UncasedStr::new("BEGIN"), "TokenType::TK_BEGIN")
.entry(UncasedStr::new("BETWEEN"), "TokenType::TK_BETWEEN")
.entry(UncasedStr::new("BY"), "TokenType::TK_BY")
.entry(UncasedStr::new("CASCADE"), "TokenType::TK_CASCADE")
.entry(UncasedStr::new("CASE"), "TokenType::TK_CASE")
.entry(UncasedStr::new("CAST"), "TokenType::TK_CAST")
.entry(UncasedStr::new("CHECK"), "TokenType::TK_CHECK")
.entry(UncasedStr::new("COLLATE"), "TokenType::TK_COLLATE")
.entry(UncasedStr::new("COLUMN"), "TokenType::TK_COLUMNKW")
.entry(UncasedStr::new("COMMIT"), "TokenType::TK_COMMIT")
.entry(UncasedStr::new("CONFLICT"), "TokenType::TK_CONFLICT")
.entry(UncasedStr::new("CONSTRAINT"), "TokenType::TK_CONSTRAINT")
.entry(UncasedStr::new("CREATE"), "TokenType::TK_CREATE")
.entry(UncasedStr::new("CROSS"), "TokenType::TK_JOIN_KW")
.entry(UncasedStr::new("CURRENT"), "TokenType::TK_CURRENT")
.entry(UncasedStr::new("CURRENT_DATE"), "TokenType::TK_CTIME_KW")
.entry(UncasedStr::new("CURRENT_TIME"), "TokenType::TK_CTIME_KW")
.entry(
UncasedStr::new("CURRENT_TIMESTAMP"),
"TokenType::TK_CTIME_KW"
)
.entry(UncasedStr::new("DATABASE"), "TokenType::TK_DATABASE")
.entry(UncasedStr::new("DEFAULT"), "TokenType::TK_DEFAULT")
.entry(UncasedStr::new("DEFERRABLE"), "TokenType::TK_DEFERRABLE")
.entry(UncasedStr::new("DEFERRED"), "TokenType::TK_DEFERRED")
.entry(UncasedStr::new("DELETE"), "TokenType::TK_DELETE")
.entry(UncasedStr::new("DESC"), "TokenType::TK_DESC")
.entry(UncasedStr::new("DETACH"), "TokenType::TK_DETACH")
.entry(UncasedStr::new("DISTINCT"), "TokenType::TK_DISTINCT")
.entry(UncasedStr::new("DO"), "TokenType::TK_DO")
.entry(UncasedStr::new("DROP"), "TokenType::TK_DROP")
.entry(UncasedStr::new("EACH"), "TokenType::TK_EACH")
.entry(UncasedStr::new("ELSE"), "TokenType::TK_ELSE")
.entry(UncasedStr::new("END"), "TokenType::TK_END")
.entry(UncasedStr::new("ESCAPE"), "TokenType::TK_ESCAPE")
.entry(UncasedStr::new("EXCEPT"), "TokenType::TK_EXCEPT")
.entry(UncasedStr::new("EXCLUDE"), "TokenType::TK_EXCLUDE")
.entry(UncasedStr::new("EXCLUSIVE"), "TokenType::TK_EXCLUSIVE")
.entry(UncasedStr::new("EXISTS"), "TokenType::TK_EXISTS")
.entry(UncasedStr::new("EXPLAIN"), "TokenType::TK_EXPLAIN")
.entry(UncasedStr::new("FAIL"), "TokenType::TK_FAIL")
.entry(UncasedStr::new("FILTER"), "TokenType::TK_FILTER")
.entry(UncasedStr::new("FIRST"), "TokenType::TK_FIRST")
.entry(UncasedStr::new("FOLLOWING"), "TokenType::TK_FOLLOWING")
.entry(UncasedStr::new("FOR"), "TokenType::TK_FOR")
.entry(UncasedStr::new("FOREIGN"), "TokenType::TK_FOREIGN")
.entry(UncasedStr::new("FROM"), "TokenType::TK_FROM")
.entry(UncasedStr::new("FULL"), "TokenType::TK_JOIN_KW")
.entry(UncasedStr::new("GENERATED"), "TokenType::TK_GENERATED")
.entry(UncasedStr::new("GLOB"), "TokenType::TK_LIKE_KW")
.entry(UncasedStr::new("GROUP"), "TokenType::TK_GROUP")
.entry(UncasedStr::new("GROUPS"), "TokenType::TK_GROUPS")
.entry(UncasedStr::new("HAVING"), "TokenType::TK_HAVING")
.entry(UncasedStr::new("IF"), "TokenType::TK_IF")
.entry(UncasedStr::new("IGNORE"), "TokenType::TK_IGNORE")
.entry(UncasedStr::new("IMMEDIATE"), "TokenType::TK_IMMEDIATE")
.entry(UncasedStr::new("IN"), "TokenType::TK_IN")
.entry(UncasedStr::new("INDEX"), "TokenType::TK_INDEX")
.entry(UncasedStr::new("INDEXED"), "TokenType::TK_INDEXED")
.entry(UncasedStr::new("INITIALLY"), "TokenType::TK_INITIALLY")
.entry(UncasedStr::new("INNER"), "TokenType::TK_JOIN_KW")
.entry(UncasedStr::new("INSERT"), "TokenType::TK_INSERT")
.entry(UncasedStr::new("INSTEAD"), "TokenType::TK_INSTEAD")
.entry(UncasedStr::new("INTERSECT"), "TokenType::TK_INTERSECT")
.entry(UncasedStr::new("INTO"), "TokenType::TK_INTO")
.entry(UncasedStr::new("IS"), "TokenType::TK_IS")
.entry(UncasedStr::new("ISNULL"), "TokenType::TK_ISNULL")
.entry(UncasedStr::new("JOIN"), "TokenType::TK_JOIN")
.entry(UncasedStr::new("KEY"), "TokenType::TK_KEY")
.entry(UncasedStr::new("LAST"), "TokenType::TK_LAST")
.entry(UncasedStr::new("LEFT"), "TokenType::TK_JOIN_KW")
.entry(UncasedStr::new("LIKE"), "TokenType::TK_LIKE_KW")
.entry(UncasedStr::new("LIMIT"), "TokenType::TK_LIMIT")
.entry(UncasedStr::new("MATCH"), "TokenType::TK_MATCH")
.entry(
UncasedStr::new("MATERIALIZED"),
"TokenType::TK_MATERIALIZED"
)
.entry(UncasedStr::new("NATURAL"), "TokenType::TK_JOIN_KW")
.entry(UncasedStr::new("NO"), "TokenType::TK_NO")
.entry(UncasedStr::new("NOT"), "TokenType::TK_NOT")
.entry(UncasedStr::new("NOTHING"), "TokenType::TK_NOTHING")
.entry(UncasedStr::new("NOTNULL"), "TokenType::TK_NOTNULL")
.entry(UncasedStr::new("NULL"), "TokenType::TK_NULL")
.entry(UncasedStr::new("NULLS"), "TokenType::TK_NULLS")
.entry(UncasedStr::new("OF"), "TokenType::TK_OF")
.entry(UncasedStr::new("OFFSET"), "TokenType::TK_OFFSET")
.entry(UncasedStr::new("ON"), "TokenType::TK_ON")
.entry(UncasedStr::new("OR"), "TokenType::TK_OR")
.entry(UncasedStr::new("ORDER"), "TokenType::TK_ORDER")
.entry(UncasedStr::new("OTHERS"), "TokenType::TK_OTHERS")
.entry(UncasedStr::new("OUTER"), "TokenType::TK_JOIN_KW")
.entry(UncasedStr::new("OVER"), "TokenType::TK_OVER")
.entry(UncasedStr::new("PARTITION"), "TokenType::TK_PARTITION")
.entry(UncasedStr::new("PLAN"), "TokenType::TK_PLAN")
.entry(UncasedStr::new("PRAGMA"), "TokenType::TK_PRAGMA")
.entry(UncasedStr::new("PRECEDING"), "TokenType::TK_PRECEDING")
.entry(UncasedStr::new("PRIMARY"), "TokenType::TK_PRIMARY")
.entry(UncasedStr::new("QUERY"), "TokenType::TK_QUERY")
.entry(UncasedStr::new("RAISE"), "TokenType::TK_RAISE")
.entry(UncasedStr::new("RANGE"), "TokenType::TK_RANGE")
.entry(UncasedStr::new("RECURSIVE"), "TokenType::TK_RECURSIVE")
.entry(UncasedStr::new("REFERENCES"), "TokenType::TK_REFERENCES")
.entry(UncasedStr::new("REGEXP"), "TokenType::TK_LIKE_KW")
.entry(UncasedStr::new("REINDEX"), "TokenType::TK_REINDEX")
.entry(UncasedStr::new("RELEASE"), "TokenType::TK_RELEASE")
.entry(UncasedStr::new("RENAME"), "TokenType::TK_RENAME")
.entry(UncasedStr::new("REPLACE"), "TokenType::TK_REPLACE")
.entry(UncasedStr::new("RETURNING"), "TokenType::TK_RETURNING")
.entry(UncasedStr::new("RESTRICT"), "TokenType::TK_RESTRICT")
.entry(UncasedStr::new("RIGHT"), "TokenType::TK_JOIN_KW")
.entry(UncasedStr::new("ROLLBACK"), "TokenType::TK_ROLLBACK")
.entry(UncasedStr::new("ROW"), "TokenType::TK_ROW")
.entry(UncasedStr::new("ROWS"), "TokenType::TK_ROWS")
.entry(UncasedStr::new("SAVEPOINT"), "TokenType::TK_SAVEPOINT")
.entry(UncasedStr::new("SELECT"), "TokenType::TK_SELECT")
.entry(UncasedStr::new("SET"), "TokenType::TK_SET")
.entry(UncasedStr::new("TABLE"), "TokenType::TK_TABLE")
.entry(UncasedStr::new("TEMP"), "TokenType::TK_TEMP")
.entry(UncasedStr::new("TEMPORARY"), "TokenType::TK_TEMP")
.entry(UncasedStr::new("THEN"), "TokenType::TK_THEN")
.entry(UncasedStr::new("TIES"), "TokenType::TK_TIES")
.entry(UncasedStr::new("TO"), "TokenType::TK_TO")
.entry(UncasedStr::new("TRANSACTION"), "TokenType::TK_TRANSACTION")
.entry(UncasedStr::new("TRIGGER"), "TokenType::TK_TRIGGER")
.entry(UncasedStr::new("UNBOUNDED"), "TokenType::TK_UNBOUNDED")
.entry(UncasedStr::new("UNION"), "TokenType::TK_UNION")
.entry(UncasedStr::new("UNIQUE"), "TokenType::TK_UNIQUE")
.entry(UncasedStr::new("UPDATE"), "TokenType::TK_UPDATE")
.entry(UncasedStr::new("USING"), "TokenType::TK_USING")
.entry(UncasedStr::new("VACUUM"), "TokenType::TK_VACUUM")
.entry(UncasedStr::new("VALUES"), "TokenType::TK_VALUES")
.entry(UncasedStr::new("VIEW"), "TokenType::TK_VIEW")
.entry(UncasedStr::new("VIRTUAL"), "TokenType::TK_VIRTUAL")
.entry(UncasedStr::new("WHEN"), "TokenType::TK_WHEN")
.entry(UncasedStr::new("WHERE"), "TokenType::TK_WHERE")
.entry(UncasedStr::new("WINDOW"), "TokenType::TK_WINDOW")
.entry(UncasedStr::new("WITH"), "TokenType::TK_WITH")
.entry(UncasedStr::new("WITHOUT"), "TokenType::TK_WITHOUT")
.build()
)?;
println!("cargo:rerun-if-changed=third_party/lemon/lemon.c");
println!("cargo:rerun-if-changed=third_party/lemon/lempar.rs");
println!("cargo:rerun-if-changed=src/parser/parse.y");
// TODO examples/simple.y if test
Ok(())
}


@@ -0,0 +1,124 @@
# Extra consistency checks
- `ALTER TABLE ... RENAME TO ...` when old and new table names are the same => `Stmt::check`
- `ALTER TABLE ... ADD COLUMN ...` with new primary key / unique constraint => `Stmt::check`
- `CREATE TABLE ...`
- with duplicated column name => `ColumnDefinition::add_column`
- with STRICT option and invalid or missing column type(s) => `CreateTableBody::check`
- WITHOUT ROWID and without primary key => `CreateTableBody::check`
- `CREATE VIEW ... (...) ...`
- when view columns count does not match select columns count => `Stmt::check`
- with duplicated columns (same name) => `Stmt::check`
- `DELETE FROM ... ORDER BY ...` with ORDER BY but without LIMIT => `Stmt::check`
- `INSERT INTO ... (...) ...` when columns count does not match select columns / values count => `Stmt::check`
- `INSERT INTO ... (...) DEFAULT VALUES` with columns and DEFAULT VALUES => `Stmt::check`
- `SELECT ... EXCEPT|INTERSECT|UNION SELECT ...` when the SELECTs do not all have the same number of result columns => `SelectBody::push`
- `NATURAL JOIN ...` with ON or USING clause => `FromClause::push`
- `UPDATE ... ORDER BY ...` with ORDER BY but without LIMIT => `Stmt::check`
- `VALUES (...), (...), ...` when the VALUES rows do not all have the same number of terms => `OneSelect::push`
- `WITH ...` with duplicated table name => `CommonTableExpr::add_cte`
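These extra checks surface as ordinary parse errors; a minimal sketch (assuming, as the bundled example programs suggest, that a failed check is reported as an `Err` from `Parser::next()`):

```rust
use fallible_iterator::FallibleIterator;
use sqlite3_parser::lexer::sql::Parser;

fn main() {
    // Duplicated column name: rejected by `ColumnDefinition::add_column`.
    let mut parser = Parser::new(b"CREATE TABLE t (a, a);");
    assert!(parser.next().is_err());

    // ORDER BY without LIMIT: rejected by `Stmt::check`.
    let mut parser = Parser::new(b"DELETE FROM t ORDER BY a;");
    assert!(parser.next().is_err());
}
```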
## TODO
### `CREATE TABLE`
- [x] qualified (different from `temp`) temporary table
```sql
sqlite> ATTACH DATABASE ':memory:' AS mem;
sqlite> CREATE TEMPORARY TABLE mem.x AS SELECT 1;
Parse error: temporary table name must be unqualified
```
```sql
sqlite> CREATE TEMPORARY TABLE temp.x AS SELECT 1;
-- OK
```
- [x] must have at least one non-generated column
```sql
sqlite> CREATE TABLE test(data AS (1));
Parse error: must have at least one non-generated column
```
- [ ] column constraint(s) checks
```sql
sqlite> CREATE TABLE t(a REFERENCES o(a,b));
Parse error: foreign key on a should reference only one column of table o
CREATE TABLE t(a REFERENCES o(a,b));
error here ---^
sqlite> CREATE TABLE t(a PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID;
Parse error: AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY
sqlite> CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID;
Parse error: AUTOINCREMENT not allowed on WITHOUT ROWID tables
```
- [ ] table constraint(s) checks
```sql
sqlite> CREATE TABLE test (a, b, FOREIGN KEY (b) REFERENCES test(a,b));
Parse error: number of columns in foreign key does not match the number of columns in the referenced table
```
```sql
sqlite> create table test (a,b, primary key(a), primary key(b));
Parse error: table "test" has more than one primary key
sqlite> create table test (a primary key, b primary key);
Parse error: table "test" has more than one primary key
sqlite> create table test (a primary key, b, primary key(a));
Parse error: table "test" has more than one primary key
```
### `HAVING`
- [x] HAVING clause on a non-aggregate query (`GroupBy::having`): grammar already prevents this case (grammar differs from SQLite official grammar).
```sql
sqlite> SELECT 1 as i HAVING i > 1;
Parse error: HAVING clause on a non-aggregate query
```
vs
```
[ERROR sqlite3Parser] near HAVING, "Token(None)": syntax error
Err: near HAVING, "None": syntax error at (1, 21) in SELECT 1 as i HAVING i > 1
```
### `SELECT ...`
- [ ] no duplicated column name in `selcollist`/`Select::columns`
```sql
sqlite> SELECT 1 as i, 2 as i;
-- no error (idem for postgres)
```
### `SELECT ... ORDER BY ...`
- [ ] ORDER BY term does not match any column in the result set (`Select::order_by`)
```sql
sqlite> SELECT 1 as i ORDER BY j;
Parse error: no such column: j
SELECT 1 as i ORDER BY j;
^--- error here
```
### `WITH`
- [ ] no duplicated column name in `CommonTableExpr::IndexedColumn`
### DML
```sql
sqlite> CREATE TABLE test (n, m);
sqlite> INSERT INTO test (n, n, m) VALUES (1, 0, 1); -- pgsql KO
sqlite> SELECT * FROM test;
1|1
sqlite> UPDATE test SET n = 1, n = 0; -- pgsql KO
sqlite> SELECT * FROM test;
0|1
```


@@ -0,0 +1,123 @@
%token_type { i32 }
// An extra argument to the constructor for the parser, which is available
// to all actions.
%extra_context {ctx: Context}
%left PLUS MINUS.
%left DIVIDE TIMES.
%include {
use log::{debug, error, log_enabled, Level, LevelFilter, Metadata, Record, SetLoggerError};
pub struct Context {
expr: Option<Expr>,
}
#[derive(Debug)]
pub enum Operator {
Add,
Subtract,
Multiply,
Divide,
}
#[derive(Debug)]
pub enum Expr {
Number(i32),
Binary(Operator, Box<Expr>, Box<Expr>),
}
impl Expr {
fn binary(op: Operator, lhs: Expr, rhs: Expr) -> Expr {
Expr::Binary(op, Box::new(lhs), Box::new(rhs))
}
}
fn main() {
init_logger().unwrap();
let r = Context { expr: None };
let mut p = yyParser::new(r);
p.Parse(TokenType::INTEGER, Some(5));
p.Parse(TokenType::PLUS, None);
p.Parse(TokenType::INTEGER, Some(10));
p.Parse(TokenType::TIMES, None);
p.Parse(TokenType::INTEGER, Some(4));
p.Parse(TokenType::EOF, None);
p.ParseFinalize();
let s = format!("{:?}", p.ctx.expr);
assert_eq!(s, "Some(Binary(Add, Number(5), Binary(Multiply, Number(10), Number(4))))");
let r = Context { expr: None };
let mut p = yyParser::new(r);
p.Parse(TokenType::INTEGER, Some(15));
p.Parse(TokenType::DIVIDE, None);
p.Parse(TokenType::INTEGER, Some(5));
p.Parse(TokenType::EOF, None);
p.ParseFinalize();
let s = format!("{:?}", p.ctx.expr);
assert_eq!(s, "Some(Binary(Divide, Number(15), Number(5)))");
let r = Context { expr: None };
let mut p = yyParser::new(r);
p.Parse(TokenType::INTEGER, Some(50));
p.Parse(TokenType::PLUS, None);
p.Parse(TokenType::INTEGER, Some(125));
p.Parse(TokenType::EOF, None);
p.ParseFinalize();
let s = format!("{:?}", p.ctx.expr);
assert_eq!(s, "Some(Binary(Add, Number(50), Number(125)))");
let r = Context { expr: None };
let mut p = yyParser::new(r);
p.Parse(TokenType::INTEGER, Some(50));
p.Parse(TokenType::TIMES, None);
p.Parse(TokenType::INTEGER, Some(125));
p.Parse(TokenType::PLUS, None);
p.Parse(TokenType::INTEGER, Some(125));
p.Parse(TokenType::EOF, None);
p.ParseFinalize();
let s = format!("{:?}", p.ctx.expr);
assert_eq!(s, "Some(Binary(Add, Binary(Multiply, Number(50), Number(125)), Number(125)))");
}
static LOGGER: Logger = Logger;
struct Logger;
impl log::Log for Logger {
fn enabled(&self, metadata: &Metadata) -> bool {
metadata.level() <= Level::Debug
}
fn log(&self, record: &Record) {
if self.enabled(record.metadata()) {
eprintln!("{} - {}", record.level(), record.args());
}
}
fn flush(&self) {
}
}
fn init_logger() -> Result<(), SetLoggerError> {
log::set_logger(&LOGGER)?;
log::set_max_level(LevelFilter::Debug);
Ok(())
}
}
%syntax_error {
let _ = yymajor;
println!("near token {:?}: syntax error", yyminor);
}
program ::= expr(A). { self.ctx.expr = Some(A); }
%type expr { Expr }
expr(A) ::= expr(B) MINUS expr(C). { A = Expr::binary(Operator::Subtract, B, C); }
expr(A) ::= expr(B) PLUS expr(C). { A = Expr::binary(Operator::Add, B, C); }
expr(A) ::= expr(B) TIMES expr(C). { A = Expr::binary(Operator::Multiply, B, C); }
expr(A) ::= expr(B) DIVIDE expr(C). { A = Expr::binary(Operator::Divide, B, C); }
expr(A) ::= INTEGER(B). { A = Expr::Number(B.unwrap()); }


@@ -0,0 +1,55 @@
use fallible_iterator::FallibleIterator;
use std::env;
use std::fs::read;
use std::panic;
use sqlite3_parser::lexer::sql::Parser;
/// Parse specified files and check all commands.
fn main() {
env_logger::init();
let args = env::args();
for arg in args.skip(1) {
println!("{arg}");
let result = panic::catch_unwind(|| {
let input = read(arg.clone()).unwrap();
let mut parser = Parser::new(&input);
loop {
match parser.next() {
Ok(None) => break,
Err(err) => {
eprintln!("Err: {err} in {arg}");
break;
}
Ok(Some(cmd)) => {
let input = cmd.to_string();
let mut checker = Parser::new(input.as_bytes());
match checker.next() {
Err(err) => {
eprintln!(
"Check Err in {}:{}, {} in\n{}\n{:?}",
arg,
parser.line(),
err,
input,
cmd
);
}
Ok(None) => {
eprintln!("Check Err in {}:{}, {:?}", arg, parser.line(), cmd);
}
Ok(Some(check)) => {
if cmd != check {
eprintln!("{cmd:?}\n<>\n{check:?}");
}
}
}
}
}
}
});
if let Err(e) = result {
eprintln!("Panic: {e:?} in {arg}");
}
}
}


@@ -0,0 +1,26 @@
use std::env;
use fallible_iterator::FallibleIterator;
use sqlite3_parser::lexer::sql::Parser;
/// Parse args.
// RUST_LOG=sqlite3Parser=debug
fn main() {
env_logger::init();
let args = env::args();
for arg in args.skip(1) {
let mut parser = Parser::new(arg.as_bytes());
loop {
match parser.next() {
Ok(None) => break,
Err(err) => {
eprintln!("Err: {err} in {arg}");
break;
}
Ok(Some(cmd)) => {
println!("{cmd}");
}
}
}
}
}


@@ -0,0 +1,42 @@
use fallible_iterator::FallibleIterator;
use std::env;
use std::fs::read;
use std::panic;
#[cfg(not(feature = "YYNOERRORRECOVERY"))]
use sqlite3_parser::lexer::sql::Error;
use sqlite3_parser::lexer::sql::Parser;
/// Parse specified files and print all commands.
fn main() {
env_logger::init();
let args = env::args();
for arg in args.skip(1) {
println!("{arg}");
let result = panic::catch_unwind(|| {
let input = read(arg.clone()).unwrap();
let mut parser = Parser::new(input.as_ref());
loop {
match parser.next() {
Ok(None) => break,
Err(err) => {
eprintln!("Err: {err} in {arg}");
#[cfg(feature = "YYNOERRORRECOVERY")]
break;
#[cfg(not(feature = "YYNOERRORRECOVERY"))]
if let Error::ParserError(..) = err {
} else {
break;
}
}
Ok(Some(cmd)) => {
println!("{cmd}");
}
}
}
});
if let Err(e) = result {
eprintln!("Panic: {e:?} in {arg}");
}
}
}


@@ -0,0 +1,90 @@
use sqlite3_parser::lexer::sql::{TokenType, Tokenizer};
use sqlite3_parser::lexer::Scanner;
use std::env;
use std::fs::read;
use std::str;
/// Tokenize specified files (and do some checks)
fn main() {
use TokenType::*;
let args = env::args();
for arg in args.skip(1) {
let input = read(arg.clone()).unwrap();
let tokenizer = Tokenizer::new();
let mut s = Scanner::new(tokenizer);
loop {
match s.scan(&input) {
Ok((_, None, _)) => break,
Err(err) => {
//eprintln!("{} at line: {}, column: {}", err, s.line(), s.column());
eprintln!("Err: {err} in {arg}");
break;
}
Ok((_, Some((token, token_type)), _)) => match token_type {
TK_TEMP => debug_assert!(
b"TEMP".eq_ignore_ascii_case(token)
|| b"TEMPORARY".eq_ignore_ascii_case(token)
),
TK_EQ => debug_assert!(b"=" == token || b"==" == token),
TK_NE => debug_assert!(b"<>" == token || b"!=" == token),
//TK_STRING => debug_assert!(),
//TK_ID => debug_assert!(),
//TK_VARIABLE => debug_assert!(),
TK_BLOB => debug_assert!(
token.len() % 2 == 0 && token.iter().all(u8::is_ascii_hexdigit)
),
TK_INTEGER => {
if token.len() > 2
&& token[0] == b'0'
&& (token[1] == b'x' || token[1] == b'X')
{
if let Err(err) =
i64::from_str_radix(str::from_utf8(&token[2..]).unwrap(), 16)
{
eprintln!("Err: {err} in {arg}");
}
} else {
/*let raw = str::from_utf8(token).unwrap();
let res = raw.parse::<i64>();
if res.is_err() {
eprintln!("Err: {} in {}", res.unwrap_err(), arg);
}*/
debug_assert!(token.iter().all(u8::is_ascii_digit))
}
}
TK_FLOAT => {
debug_assert!(str::from_utf8(token).unwrap().parse::<f64>().is_ok())
}
TK_CTIME_KW => debug_assert!(
b"CURRENT_DATE".eq_ignore_ascii_case(token)
|| b"CURRENT_TIME".eq_ignore_ascii_case(token)
|| b"CURRENT_TIMESTAMP".eq_ignore_ascii_case(token)
),
TK_JOIN_KW => debug_assert!(
b"CROSS".eq_ignore_ascii_case(token)
|| b"FULL".eq_ignore_ascii_case(token)
|| b"INNER".eq_ignore_ascii_case(token)
|| b"LEFT".eq_ignore_ascii_case(token)
|| b"NATURAL".eq_ignore_ascii_case(token)
|| b"OUTER".eq_ignore_ascii_case(token)
|| b"RIGHT".eq_ignore_ascii_case(token)
),
TK_LIKE_KW => debug_assert!(
b"GLOB".eq_ignore_ascii_case(token)
|| b"LIKE".eq_ignore_ascii_case(token)
|| b"REGEXP".eq_ignore_ascii_case(token)
),
_ => match token_type.as_str() {
Some(str) => {
debug_assert!(str.eq_ignore_ascii_case(str::from_utf8(token).unwrap()))
}
_ => {
println!("'{}', {:?}", str::from_utf8(token).unwrap(), token_type);
}
},
},
}
}
}
}


@@ -0,0 +1,19 @@
[package]
name = "sqlparser_bench"
version = "0.1.0"
authors = ["Dandandan <danielheres@gmail.com>"]
edition = "2018"
[dependencies]
sqlite3-parser = { path = "..", default-features = false, features = [
"YYNOERRORRECOVERY",
"NDEBUG",
] }
fallible-iterator = "0.3"
[dev-dependencies]
criterion = "0.5"
[[bench]]
name = "sqlparser_bench"
harness = false


@@ -0,0 +1,32 @@
Adapted from https://github.com/ballista-compute/sqlparser-rs/tree/main/sqlparser_bench
## sqlparser-rs
```
sqlparser-rs parsing benchmark/sqlparser::select
time: [9.9697 µs 10.068 µs 10.184 µs]
Found 14 outliers among 100 measurements (14.00%)
5 (5.00%) high mild
9 (9.00%) high severe
sqlparser-rs parsing benchmark/sqlparser::with_select
time: [59.569 µs 60.088 µs 60.743 µs]
Found 9 outliers among 100 measurements (9.00%)
3 (3.00%) high mild
6 (6.00%) high severe
```
## sqlite3-parser
```
sqlparser-rs parsing benchmark/sqlparser::select
time: [6.5488 µs 6.5773 µs 6.6108 µs]
Found 10 outliers among 100 measurements (10.00%)
4 (4.00%) high mild
6 (6.00%) high severe
sqlparser-rs parsing benchmark/sqlparser::with_select
time: [22.182 µs 22.321 µs 22.473 µs]
Found 8 outliers among 100 measurements (8.00%)
1 (1.00%) low mild
3 (3.00%) high mild
4 (4.00%) high severe
```


@@ -0,0 +1,48 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use criterion::{criterion_group, criterion_main, Criterion};
use fallible_iterator::FallibleIterator;
use sqlite3_parser::lexer::sql::Parser;
fn basic_queries(c: &mut Criterion) {
let mut group = c.benchmark_group("sqlparser-rs parsing benchmark");
let string = b"SELECT * FROM `table` WHERE 1 = 1";
group.bench_with_input("sqlparser::select", &string, |b, &s| {
b.iter(|| {
let mut parser = Parser::new(s);
assert!(parser.next().unwrap().unwrap().readonly())
});
});
let with_query = b"
WITH derived AS (
SELECT MAX(a) AS max_a,
COUNT(b) AS b_num,
user_id
FROM `TABLE`
GROUP BY user_id
)
SELECT * FROM `table`
LEFT JOIN derived USING (user_id)
";
group.bench_with_input("sqlparser::with_select", &with_query, |b, &s| {
b.iter(|| {
let mut parser = Parser::new(s);
assert!(parser.next().unwrap().unwrap().readonly())
});
});
}
criterion_group!(benches, basic_queries);
criterion_main!(benches);


@@ -0,0 +1,245 @@
//! SQLite dialect
use std::fmt::Formatter;
use std::str;
use uncased::UncasedStr;
mod token;
pub use token::TokenType;
/// Token value (lexeme)
#[derive(Clone, Copy)]
pub struct Token<'i>(pub usize, pub &'i [u8], pub usize);
pub(crate) fn sentinel(start: usize) -> Token<'static> {
Token(start, b"", start)
}
impl Token<'_> {
/// Access token value
pub fn unwrap(self) -> String {
from_bytes(self.1)
}
}
impl std::fmt::Debug for Token<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("Token").field(&self.1).finish()
}
}
impl TokenType {
// TODO try Cow<&'static, str> (Borrowed<&'static str> for keyword and Owned<String> for below),
// => Syntax error on keyword will be better
// => `from_token` will become unnecessary
pub(crate) fn to_token(self, start: usize, value: &[u8], end: usize) -> Token<'_> {
Token(start, value, end)
}
}
pub(crate) fn from_bytes(bytes: &[u8]) -> String {
unsafe { str::from_utf8_unchecked(bytes).to_owned() }
}
include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
pub(crate) const MAX_KEYWORD_LEN: usize = 17;
/// Check if `word` is a keyword
pub fn keyword_token(word: &[u8]) -> Option<TokenType> {
KEYWORDS
.get(UncasedStr::new(unsafe { str::from_utf8_unchecked(word) }))
.copied()
}
pub(crate) fn is_identifier(name: &str) -> bool {
if name.is_empty() {
return false;
}
let bytes = name.as_bytes();
is_identifier_start(bytes[0])
&& (bytes.len() == 1 || bytes[1..].iter().all(|b| is_identifier_continue(*b)))
}
pub(crate) fn is_identifier_start(b: u8) -> bool {
b.is_ascii_uppercase() || b == b'_' || b.is_ascii_lowercase() || b > b'\x7F'
}
pub(crate) fn is_identifier_continue(b: u8) -> bool {
b == b'$'
|| b.is_ascii_digit()
|| b.is_ascii_uppercase()
|| b == b'_'
|| b.is_ascii_lowercase()
|| b > b'\x7F'
}
// keyword may become an identifier
// see %fallback in parse.y
pub(crate) fn from_token(_ty: u16, value: Token) -> String {
from_bytes(value.1)
}
impl TokenType {
/// Return the associated string (mainly for testing)
pub const fn as_str(&self) -> Option<&'static str> {
use TokenType::*;
match self {
TK_ABORT => Some("ABORT"),
TK_ACTION => Some("ACTION"),
TK_ADD => Some("ADD"),
TK_AFTER => Some("AFTER"),
TK_ALL => Some("ALL"),
TK_ALTER => Some("ALTER"),
TK_ANALYZE => Some("ANALYZE"),
TK_ALWAYS => Some("ALWAYS"),
TK_AND => Some("AND"),
TK_AS => Some("AS"),
TK_ASC => Some("ASC"),
TK_ATTACH => Some("ATTACH"),
TK_AUTOINCR => Some("AUTOINCREMENT"),
TK_BEFORE => Some("BEFORE"),
TK_BEGIN => Some("BEGIN"),
TK_BETWEEN => Some("BETWEEN"),
TK_BY => Some("BY"),
TK_CASCADE => Some("CASCADE"),
TK_CASE => Some("CASE"),
TK_CAST => Some("CAST"),
TK_CHECK => Some("CHECK"),
TK_COLLATE => Some("COLLATE"),
TK_COLUMNKW => Some("COLUMN"),
TK_COMMIT => Some("COMMIT"),
TK_CONFLICT => Some("CONFLICT"),
TK_CONSTRAINT => Some("CONSTRAINT"),
TK_CREATE => Some("CREATE"),
TK_CURRENT => Some("CURRENT"),
TK_DATABASE => Some("DATABASE"),
TK_DEFAULT => Some("DEFAULT"),
TK_DEFERRABLE => Some("DEFERRABLE"),
TK_DEFERRED => Some("DEFERRED"),
TK_DELETE => Some("DELETE"),
TK_DESC => Some("DESC"),
TK_DETACH => Some("DETACH"),
TK_DISTINCT => Some("DISTINCT"),
TK_DO => Some("DO"),
TK_DROP => Some("DROP"),
TK_EACH => Some("EACH"),
TK_ELSE => Some("ELSE"),
TK_END => Some("END"),
TK_ESCAPE => Some("ESCAPE"),
TK_EXCEPT => Some("EXCEPT"),
TK_EXCLUDE => Some("EXCLUDE"),
TK_EXCLUSIVE => Some("EXCLUSIVE"),
TK_EXISTS => Some("EXISTS"),
TK_EXPLAIN => Some("EXPLAIN"),
TK_FAIL => Some("FAIL"),
TK_FILTER => Some("FILTER"),
TK_FIRST => Some("FIRST"),
TK_FOLLOWING => Some("FOLLOWING"),
TK_FOR => Some("FOR"),
TK_FOREIGN => Some("FOREIGN"),
TK_FROM => Some("FROM"),
TK_GENERATED => Some("GENERATED"),
TK_GROUP => Some("GROUP"),
TK_GROUPS => Some("GROUPS"),
TK_HAVING => Some("HAVING"),
TK_IF => Some("IF"),
TK_IGNORE => Some("IGNORE"),
TK_IMMEDIATE => Some("IMMEDIATE"),
TK_IN => Some("IN"),
TK_INDEX => Some("INDEX"),
TK_INDEXED => Some("INDEXED"),
TK_INITIALLY => Some("INITIALLY"),
TK_INSERT => Some("INSERT"),
TK_INSTEAD => Some("INSTEAD"),
TK_INTERSECT => Some("INTERSECT"),
TK_INTO => Some("INTO"),
TK_IS => Some("IS"),
TK_ISNULL => Some("ISNULL"),
TK_JOIN => Some("JOIN"),
TK_KEY => Some("KEY"),
TK_LAST => Some("LAST"),
TK_LIMIT => Some("LIMIT"),
TK_MATCH => Some("MATCH"),
TK_MATERIALIZED => Some("MATERIALIZED"),
TK_NO => Some("NO"),
TK_NOT => Some("NOT"),
TK_NOTHING => Some("NOTHING"),
TK_NOTNULL => Some("NOTNULL"),
TK_NULL => Some("NULL"),
TK_NULLS => Some("NULLS"),
TK_OF => Some("OF"),
TK_OFFSET => Some("OFFSET"),
TK_ON => Some("ON"),
TK_OR => Some("OR"),
TK_ORDER => Some("ORDER"),
TK_OTHERS => Some("OTHERS"),
TK_OVER => Some("OVER"),
TK_PARTITION => Some("PARTITION"),
TK_PLAN => Some("PLAN"),
TK_PRAGMA => Some("PRAGMA"),
TK_PRECEDING => Some("PRECEDING"),
TK_PRIMARY => Some("PRIMARY"),
TK_QUERY => Some("QUERY"),
TK_RAISE => Some("RAISE"),
TK_RANGE => Some("RANGE"),
TK_RECURSIVE => Some("RECURSIVE"),
TK_REFERENCES => Some("REFERENCES"),
TK_REINDEX => Some("REINDEX"),
TK_RELEASE => Some("RELEASE"),
TK_RENAME => Some("RENAME"),
TK_REPLACE => Some("REPLACE"),
TK_RETURNING => Some("RETURNING"),
TK_RESTRICT => Some("RESTRICT"),
TK_ROLLBACK => Some("ROLLBACK"),
TK_ROW => Some("ROW"),
TK_ROWS => Some("ROWS"),
TK_SAVEPOINT => Some("SAVEPOINT"),
TK_SELECT => Some("SELECT"),
TK_SET => Some("SET"),
TK_TABLE => Some("TABLE"),
TK_TEMP => Some("TEMP"), // or TEMPORARY
TK_TIES => Some("TIES"),
TK_THEN => Some("THEN"),
TK_TO => Some("TO"),
TK_TRANSACTION => Some("TRANSACTION"),
TK_TRIGGER => Some("TRIGGER"),
TK_UNBOUNDED => Some("UNBOUNDED"),
TK_UNION => Some("UNION"),
TK_UNIQUE => Some("UNIQUE"),
TK_UPDATE => Some("UPDATE"),
TK_USING => Some("USING"),
TK_VACUUM => Some("VACUUM"),
TK_VALUES => Some("VALUES"),
TK_VIEW => Some("VIEW"),
TK_VIRTUAL => Some("VIRTUAL"),
TK_WHEN => Some("WHEN"),
TK_WHERE => Some("WHERE"),
TK_WINDOW => Some("WINDOW"),
TK_WITH => Some("WITH"),
TK_WITHOUT => Some("WITHOUT"),
TK_BITAND => Some("&"),
TK_BITNOT => Some("~"),
TK_BITOR => Some("|"),
TK_COMMA => Some(","),
TK_CONCAT => Some("||"),
TK_DOT => Some("."),
TK_EQ => Some("="), // or ==
TK_GT => Some(">"),
TK_GE => Some(">="),
TK_LP => Some("("),
TK_LSHIFT => Some("<<"),
TK_LE => Some("<="),
TK_LT => Some("<"),
TK_MINUS => Some("-"),
TK_NE => Some("<>"), // or !=
TK_PLUS => Some("+"),
TK_REM => Some("%"),
TK_RP => Some(")"),
TK_RSHIFT => Some(">>"),
TK_SEMI => Some(";"),
TK_SLASH => Some("/"),
TK_STAR => Some("*"),
_ => None,
}
}
}

View File

@@ -0,0 +1,180 @@
//! All terminal symbols.
/// Token classes
// Generated by lemon (parse.h).
// Renamed manually.
// To be kept in sync.
#[non_exhaustive]
#[allow(non_camel_case_types, missing_docs)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
#[repr(u16)]
pub enum TokenType {
TK_EOF = 0,
TK_SEMI = 1,
TK_EXPLAIN = 2,
TK_QUERY = 3,
TK_PLAN = 4,
TK_BEGIN = 5,
TK_TRANSACTION = 6,
TK_DEFERRED = 7,
TK_IMMEDIATE = 8,
TK_EXCLUSIVE = 9,
TK_COMMIT = 10,
TK_END = 11,
TK_ROLLBACK = 12,
TK_SAVEPOINT = 13,
TK_RELEASE = 14,
TK_TO = 15,
TK_TABLE = 16,
TK_CREATE = 17,
TK_IF = 18,
TK_NOT = 19,
TK_EXISTS = 20,
TK_TEMP = 21,
TK_LP = 22,
TK_RP = 23,
TK_AS = 24,
TK_COMMA = 25,
TK_WITHOUT = 26,
TK_ABORT = 27,
TK_ACTION = 28,
TK_AFTER = 29,
TK_ANALYZE = 30,
TK_ASC = 31,
TK_ATTACH = 32,
TK_BEFORE = 33,
TK_BY = 34,
TK_CASCADE = 35,
TK_CAST = 36,
TK_CONFLICT = 37,
TK_DATABASE = 38,
TK_DESC = 39,
TK_DETACH = 40,
TK_EACH = 41,
TK_FAIL = 42,
TK_OR = 43,
TK_AND = 44,
TK_IS = 45,
TK_ISNOT = 46,
TK_MATCH = 47,
TK_LIKE_KW = 48,
TK_BETWEEN = 49,
TK_IN = 50,
TK_ISNULL = 51,
TK_NOTNULL = 52,
TK_NE = 53,
TK_EQ = 54,
TK_GT = 55,
TK_LE = 56,
TK_LT = 57,
TK_GE = 58,
TK_ESCAPE = 59,
TK_ID = 60,
TK_COLUMNKW = 61,
TK_DO = 62,
TK_FOR = 63,
TK_IGNORE = 64,
TK_INITIALLY = 65,
TK_INSTEAD = 66,
TK_NO = 67,
TK_KEY = 68,
TK_OF = 69,
TK_OFFSET = 70,
TK_PRAGMA = 71,
TK_RAISE = 72,
TK_RECURSIVE = 73,
TK_REPLACE = 74,
TK_RESTRICT = 75,
TK_ROW = 76,
TK_ROWS = 77,
TK_TRIGGER = 78,
TK_VACUUM = 79,
TK_VIEW = 80,
TK_VIRTUAL = 81,
TK_WITH = 82,
TK_NULLS = 83,
TK_FIRST = 84,
TK_LAST = 85,
TK_CURRENT = 86,
TK_FOLLOWING = 87,
TK_PARTITION = 88,
TK_PRECEDING = 89,
TK_RANGE = 90,
TK_UNBOUNDED = 91,
TK_EXCLUDE = 92,
TK_GROUPS = 93,
TK_OTHERS = 94,
TK_TIES = 95,
TK_GENERATED = 96,
TK_ALWAYS = 97,
TK_MATERIALIZED = 98,
TK_REINDEX = 99,
TK_RENAME = 100,
TK_CTIME_KW = 101,
TK_ANY = 102,
TK_BITAND = 103,
TK_BITOR = 104,
TK_LSHIFT = 105,
TK_RSHIFT = 106,
TK_PLUS = 107,
TK_MINUS = 108,
TK_STAR = 109,
TK_SLASH = 110,
TK_REM = 111,
TK_CONCAT = 112,
TK_PTR = 113,
TK_COLLATE = 114,
TK_BITNOT = 115,
TK_ON = 116,
TK_INDEXED = 117,
TK_STRING = 118,
TK_JOIN_KW = 119,
TK_CONSTRAINT = 120,
TK_DEFAULT = 121,
TK_NULL = 122,
TK_PRIMARY = 123,
TK_UNIQUE = 124,
TK_CHECK = 125,
TK_REFERENCES = 126,
TK_AUTOINCR = 127,
TK_INSERT = 128,
TK_DELETE = 129,
TK_UPDATE = 130,
TK_SET = 131,
TK_DEFERRABLE = 132,
TK_FOREIGN = 133,
TK_DROP = 134,
TK_UNION = 135,
TK_ALL = 136,
TK_EXCEPT = 137,
TK_INTERSECT = 138,
TK_SELECT = 139,
TK_VALUES = 140,
TK_DISTINCT = 141,
TK_DOT = 142,
TK_FROM = 143,
TK_JOIN = 144,
TK_USING = 145,
TK_ORDER = 146,
TK_GROUP = 147,
TK_HAVING = 148,
TK_LIMIT = 149,
TK_WHERE = 150,
TK_RETURNING = 151,
TK_INTO = 152,
TK_NOTHING = 153,
TK_BLOB = 154,
TK_FLOAT = 155,
TK_INTEGER = 156,
TK_VARIABLE = 157,
TK_CASE = 158,
TK_WHEN = 159,
TK_THEN = 160,
TK_ELSE = 161,
TK_INDEX = 162,
TK_ALTER = 163,
TK_ADD = 164,
TK_WINDOW = 165,
TK_OVER = 166,
TK_FILTER = 167,
}

View File

@@ -0,0 +1,6 @@
//! Streaming SQLite tokenizer
mod scan;
pub mod sql;
pub use scan::{ScanError, Scanner, Splitter};

View File

@@ -0,0 +1,172 @@
//! Adaptation/port of [Go scanner](http://tip.golang.org/pkg/bufio/#Scanner).
use log::debug;
use std::error::Error;
use std::fmt;
use std::io;
/// Error with position
pub trait ScanError: Error + From<io::Error> + Sized {
/// Update the position where the error occurs
fn position(&mut self, line: u64, column: usize);
}
/// The `(&[u8], TokenType)` is the token.
/// And the `usize` is the number of bytes to consume.
type SplitResult<'input, TokenType, Error> =
Result<(Option<(&'input [u8], TokenType)>, usize), Error>;
/// Split function used to tokenize the input
pub trait Splitter: Sized {
/// Potential error raised
type Error: ScanError;
//type Item: ?Sized;
/// Token generated
type TokenType;
/// The argument is an initial substring of the remaining unprocessed
/// data.
///
/// If an error is returned, scanning stops and the error
/// is returned to the client.
///
/// The function is never called with an empty data slice.
fn split<'input>(
&mut self,
data: &'input [u8],
) -> SplitResult<'input, Self::TokenType, Self::Error>;
}
/// Like a `BufReader` but with a growable buffer.
/// Successive calls to the `scan` method will step through the 'tokens'
/// of a file, skipping the bytes between the tokens.
///
/// Scanning stops unrecoverably at EOF, the first I/O error, or a token too
/// large to fit in the buffer. When a scan stops, the reader may have
/// advanced arbitrarily far past the last token.
pub struct Scanner<S: Splitter> {
/// offset in `input`
offset: usize,
/// mark
mark: (usize, u64, usize),
/// The function to tokenize the input.
splitter: S,
/// current line number
line: u64,
/// current column number (byte offset, not char offset)
column: usize,
}
impl<S: Splitter> Scanner<S> {
/// Constructor
pub fn new(splitter: S) -> Self {
Self {
offset: 0,
mark: (0, 0, 0),
splitter,
line: 1,
column: 1,
}
}
/// Current line number
pub fn line(&self) -> u64 {
self.line
}
/// Current column number (byte offset, not char offset)
pub fn column(&self) -> usize {
self.column
}
/// Associated splitter
pub fn splitter(&self) -> &S {
&self.splitter
}
/// Mark current position
pub fn mark(&mut self) {
self.mark = (self.offset, self.line, self.column);
}
/// Reset to mark
pub fn reset_to_mark(&mut self) {
(self.offset, self.line, self.column) = self.mark;
}
/// Reset the scanner such that it behaves as if it had never been used.
pub fn reset(&mut self) {
self.offset = 0;
self.line = 1;
self.column = 1;
}
}
type ScanResult<'input, TokenType, Error> =
Result<(usize, Option<(&'input [u8], TokenType)>, usize), Error>;
impl<S: Splitter> Scanner<S> {
/// Advance the Scanner to next token.
/// Return the token as a byte slice.
/// Return `None` when the end of the input is reached.
/// Return any error that occurs while reading the input.
pub fn scan<'input>(
&mut self,
input: &'input [u8],
) -> ScanResult<'input, S::TokenType, S::Error> {
debug!(target: "scanner", "scan(line: {}, column: {})", self.line, self.column);
// Loop until we have a token.
loop {
// See if we can get a token with what we already have.
if self.offset < input.len() {
let data = &input[self.offset..];
match self.splitter.split(data) {
Err(mut e) => {
e.position(self.line, self.column);
return Err(e);
}
Ok((None, 0)) => {
// Done
}
Ok((None, amt)) => {
// Ignore/skip this data
self.consume(data, amt);
continue;
}
Ok((tok, amt)) => {
let start = self.offset;
self.consume(data, amt);
return Ok((start, tok, self.offset));
}
}
}
// We cannot generate a token with what we are holding.
// we are done.
return Ok((self.offset, None, self.offset));
}
}
/// Consume `amt` bytes of the buffer.
fn consume(&mut self, data: &[u8], amt: usize) {
debug!(target: "scanner", "consume({})", amt);
debug_assert!(amt <= data.len());
for byte in &data[..amt] {
if *byte == b'\n' {
self.line += 1;
self.column = 1;
} else {
self.column += 1;
}
}
self.offset += amt;
}
}
impl<S: Splitter> fmt::Debug for Scanner<S> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Scanner")
.field("offset", &self.offset)
.field("mark", &self.mark)
.field("line", &self.line)
.field("column", &self.column)
.finish()
}
}

View File

@@ -0,0 +1,95 @@
use std::error;
use std::fmt;
use std::io;
use crate::lexer::scan::ScanError;
use crate::parser::ParserError;
/// SQL lexer and parser errors
#[non_exhaustive]
#[derive(Debug)]
pub enum Error {
/// I/O Error
Io(io::Error),
/// Lexer error
UnrecognizedToken(Option<(u64, usize)>),
/// Missing quote or double-quote or backtick
UnterminatedLiteral(Option<(u64, usize)>),
/// Missing `]`
UnterminatedBracket(Option<(u64, usize)>),
/// Missing `*/`
UnterminatedBlockComment(Option<(u64, usize)>),
/// Invalid parameter name
BadVariableName(Option<(u64, usize)>),
/// Invalid number format
BadNumber(Option<(u64, usize)>),
/// Invalid or missing sign after `!`
ExpectedEqualsSign(Option<(u64, usize)>),
/// BLOB literals are string literals containing hexadecimal data and preceded by a single "x" or "X" character.
MalformedBlobLiteral(Option<(u64, usize)>),
/// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
MalformedHexInteger(Option<(u64, usize)>),
/// Grammar error
ParserError(ParserError, Option<(u64, usize)>),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Self::Io(ref err) => err.fmt(f),
Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos.unwrap()),
Self::UnterminatedLiteral(pos) => {
write!(f, "non-terminated literal at {:?}", pos.unwrap())
}
Self::UnterminatedBracket(pos) => {
write!(f, "non-terminated bracket at {:?}", pos.unwrap())
}
Self::UnterminatedBlockComment(pos) => {
write!(f, "non-terminated block comment at {:?}", pos.unwrap())
}
Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos.unwrap()),
Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos.unwrap()),
Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos.unwrap()),
Self::MalformedBlobLiteral(pos) => {
write!(f, "malformed blob literal at {:?}", pos.unwrap())
}
Self::MalformedHexInteger(pos) => {
write!(f, "malformed hex integer at {:?}", pos.unwrap())
}
Self::ParserError(ref msg, Some(pos)) => write!(f, "{msg} at {pos:?}"),
Self::ParserError(ref msg, _) => write!(f, "{msg}"),
}
}
}
impl error::Error for Error {}
impl From<io::Error> for Error {
fn from(err: io::Error) -> Self {
Self::Io(err)
}
}
impl From<ParserError> for Error {
fn from(err: ParserError) -> Self {
Self::ParserError(err, None)
}
}
impl ScanError for Error {
fn position(&mut self, line: u64, column: usize) {
match *self {
Self::Io(_) => {}
Self::UnrecognizedToken(ref mut pos) => *pos = Some((line, column)),
Self::UnterminatedLiteral(ref mut pos) => *pos = Some((line, column)),
Self::UnterminatedBracket(ref mut pos) => *pos = Some((line, column)),
Self::UnterminatedBlockComment(ref mut pos) => *pos = Some((line, column)),
Self::BadVariableName(ref mut pos) => *pos = Some((line, column)),
Self::BadNumber(ref mut pos) => *pos = Some((line, column)),
Self::ExpectedEqualsSign(ref mut pos) => *pos = Some((line, column)),
Self::MalformedBlobLiteral(ref mut pos) => *pos = Some((line, column)),
Self::MalformedHexInteger(ref mut pos) => *pos = Some((line, column)),
Self::ParserError(_, ref mut pos) => *pos = Some((line, column)),
}
}
}

View File

@@ -0,0 +1,678 @@
//! Adaptation/port of [`SQLite` tokenizer](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/tokenize.c)
use fallible_iterator::FallibleIterator;
use memchr::memchr;
pub use crate::dialect::TokenType;
use crate::dialect::TokenType::*;
use crate::dialect::{
is_identifier_continue, is_identifier_start, keyword_token, sentinel, MAX_KEYWORD_LEN,
};
use crate::parser::ast::Cmd;
use crate::parser::parse::{yyParser, YYCODETYPE};
use crate::parser::Context;
mod error;
#[cfg(test)]
mod test;
use crate::lexer::scan::ScanError;
use crate::lexer::scan::Splitter;
use crate::lexer::Scanner;
pub use crate::parser::ParserError;
pub use error::Error;
// TODO Extract scanning stuff and move this into the parser crate
// to make possible to use the tokenizer without depending on the parser...
/// SQL parser
pub struct Parser<'input> {
input: &'input [u8],
scanner: Scanner<Tokenizer>,
parser: yyParser<'input>,
}
impl<'input> Parser<'input> {
/// Constructor
pub fn new(input: &'input [u8]) -> Self {
let lexer = Tokenizer::new();
let scanner = Scanner::new(lexer);
let ctx = Context::new(input);
let parser = yyParser::new(ctx);
Parser {
input,
scanner,
parser,
}
}
/// Parse new `input`
pub fn reset(&mut self, input: &'input [u8]) {
self.input = input;
self.scanner.reset();
}
/// Current line position in input
pub fn line(&self) -> u64 {
self.scanner.line()
}
/// Current column position in input
pub fn column(&self) -> usize {
self.scanner.column()
}
}
/*
** Return the id of the next token in input.
*/
fn get_token(scanner: &mut Scanner<Tokenizer>, input: &[u8]) -> Result<TokenType, Error> {
let mut t = {
let (_, token_type) = match scanner.scan(input)? {
(_, None, _) => {
return Ok(TK_EOF);
}
(_, Some(tuple), _) => tuple,
};
token_type
};
if t == TK_ID
|| t == TK_STRING
|| t == TK_JOIN_KW
|| t == TK_WINDOW
|| t == TK_OVER
|| yyParser::parse_fallback(t as YYCODETYPE) == TK_ID as YYCODETYPE
{
t = TK_ID;
}
Ok(t)
}
/*
** The following three functions are called immediately after the tokenizer
** reads the keywords WINDOW, OVER and FILTER, respectively, to determine
** whether the token should be treated as a keyword or an SQL identifier.
** This cannot be handled by the usual lemon %fallback method, due to
** the ambiguity in some constructions. e.g.
**
** SELECT sum(x) OVER ...
**
** In the above, "OVER" might be a keyword, or it might be an alias for the
** sum(x) expression. If a "%fallback ID OVER" directive were added to
** grammar, then SQLite would always treat "OVER" as an alias, making it
** impossible to call a window-function without a FILTER clause.
**
** WINDOW is treated as a keyword if:
**
** * the following token is an identifier, or a keyword that can fallback
** to being an identifier, and
** * the token after that one is TK_AS.
**
** OVER is a keyword if:
**
** * the previous token was TK_RP, and
** * the next token is either TK_LP or an identifier.
**
** FILTER is a keyword if:
**
** * the previous token was TK_RP, and
** * the next token is TK_LP.
*/
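// Illustrative sketch (not part of the upstream comment): in
// `SELECT count(*) OVER (PARTITION BY a) FROM t`, the token before OVER is
// TK_RP and the token after it is TK_LP, so OVER is kept as a keyword; in
// `SELECT count(*) AS over FROM t`, the previous token is TK_AS, so OVER
// falls back to an ordinary identifier.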
fn analyze_window_keyword(
scanner: &mut Scanner<Tokenizer>,
input: &[u8],
) -> Result<TokenType, Error> {
let t = get_token(scanner, input)?;
if t != TK_ID {
return Ok(TK_ID);
};
let t = get_token(scanner, input)?;
if t != TK_AS {
return Ok(TK_ID);
};
Ok(TK_WINDOW)
}
fn analyze_over_keyword(
scanner: &mut Scanner<Tokenizer>,
input: &[u8],
last_token: TokenType,
) -> Result<TokenType, Error> {
if last_token == TK_RP {
let t = get_token(scanner, input)?;
if t == TK_LP || t == TK_ID {
return Ok(TK_OVER);
}
}
Ok(TK_ID)
}
fn analyze_filter_keyword(
scanner: &mut Scanner<Tokenizer>,
input: &[u8],
last_token: TokenType,
) -> Result<TokenType, Error> {
if last_token == TK_RP && get_token(scanner, input)? == TK_LP {
return Ok(TK_FILTER);
}
Ok(TK_ID)
}
macro_rules! try_with_position {
($scanner:expr, $expr:expr) => {
match $expr {
Ok(val) => val,
Err(err) => {
let mut err = Error::from(err);
err.position($scanner.line(), $scanner.column());
return Err(err);
}
}
};
}
impl FallibleIterator for Parser<'_> {
type Item = Cmd;
type Error = Error;
fn next(&mut self) -> Result<Option<Cmd>, Error> {
//print!("line: {}, column: {}: ", self.scanner.line(), self.scanner.column());
self.parser.ctx.reset();
let mut last_token_parsed = TK_EOF;
let mut eof = false;
loop {
let (start, (value, mut token_type), end) = match self.scanner.scan(self.input)? {
(_, None, _) => {
eof = true;
break;
}
(start, Some(tuple), end) => (start, tuple, end),
};
let token = if token_type >= TK_WINDOW {
debug_assert!(
token_type == TK_OVER || token_type == TK_FILTER || token_type == TK_WINDOW
);
self.scanner.mark();
if token_type == TK_WINDOW {
token_type = analyze_window_keyword(&mut self.scanner, self.input)?;
} else if token_type == TK_OVER {
token_type =
analyze_over_keyword(&mut self.scanner, self.input, last_token_parsed)?;
} else if token_type == TK_FILTER {
token_type =
analyze_filter_keyword(&mut self.scanner, self.input, last_token_parsed)?;
}
self.scanner.reset_to_mark();
token_type.to_token(start, value, end)
} else {
token_type.to_token(start, value, end)
};
//println!("({:?}, {:?})", token_type, token);
try_with_position!(self.scanner, self.parser.sqlite3Parser(token_type, token));
last_token_parsed = token_type;
if self.parser.ctx.done() {
//println!();
break;
}
}
if last_token_parsed == TK_EOF {
return Ok(None); // empty input
}
/* Upon reaching the end of input, call the parser two more times
with tokens TK_SEMI and 0, in that order. */
if eof && self.parser.ctx.is_ok() {
if last_token_parsed != TK_SEMI {
try_with_position!(
self.scanner,
self.parser
.sqlite3Parser(TK_SEMI, sentinel(self.input.len()))
);
}
try_with_position!(
self.scanner,
self.parser
.sqlite3Parser(TK_EOF, sentinel(self.input.len()))
);
}
self.parser.sqlite3ParserFinalize();
if let Some(e) = self.parser.ctx.error() {
let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column())));
return Err(err);
}
let cmd = self.parser.ctx.cmd();
if let Some(ref cmd) = cmd {
if let Err(e) = cmd.check() {
let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column())));
return Err(err);
}
}
Ok(cmd)
}
}
/// SQL token
pub type Token<'input> = (&'input [u8], TokenType);
/// SQL lexer
#[derive(Default)]
pub struct Tokenizer {}
impl Tokenizer {
/// Constructor
pub fn new() -> Self {
Self {}
}
}
/// ```rust
/// use sqlite3_parser::lexer::sql::Tokenizer;
/// use sqlite3_parser::lexer::Scanner;
///
/// let tokenizer = Tokenizer::new();
/// let input = b"PRAGMA parser_trace=ON;";
/// let mut s = Scanner::new(tokenizer);
/// let Ok((_, Some((token1, _)), _)) = s.scan(input) else { panic!() };
/// s.scan(input).unwrap();
/// assert!(b"PRAGMA".eq_ignore_ascii_case(token1));
/// ```
impl Splitter for Tokenizer {
type Error = Error;
type TokenType = TokenType;
fn split<'input>(
&mut self,
data: &'input [u8],
) -> Result<(Option<Token<'input>>, usize), Error> {
if data[0].is_ascii_whitespace() {
// eat as much space as possible
return Ok((
None,
match data.iter().skip(1).position(|&b| !b.is_ascii_whitespace()) {
Some(i) => i + 1,
_ => data.len(),
},
));
}
match data[0] {
b'-' => {
if let Some(b) = data.get(1) {
if *b == b'-' {
// eat comment
if let Some(i) = memchr(b'\n', data) {
Ok((None, i + 1))
} else {
Ok((None, data.len()))
}
} else if *b == b'>' {
if let Some(b) = data.get(2) {
if *b == b'>' {
return Ok((Some((&data[..3], TK_PTR)), 3));
}
}
Ok((Some((&data[..2], TK_PTR)), 2))
} else {
Ok((Some((&data[..1], TK_MINUS)), 1))
}
} else {
Ok((Some((&data[..1], TK_MINUS)), 1))
}
}
b'(' => Ok((Some((&data[..1], TK_LP)), 1)),
b')' => Ok((Some((&data[..1], TK_RP)), 1)),
b';' => Ok((Some((&data[..1], TK_SEMI)), 1)),
b'+' => Ok((Some((&data[..1], TK_PLUS)), 1)),
b'*' => Ok((Some((&data[..1], TK_STAR)), 1)),
b'/' => {
if let Some(b) = data.get(1) {
if *b == b'*' {
// eat comment
let mut pb = 0;
let mut end = None;
for (i, b) in data.iter().enumerate().skip(2) {
if *b == b'/' && pb == b'*' {
end = Some(i);
break;
}
pb = *b;
}
if let Some(i) = end {
Ok((None, i + 1))
} else {
Err(Error::UnterminatedBlockComment(None))
}
} else {
Ok((Some((&data[..1], TK_SLASH)), 1))
}
} else {
Ok((Some((&data[..1], TK_SLASH)), 1))
}
}
b'%' => Ok((Some((&data[..1], TK_REM)), 1)),
b'=' => {
if let Some(b) = data.get(1) {
Ok(if *b == b'=' {
(Some((&data[..2], TK_EQ)), 2)
} else {
(Some((&data[..1], TK_EQ)), 1)
})
} else {
Ok((Some((&data[..1], TK_EQ)), 1))
}
}
b'<' => {
if let Some(b) = data.get(1) {
Ok(match *b {
b'=' => (Some((&data[..2], TK_LE)), 2),
b'>' => (Some((&data[..2], TK_NE)), 2),
b'<' => (Some((&data[..2], TK_LSHIFT)), 2),
_ => (Some((&data[..1], TK_LT)), 1),
})
} else {
Ok((Some((&data[..1], TK_LT)), 1))
}
}
b'>' => {
if let Some(b) = data.get(1) {
Ok(match *b {
b'=' => (Some((&data[..2], TK_GE)), 2),
b'>' => (Some((&data[..2], TK_RSHIFT)), 2),
_ => (Some((&data[..1], TK_GT)), 1),
})
} else {
Ok((Some((&data[..1], TK_GT)), 1))
}
}
b'!' => {
if let Some(b) = data.get(1) {
if *b == b'=' {
Ok((Some((&data[..2], TK_NE)), 2))
} else {
Err(Error::ExpectedEqualsSign(None))
}
} else {
Err(Error::ExpectedEqualsSign(None))
}
}
b'|' => {
if let Some(b) = data.get(1) {
Ok(if *b == b'|' {
(Some((&data[..2], TK_CONCAT)), 2)
} else {
(Some((&data[..1], TK_BITOR)), 1)
})
} else {
Ok((Some((&data[..1], TK_BITOR)), 1))
}
}
b',' => Ok((Some((&data[..1], TK_COMMA)), 1)),
b'&' => Ok((Some((&data[..1], TK_BITAND)), 1)),
b'~' => Ok((Some((&data[..1], TK_BITNOT)), 1)),
quote @ (b'`' | b'\'' | b'"') => literal(data, quote),
b'.' => {
if let Some(b) = data.get(1) {
if b.is_ascii_digit() {
fractional_part(data, 0)
} else {
Ok((Some((&data[..1], TK_DOT)), 1))
}
} else {
Ok((Some((&data[..1], TK_DOT)), 1))
}
}
b'0'..=b'9' => number(data),
b'[' => {
if let Some(i) = memchr(b']', data) {
// Keep original quotes / '[' ... ]'
Ok((Some((&data[0..=i], TK_ID)), i + 1))
} else {
Err(Error::UnterminatedBracket(None))
}
}
b'?' => {
match data.iter().skip(1).position(|&b| !b.is_ascii_digit()) {
Some(i) => {
// do not include the '?' in the token
Ok((Some((&data[1..=i], TK_VARIABLE)), i + 1))
}
None => Ok((Some((&data[1..], TK_VARIABLE)), data.len())),
}
}
b'$' | b'@' | b'#' | b':' => {
match data
.iter()
.skip(1)
.position(|&b| !is_identifier_continue(b))
{
Some(0) => Err(Error::BadVariableName(None)),
Some(i) => {
// '$' is included as part of the name
Ok((Some((&data[..=i], TK_VARIABLE)), i + 1))
}
None => {
if data.len() == 1 {
return Err(Error::BadVariableName(None));
}
Ok((Some((data, TK_VARIABLE)), data.len()))
}
}
}
b if is_identifier_start(b) => {
if b == b'x' || b == b'X' {
if let Some(&b'\'') = data.get(1) {
blob_literal(data)
} else {
Ok(self.identifierish(data))
}
} else {
Ok(self.identifierish(data))
}
}
_ => Err(Error::UnrecognizedToken(None)),
}
}
}
fn literal(data: &[u8], quote: u8) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert_eq!(data[0], quote);
let tt = if quote == b'\'' { TK_STRING } else { TK_ID };
let mut pb = 0;
let mut end = None;
// data[0] == quote => skip(1)
for (i, b) in data.iter().enumerate().skip(1) {
if *b == quote {
if pb == quote {
// escaped quote
pb = 0;
continue;
}
} else if pb == quote {
end = Some(i);
break;
}
pb = *b;
}
if end.is_some() || pb == quote {
let i = match end {
Some(i) => i,
_ => data.len(),
};
// keep original quotes in the token
Ok((Some((&data[0..i], tt)), i))
} else {
Err(Error::UnterminatedLiteral(None))
}
}
fn blob_literal(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert!(data[0] == b'x' || data[0] == b'X');
debug_assert_eq!(data[1], b'\'');
if let Some((i, b)) = data
.iter()
.enumerate()
.skip(2)
.find(|&(_, &b)| !b.is_ascii_hexdigit())
{
if *b != b'\'' || i % 2 != 0 {
return Err(Error::MalformedBlobLiteral(None));
}
Ok((Some((&data[2..i], TK_BLOB)), i + 1))
} else {
Err(Error::MalformedBlobLiteral(None))
}
}
fn number(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert!(data[0].is_ascii_digit());
if data[0] == b'0' {
if let Some(b) = data.get(1) {
if *b == b'x' || *b == b'X' {
return hex_integer(data);
}
} else {
return Ok((Some((data, TK_INTEGER)), data.len()));
}
}
if let Some((i, b)) = find_end_of_number(data, 1, u8::is_ascii_digit)? {
if b == b'.' {
return fractional_part(data, i);
} else if b == b'e' || b == b'E' {
return exponential_part(data, i);
} else if is_identifier_start(b) {
return Err(Error::BadNumber(None));
}
Ok((Some((&data[..i], TK_INTEGER)), i))
} else {
Ok((Some((data, TK_INTEGER)), data.len()))
}
}
fn hex_integer(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert_eq!(data[0], b'0');
debug_assert!(data[1] == b'x' || data[1] == b'X');
if let Some((i, b)) = find_end_of_number(data, 2, u8::is_ascii_hexdigit)? {
// Must not be empty (0x is invalid)
if i == 2 || is_identifier_start(b) {
return Err(Error::MalformedHexInteger(None));
}
Ok((Some((&data[..i], TK_INTEGER)), i))
} else {
// Must not be empty (0x is invalid)
if data.len() == 2 {
return Err(Error::MalformedHexInteger(None));
}
Ok((Some((data, TK_INTEGER)), data.len()))
}
}
fn fractional_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert_eq!(data[i], b'.');
if let Some((i, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
if b == b'e' || b == b'E' {
return exponential_part(data, i);
} else if is_identifier_start(b) {
return Err(Error::BadNumber(None));
}
Ok((Some((&data[..i], TK_FLOAT)), i))
} else {
Ok((Some((data, TK_FLOAT)), data.len()))
}
}
fn exponential_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize), Error> {
debug_assert!(data[i] == b'e' || data[i] == b'E');
// data[i] == 'e'|'E'
if let Some(b) = data.get(i + 1) {
let i = if *b == b'+' || *b == b'-' { i + 1 } else { i };
if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
if j == i + 1 || is_identifier_start(b) {
return Err(Error::BadNumber(None));
}
Ok((Some((&data[..j], TK_FLOAT)), j))
} else {
if data.len() == i + 1 {
return Err(Error::BadNumber(None));
}
Ok((Some((data, TK_FLOAT)), data.len()))
}
} else {
Err(Error::BadNumber(None))
}
}
fn find_end_of_number(
data: &[u8],
i: usize,
test: fn(&u8) -> bool,
) -> Result<Option<(usize, u8)>, Error> {
for (j, &b) in data.iter().enumerate().skip(i) {
if test(&b) {
continue;
} else if b == b'_' {
if j >= 1 && data.get(j - 1).map_or(false, test) && data.get(j + 1).map_or(false, test)
{
continue;
}
return Err(Error::BadNumber(None));
} else {
return Ok(Some((j, b)));
}
}
Ok(None)
}
impl Tokenizer {
fn identifierish<'input>(&mut self, data: &'input [u8]) -> (Option<Token<'input>>, usize) {
debug_assert!(is_identifier_start(data[0]));
// data[0] is_identifier_start => skip(1)
let end = data
.iter()
.skip(1)
.position(|&b| !is_identifier_continue(b));
let i = match end {
Some(i) => i + 1,
_ => data.len(),
};
let word = &data[..i];
let tt = if word.len() >= 2 && word.len() <= MAX_KEYWORD_LEN && word.is_ascii() {
keyword_token(word).unwrap_or(TK_ID)
} else {
TK_ID
};
(Some((word, tt)), i)
}
}
#[cfg(test)]
mod tests {
use super::Tokenizer;
use crate::dialect::TokenType;
use crate::lexer::sql::Error;
use crate::lexer::Scanner;
#[test]
fn fallible_iterator() -> Result<(), Error> {
let tokenizer = Tokenizer::new();
let input = b"PRAGMA parser_trace=ON;";
let mut s = Scanner::new(tokenizer);
expect_token(&mut s, input, b"PRAGMA", TokenType::TK_PRAGMA)?;
expect_token(&mut s, input, b"parser_trace", TokenType::TK_ID)?;
Ok(())
}
#[test]
fn invalid_number_literal() -> Result<(), Error> {
let tokenizer = Tokenizer::new();
let input = b"SELECT 1E;";
let mut s = Scanner::new(tokenizer);
expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?;
let err = s.scan(input).unwrap_err();
assert!(matches!(err, Error::BadNumber(_)));
Ok(())
}
fn expect_token(
s: &mut Scanner<Tokenizer>,
input: &[u8],
token: &[u8],
token_type: TokenType,
) -> Result<(), Error> {
let (t, tt) = s.scan(input)?.1.unwrap();
assert_eq!(token, t);
assert_eq!(token_type, tt);
Ok(())
}
}

View File

@@ -0,0 +1,376 @@
use fallible_iterator::FallibleIterator;
use super::{Error, Parser};
use crate::parser::ast::fmt::ToTokens;
use crate::parser::{
ast::{Cmd, Name, ParameterInfo, QualifiedName, Stmt},
ParserError,
};
#[test]
fn count_placeholders() {
let ast = parse_cmd(b"SELECT ? WHERE 1 = ?");
let mut info = ParameterInfo::default();
ast.to_tokens(&mut info).unwrap();
assert_eq!(info.count, 2);
}
#[test]
fn count_numbered_placeholders() {
let ast = parse_cmd(b"SELECT ?1 WHERE 1 = ?2 AND 0 = ?1");
let mut info = ParameterInfo::default();
ast.to_tokens(&mut info).unwrap();
assert_eq!(info.count, 2);
}
#[test]
fn count_unused_placeholders() {
let ast = parse_cmd(b"SELECT ?1 WHERE 1 = ?3");
let mut info = ParameterInfo::default();
ast.to_tokens(&mut info).unwrap();
assert_eq!(info.count, 3);
}
#[test]
fn count_named_placeholders() {
let ast = parse_cmd(b"SELECT :x, :y WHERE 1 = :y");
let mut info = ParameterInfo::default();
ast.to_tokens(&mut info).unwrap();
assert_eq!(info.count, 2);
assert_eq!(info.names.len(), 2);
assert!(info.names.contains(":x"));
assert!(info.names.contains(":y"));
}
#[test]
fn duplicate_column() {
expect_parser_err_msg(
b"CREATE TABLE t (x TEXT, x TEXT)",
"duplicate column name: x",
);
expect_parser_err_msg(
b"CREATE TABLE t (x TEXT, \"x\" TEXT)",
"duplicate column name: \"x\"",
);
expect_parser_err_msg(
b"CREATE TABLE t (x TEXT, `x` TEXT)",
"duplicate column name: `x`",
);
}
#[test]
fn create_table_without_column() {
expect_parser_err(
b"CREATE TABLE t ()",
ParserError::SyntaxError(")".to_owned()),
);
}
#[test]
fn vtab_args() -> Result<(), Error> {
let sql = b"CREATE VIRTUAL TABLE mail USING fts3(
subject VARCHAR(256) NOT NULL,
body TEXT CHECK(length(body)<10240)
);";
let r = parse_cmd(sql);
let Cmd::Stmt(Stmt::CreateVirtualTable {
tbl_name: QualifiedName {
name: Name(tbl_name),
..
},
module_name: Name(module_name),
args: Some(args),
..
}) = r
else {
panic!("unexpected AST")
};
assert_eq!(tbl_name, "mail");
assert_eq!(module_name, "fts3");
assert_eq!(args.len(), 2);
assert_eq!(args[0], "subject VARCHAR(256) NOT NULL");
assert_eq!(args[1], "body TEXT CHECK(length(body)<10240)");
Ok(())
}
#[test]
fn only_semicolons_no_statements() {
let sqls = ["", ";", ";;;"];
for sql in &sqls {
let r = parse(sql.as_bytes());
assert_eq!(r.unwrap(), None);
}
}
#[test]
fn extra_semicolons_between_statements() {
let sqls = [
"SELECT 1; SELECT 2",
"SELECT 1; SELECT 2;",
"; SELECT 1; SELECT 2",
";; SELECT 1;; SELECT 2;;",
];
for sql in &sqls {
let mut parser = Parser::new(sql.as_bytes());
assert!(matches!(
parser.next().unwrap(),
Some(Cmd::Stmt(Stmt::Select { .. }))
));
assert!(matches!(
parser.next().unwrap(),
Some(Cmd::Stmt(Stmt::Select { .. }))
));
assert_eq!(parser.next().unwrap(), None);
}
}
#[test]
fn extra_comments_between_statements() {
let sqls = [
"-- abc\nSELECT 1; --def\nSELECT 2 -- ghj",
"/* abc */ SELECT 1; /* def */ SELECT 2; /* ghj */",
"/* abc */; SELECT 1 /* def */; SELECT 2 /* ghj */",
"/* abc */;; SELECT 1;/* def */; SELECT 2; /* ghj */; /* klm */",
];
for sql in &sqls {
let mut parser = Parser::new(sql.as_bytes());
assert!(matches!(
parser.next().unwrap(),
Some(Cmd::Stmt(Stmt::Select { .. }))
));
assert!(matches!(
parser.next().unwrap(),
Some(Cmd::Stmt(Stmt::Select { .. }))
));
assert_eq!(parser.next().unwrap(), None);
}
}
#[test]
fn insert_mismatch_count() {
expect_parser_err_msg(b"INSERT INTO t (a, b) VALUES (1)", "1 values for 2 columns");
}
#[test]
fn insert_default_values() {
expect_parser_err_msg(
b"INSERT INTO t (a) DEFAULT VALUES",
"0 values for 1 columns",
);
}
#[test]
fn create_view_mismatch_count() {
expect_parser_err_msg(
b"CREATE VIEW v (c1, c2) AS SELECT 1",
"expected 2 columns for v but got 1",
);
}
#[test]
fn create_view_duplicate_column_name() {
expect_parser_err_msg(
b"CREATE VIEW v (c1, c1) AS SELECT 1, 2",
"duplicate column name: c1",
);
}
#[test]
fn create_table_without_rowid_missing_pk() {
expect_parser_err_msg(
b"CREATE TABLE t (c1) WITHOUT ROWID",
"PRIMARY KEY missing on table t",
);
}
#[test]
fn create_temporary_table_with_qualified_name() {
expect_parser_err_msg(
b"CREATE TEMPORARY TABLE mem.x AS SELECT 1",
"temporary table name must be unqualified",
);
parse_cmd(b"CREATE TEMPORARY TABLE temp.x AS SELECT 1");
}
#[test]
fn create_table_with_only_generated_column() {
expect_parser_err_msg(
b"CREATE TABLE test(data AS (1))",
"must have at least one non-generated column",
);
}
#[test]
fn create_strict_table_missing_datatype() {
expect_parser_err_msg(b"CREATE TABLE t (c1) STRICT", "missing datatype for t.c1");
}
#[test]
fn create_strict_table_unknown_datatype() {
expect_parser_err_msg(
b"CREATE TABLE t (c1 BOOL) STRICT",
"unknown datatype for t.c1: \"BOOL\"",
);
}
#[test]
fn foreign_key_on_column() {
expect_parser_err_msg(
b"CREATE TABLE t(a REFERENCES o(a,b))",
"foreign key on a should reference only one column of table o",
);
}
#[test]
fn create_strict_table_generated_column() {
parse_cmd(
b"CREATE TABLE IF NOT EXISTS transactions (
debit REAL,
credit REAL,
amount REAL GENERATED ALWAYS AS (ifnull(credit, 0.0) -ifnull(debit, 0.0))
) STRICT;",
);
}
#[test]
fn selects_compound_mismatch_columns_count() {
expect_parser_err_msg(
b"SELECT 1 UNION SELECT 1, 2",
"SELECTs to the left and right of UNION do not have the same number of result columns",
);
}
#[test]
fn delete_order_by_without_limit() {
expect_parser_err_msg(
b"DELETE FROM t ORDER BY x",
"ORDER BY without LIMIT on DELETE",
);
}
#[test]
fn update_order_by_without_limit() {
expect_parser_err_msg(
b"UPDATE t SET x = 1 ORDER BY x",
"ORDER BY without LIMIT on UPDATE",
);
}
#[test]
fn values_mismatch_columns_count() {
expect_parser_err_msg(
b"INSERT INTO t VALUES (1), (1,2)",
"all VALUES must have the same number of terms",
);
}
#[test]
fn column_specified_more_than_once() {
expect_parser_err_msg(
b"INSERT INTO t (n, n, m) VALUES (1, 0, 2)",
"column \"n\" specified more than once",
)
}
#[test]
fn alter_add_column_primary_key() {
expect_parser_err_msg(
b"ALTER TABLE t ADD COLUMN c PRIMARY KEY",
"Cannot add a PRIMARY KEY column",
);
}
#[test]
fn alter_add_column_unique() {
expect_parser_err_msg(
b"ALTER TABLE t ADD COLUMN c UNIQUE",
"Cannot add a UNIQUE column",
);
}
#[test]
fn alter_rename_same() {
expect_parser_err_msg(
b"ALTER TABLE t RENAME TO t",
"there is already another table or index with this name: t",
);
}
#[test]
fn natural_join_on() {
expect_parser_err_msg(
b"SELECT x FROM t NATURAL JOIN t USING (x)",
"a NATURAL join may not have an ON or USING clause",
);
expect_parser_err_msg(
b"SELECT x FROM t NATURAL JOIN t ON t.x = t.x",
"a NATURAL join may not have an ON or USING clause",
);
}
#[test]
fn missing_join_clause() {
expect_parser_err_msg(
b"SELECT a FROM tt ON b",
"a JOIN clause is required before ON",
);
}
#[test]
fn cast_without_typename() {
parse_cmd(b"SELECT CAST(a AS ) FROM t");
}
#[test]
fn unknown_table_option() {
expect_parser_err_msg(b"CREATE TABLE t(x)o", "unknown table option: o");
expect_parser_err_msg(b"CREATE TABLE t(x) WITHOUT o", "unknown table option: o");
}
#[test]
fn qualified_table_name_within_triggers() {
expect_parser_err_msg(
b"CREATE TRIGGER tr1 AFTER INSERT ON t1 BEGIN
DELETE FROM main.t2;
END;",
"qualified table names are not allowed on INSERT, UPDATE, and DELETE statements \
within triggers",
);
}
#[test]
fn indexed_by_clause_within_triggers() {
expect_parser_err_msg(
b"CREATE TRIGGER main.t16err5 AFTER INSERT ON tA BEGIN
UPDATE t16 INDEXED BY t16a SET rowid=rowid+1 WHERE a=1;
END;",
"the INDEXED BY clause is not allowed on UPDATE or DELETE statements \
within triggers",
);
expect_parser_err_msg(
b"CREATE TRIGGER main.t16err6 AFTER INSERT ON tA BEGIN
DELETE FROM t16 NOT INDEXED WHERE a=123;
END;",
"the NOT INDEXED clause is not allowed on UPDATE or DELETE statements \
within triggers",
);
}
fn expect_parser_err_msg(input: &[u8], error_msg: &str) {
expect_parser_err(input, ParserError::Custom(error_msg.to_owned()))
}
fn expect_parser_err(input: &[u8], err: ParserError) {
let r = parse(input);
if let Error::ParserError(e, _) = r.unwrap_err() {
assert_eq!(e, err);
} else {
panic!("unexpected error type")
};
}
fn parse_cmd(input: &[u8]) -> Cmd {
parse(input).unwrap().unwrap()
}
fn parse(input: &[u8]) -> Result<Option<Cmd>, Error> {
let mut parser = Parser::new(input);
parser.next()
}

View File

@@ -0,0 +1,8 @@
//! SQLite3 syntax lexer and parser
#![warn(missing_docs)]
pub mod dialect;
// In Lemon, the tokenizer calls the parser.
pub mod lexer;
mod parser;
pub use parser::ast;

View File

@@ -0,0 +1,336 @@
//! Check for additional syntax error
use crate::ast::*;
use crate::custom_err;
use std::fmt::{Display, Formatter};
impl Cmd {
/// Statement accessor
pub fn stmt(&self) -> &Stmt {
match self {
Self::Explain(stmt) => stmt,
Self::ExplainQueryPlan(stmt) => stmt,
Self::Stmt(stmt) => stmt,
}
}
/// Like `sqlite3_column_count` but more limited
pub fn column_count(&self) -> ColumnCount {
match self {
Self::Explain(_) => ColumnCount::Fixed(8),
Self::ExplainQueryPlan(_) => ColumnCount::Fixed(4),
Self::Stmt(stmt) => stmt.column_count(),
}
}
/// Like `sqlite3_stmt_isexplain`
pub fn is_explain(&self) -> bool {
matches!(self, Self::Explain(_) | Self::ExplainQueryPlan(_))
}
/// Like `sqlite3_stmt_readonly`
pub fn readonly(&self) -> bool {
self.stmt().readonly()
}
/// check for extra rules
pub fn check(&self) -> Result<(), ParserError> {
self.stmt().check()
}
}
/// Column count
pub enum ColumnCount {
/// With `SELECT *` / PRAGMA
Dynamic,
/// Constant count
Fixed(usize),
/// No column
None,
}
impl ColumnCount {
fn incr(&mut self) {
if let Self::Fixed(n) = self {
*n += 1;
}
}
}
impl Stmt {
/// Like `sqlite3_column_count` but more limited
pub fn column_count(&self) -> ColumnCount {
match self {
Self::Delete {
returning: Some(returning),
..
} => column_count(returning),
Self::Insert {
returning: Some(returning),
..
} => column_count(returning),
Self::Pragma(..) => ColumnCount::Dynamic,
Self::Select(s) => s.column_count(),
Self::Update {
returning: Some(returning),
..
} => column_count(returning),
_ => ColumnCount::None,
}
}
/// Like `sqlite3_stmt_readonly`
pub fn readonly(&self) -> bool {
match self {
Self::Attach { .. } => true,
Self::Begin(..) => true,
Self::Commit(..) => true,
Self::Detach(..) => true,
Self::Pragma(..) => true, // TODO check all
Self::Reindex { .. } => true,
Self::Release(..) => true,
Self::Rollback { .. } => true,
Self::Savepoint(..) => true,
Self::Select(..) => true,
_ => false,
}
}
/// check for extra rules
pub fn check(&self) -> Result<(), ParserError> {
match self {
Self::AlterTable(old_name, AlterTableBody::RenameTo(new_name)) => {
if *new_name == old_name.name {
return Err(custom_err!(
"there is already another table or index with this name: {}",
new_name
));
}
Ok(())
}
Self::AlterTable(.., AlterTableBody::AddColumn(cd)) => {
for c in cd {
if let ColumnConstraint::PrimaryKey { .. } = c {
return Err(custom_err!("Cannot add a PRIMARY KEY column"));
} else if let ColumnConstraint::Unique(..) = c {
return Err(custom_err!("Cannot add a UNIQUE column"));
}
}
Ok(())
}
Self::CreateTable {
temporary,
tbl_name,
body,
..
} => {
if *temporary {
if let Some(ref db_name) = tbl_name.db_name {
if db_name != "TEMP" {
return Err(custom_err!("temporary table name must be unqualified"));
}
}
}
body.check(tbl_name)
}
Self::CreateView {
view_name,
columns: Some(columns),
select,
..
} => {
// SQLite3 engine renames duplicates:
for (i, c) in columns.iter().enumerate() {
for o in &columns[i + 1..] {
if c.col_name == o.col_name {
return Err(custom_err!("duplicate column name: {}", c.col_name,));
}
}
}
// SQLite3 engine raises this error later (not while parsing):
match select.column_count() {
ColumnCount::Fixed(n) if n != columns.len() => Err(custom_err!(
"expected {} columns for {} but got {}",
columns.len(),
view_name,
n
)),
_ => Ok(()),
}
}
Self::Delete {
order_by: Some(_),
limit: None,
..
} => Err(custom_err!("ORDER BY without LIMIT on DELETE")),
Self::Insert {
columns: Some(columns),
body: InsertBody::Select(select, ..),
..
} => match select.body.select.column_count() {
ColumnCount::Fixed(n) if n != columns.len() => {
Err(custom_err!("{} values for {} columns", n, columns.len()))
}
_ => Ok(()),
},
Self::Insert {
columns: Some(columns),
body: InsertBody::DefaultValues,
..
} => Err(custom_err!("0 values for {} columns", columns.len())),
Self::Update {
order_by: Some(_),
limit: None,
..
} => Err(custom_err!("ORDER BY without LIMIT on UPDATE")),
_ => Ok(()),
}
}
}
impl CreateTableBody {
/// check for extra rules
pub fn check(&self, tbl_name: &QualifiedName) -> Result<(), ParserError> {
if let Self::ColumnsAndConstraints {
columns,
constraints: _,
options,
} = self
{
let mut generated_count = 0;
for c in columns.values() {
for cs in &c.constraints {
if let ColumnConstraint::Generated { .. } = cs.constraint {
generated_count += 1;
}
}
}
if generated_count == columns.len() {
return Err(custom_err!("must have at least one non-generated column"));
}
if options.contains(TableOptions::STRICT) {
for c in columns.values() {
match &c.col_type {
Some(Type { name, .. }) => {
// The datatype must be one of the following: INT INTEGER REAL TEXT BLOB ANY
if !(name.eq_ignore_ascii_case("INT")
|| name.eq_ignore_ascii_case("INTEGER")
|| name.eq_ignore_ascii_case("REAL")
|| name.eq_ignore_ascii_case("TEXT")
|| name.eq_ignore_ascii_case("BLOB")
|| name.eq_ignore_ascii_case("ANY"))
{
return Err(custom_err!(
"unknown datatype for {}.{}: \"{}\"",
tbl_name,
c.col_name,
name
));
}
}
_ => {
// Every column definition must specify a datatype for that column. The freedom to specify a column without a datatype is removed.
return Err(custom_err!(
"missing datatype for {}.{}",
tbl_name,
c.col_name
));
}
}
}
}
if options.contains(TableOptions::WITHOUT_ROWID) && !self.has_primary_key() {
return Err(custom_err!("PRIMARY KEY missing on table {}", tbl_name,));
}
}
Ok(())
}
/// explicit primary key constraint ?
pub fn has_primary_key(&self) -> bool {
if let Self::ColumnsAndConstraints {
columns,
constraints,
..
} = self
{
for col in columns.values() {
for c in col {
if let ColumnConstraint::PrimaryKey { .. } = c {
return true;
}
}
}
if let Some(constraints) = constraints {
for c in constraints {
if let TableConstraint::PrimaryKey { .. } = c.constraint {
return true;
}
}
}
}
false
}
}
impl<'a> IntoIterator for &'a ColumnDefinition {
type Item = &'a ColumnConstraint;
type IntoIter = std::iter::Map<
std::slice::Iter<'a, NamedColumnConstraint>,
fn(&'a NamedColumnConstraint) -> &'a ColumnConstraint,
>;
fn into_iter(self) -> Self::IntoIter {
self.constraints.iter().map(|nc| &nc.constraint)
}
}
impl Select {
/// Like `sqlite3_column_count` but more limited
pub fn column_count(&self) -> ColumnCount {
self.body.select.column_count()
}
}
impl OneSelect {
/// Like `sqlite3_column_count` but more limited
pub fn column_count(&self) -> ColumnCount {
match self {
Self::Select { columns, .. } => column_count(columns),
Self::Values(values) => {
assert!(!values.is_empty()); // TODO Validate
ColumnCount::Fixed(values[0].len())
}
}
}
/// Check all VALUES have the same number of terms
pub fn push(values: &mut Vec<Vec<Expr>>, v: Vec<Expr>) -> Result<(), ParserError> {
if values[0].len() != v.len() {
return Err(custom_err!("all VALUES must have the same number of terms"));
}
values.push(v);
Ok(())
}
}
impl Display for QualifiedName {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.to_fmt(f)
}
}
impl ResultColumn {
fn column_count(&self) -> ColumnCount {
match self {
Self::Expr(..) => ColumnCount::Fixed(1),
_ => ColumnCount::Dynamic,
}
}
}
fn column_count(cols: &[ResultColumn]) -> ColumnCount {
assert!(!cols.is_empty());
let mut count = ColumnCount::Fixed(0);
for col in cols {
match col.column_count() {
ColumnCount::Fixed(_) => count.incr(),
_ => return ColumnCount::Dynamic,
}
}
count
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,154 @@
//! SQLite parser
pub mod ast;
pub mod parse {
#![expect(unused_braces)]
#![expect(clippy::if_same_then_else)]
#![expect(clippy::absurd_extreme_comparisons)] // FIXME
#![expect(clippy::needless_return)]
#![expect(clippy::upper_case_acronyms)]
#![expect(clippy::manual_range_patterns)]
include!(concat!(env!("OUT_DIR"), "/parse.rs"));
}
use crate::dialect::Token;
use ast::{Cmd, ExplainKind, Name, Stmt};
/// Parser error
#[derive(Debug, PartialEq)]
pub enum ParserError {
/// Syntax error
SyntaxError(String),
/// Unexpected EOF
UnexpectedEof,
/// Custom error
Custom(String),
}
impl std::fmt::Display for ParserError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::SyntaxError(s) => {
write!(f, "near \"{s}\": syntax error")
}
Self::UnexpectedEof => f.write_str("unexpected end of input"),
Self::Custom(s) => f.write_str(s),
}
}
}
impl std::error::Error for ParserError {}
/// Custom error constructor
#[macro_export]
macro_rules! custom_err {
($msg:literal $(,)?) => {
$crate::parser::ParserError::Custom($msg.to_owned())
};
($err:expr $(,)?) => {
$crate::parser::ParserError::Custom(format!($err))
};
($fmt:expr, $($arg:tt)*) => {
$crate::parser::ParserError::Custom(format!($fmt, $($arg)*))
};
}
/// Parser context
pub struct Context<'input> {
input: &'input [u8],
explain: Option<ExplainKind>,
stmt: Option<Stmt>,
constraint_name: Option<Name>, // transient
module_arg: Option<(usize, usize)>, // Complete text of a module argument
module_args: Option<Vec<String>>, // CREATE VIRTUAL TABLE args
done: bool,
error: Option<ParserError>,
}
impl<'input> Context<'input> {
pub fn new(input: &'input [u8]) -> Self {
Context {
input,
explain: None,
stmt: None,
constraint_name: None,
module_arg: None,
module_args: None,
done: false,
error: None,
}
}
/// Consume parsed command
pub fn cmd(&mut self) -> Option<Cmd> {
if let Some(stmt) = self.stmt.take() {
match self.explain.take() {
Some(ExplainKind::Explain) => Some(Cmd::Explain(stmt)),
Some(ExplainKind::QueryPlan) => Some(Cmd::ExplainQueryPlan(stmt)),
None => Some(Cmd::Stmt(stmt)),
}
} else {
None
}
}
fn constraint_name(&mut self) -> Option<Name> {
self.constraint_name.take()
}
fn no_constraint_name(&self) -> bool {
self.constraint_name.is_none()
}
fn vtab_arg_init(&mut self) {
self.add_module_arg();
self.module_arg = None;
}
fn vtab_arg_extend(&mut self, any: Token) {
if let Some((_, ref mut n)) = self.module_arg {
*n = any.2
} else {
self.module_arg = Some((any.0, any.2))
}
}
fn add_module_arg(&mut self) {
if let Some((start, end)) = self.module_arg.take() {
if let Ok(arg) = std::str::from_utf8(&self.input[start..end]) {
self.module_args.get_or_insert(vec![]).push(arg.to_owned());
} // FIXME error handling
}
}
fn module_args(&mut self) -> Option<Vec<String>> {
self.add_module_arg();
self.module_args.take()
}
/// This routine is called after a single SQL statement has been parsed.
fn sqlite3_finish_coding(&mut self) {
self.done = true;
}
/// Return `true` if the parser completed, either successfully or with an error.
pub fn done(&self) -> bool {
self.done || self.error.is_some()
}
pub fn is_ok(&self) -> bool {
self.error.is_none()
}
/// Consume error generated by parser
pub fn error(&mut self) -> Option<ParserError> {
self.error.take()
}
pub fn reset(&mut self) {
self.explain = None;
self.stmt = None;
self.constraint_name = None;
self.module_arg = None;
self.module_args = None;
self.done = false;
self.error = None;
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,902 @@
/*
** 2000-05-29
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Driver template for the LEMON parser generator.
**
** The "lemon" program processes an LALR(1) input grammar file, then uses
** this template to construct a parser. The "lemon" program inserts text
** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
** interstitial "-" characters) contained in this template is changed into
** the value of the %name directive from the grammar. Otherwise, the content
** of this template is copied straight through into the generate parser
** source file.
**
** The following is the concatenation of all %include directives from the
** input grammar file:
*/
/************ Begin %include sections from the grammar ************************/
%%
/**************** End of %include directives **********************************/
/* These constants specify the various numeric values for terminal symbols.
***************** Begin token definitions *************************************/
%%
/**************** End token definitions ***************************************/
/* The next section is a series of control #defines that affect
** various aspects of the generated parser.
** YYCODETYPE is the data type used to store the integer codes
** that represent terminal and non-terminal symbols.
** "unsigned char" is used if there are fewer than
** 256 symbols. Larger types otherwise.
** YYNOCODE is a number of type YYCODETYPE that is not used for
** any terminal or nonterminal symbol.
** YYFALLBACK If defined, this indicates that one or more tokens
** (also known as: "terminal symbols") have fall-back
** values which should be used if the original symbol
** would not parse. This permits keywords to sometimes
** be used as identifiers, for example.
** YYACTIONTYPE is the data type used for "action codes" - numbers
** that indicate what to do in response to the next
** token.
** ParseTOKENTYPE is the data type used for minor type for terminal
** symbols. Background: A "minor type" is a semantic
** value associated with a terminal or non-terminal
** symbols. For example, for an "ID" terminal symbol,
** the minor type might be the name of the identifier.
** Each non-terminal can have a different minor type.
** Terminal symbols all have the same minor type, though.
** This macros defines the minor type for terminal
** symbols.
** YYMINORTYPE is the data type used for all minor types.
** This is typically a union of many types, one of
** which is ParseTOKENTYPE. The entry in the union
** for terminal symbols is called "yy0".
** YYSTACKDEPTH is the maximum depth of the parser's stack. If
** zero the stack is dynamically sized using realloc()
** YYERRORSYMBOL is the code number of the error symbol. If not
** defined, then do no error processing.
** YYNSTATE the combined number of states.
** YYNRULE the number of rules in the grammar
** YYNTOKEN Number of terminal symbols
** YY_MAX_SHIFT Maximum value for shift actions
** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
** YY_ERROR_ACTION The yy_action[] code for syntax error
** YY_ACCEPT_ACTION The yy_action[] code for accept
** YY_NO_ACTION The yy_action[] code for no-op
** YY_MIN_REDUCE Minimum value for reduce actions
** YY_MAX_REDUCE Maximum value for reduce actions
*/
/************* Begin control #defines *****************************************/
%%
/************* End control #defines *******************************************/
/* Next are the tables used to determine what action to take based on the
** current state and lookahead token. These tables are used to implement
** functions that take a state number and lookahead value and return an
** action integer.
**
** Suppose the action integer is N. Then the action is determined as
** follows
**
** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead
** token onto the stack and goto state N.
**
** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE.
**
** N == YY_ERROR_ACTION A syntax error has occurred.
**
** N == YY_ACCEPT_ACTION The parser accepts its input.
**
** N == YY_NO_ACTION No such action. Denotes unused
** slots in the yy_action[] table.
**
** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE
** and YY_MAX_REDUCE
**
** The action table is constructed as a single large table named yy_action[].
** Given state S and lookahead X, the action is computed as either:
**
** (A) N = yy_action[ yy_shift_ofst[S] + X ]
** (B) N = yy_default[S]
**
** The (A) formula is preferred. The B formula is used instead if
** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X.
**
** The formulas above are for computing the action when the lookahead is
** a terminal symbol. If the lookahead is a non-terminal (as occurs after
** a reduce action) then the yy_reduce_ofst[] array is used in place of
** the yy_shift_ofst[] array.
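**
** For example (illustrative sketch only, not generated code), the
** terminal-lookahead case boils down to, for state s and token x:
**
**     let i = yy_shift_ofst[s as usize] as usize + x as usize;
**     let act = if yy_lookahead[i] == x { yy_action[i] } else { yy_default[s as usize] };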
**
** The following are the tables generated in this section:
**
** yy_action[] A single table containing all actions.
** yy_lookahead[] A table containing the lookahead for each entry in
** yy_action. Used to detect hash collisions.
** yy_shift_ofst[] For each state, the offset into yy_action for
** shifting terminals.
** yy_reduce_ofst[] For each state, the offset into yy_action for
** shifting non-terminals after a reduce.
** yy_default[] Default action for each state.
**
*********** Begin parsing tables **********************************************/
%%
/********** End of lemon-generated parsing tables *****************************/
/* The next table maps tokens (terminal symbols) into fallback tokens.
** If a construct like the following:
**
** %fallback ID X Y Z.
**
** appears in the grammar, then ID becomes a fallback token for X, Y,
** and Z. Whenever one of the tokens X, Y, or Z is input to the parser
** but it does not parse, the type of the token is changed to ID and
** the parse is retried before an error is thrown.
**
** This feature can be used, for example, to cause some keywords in a language
** to revert to identifiers if the keyword does not apply in the context where
** it appears.
*/
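/* Illustrative sketch (hypothetical token names, not the generated table):
** with "%fallback ID INDEXED." the array below conceptually satisfies
**
**     yyFallback[TK_INDEXED as usize] == TK_ID as YYCODETYPE   // INDEXED falls back to ID
**     // all other entries are 0, meaning "no fallback"
**
** so yy_find_shift_action() retries a failed lookup with ID before reporting
** an error, and parse_fallback(TK_INDEXED as YYCODETYPE) returns ID's code.
*/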
%%
/* The following structure represents a single element of the
** parser's stack. Information stored includes:
**
** + The state number for the parser at this level of the stack.
**
** + The value of the token stored at this level of the stack.
** (In other words, the "major" token.)
**
** + The semantic value stored at this level of the stack. This is
** the information used by the action routines in the grammar.
** It is sometimes called the "minor" token.
**
** After the "shift" half of a SHIFTREDUCE action, the stateno field
** actually contains the reduce action for the second half of the
** SHIFTREDUCE.
*/
#[expect(non_camel_case_types)]
#[derive(Default)]
pub struct yyStackEntry<'i> {
stateno: YYACTIONTYPE, /* The state-number, or reduce action in SHIFTREDUCE */
major: YYCODETYPE, /* The major token value. This is the code
** number for the token at this stack level */
minor: YYMINORTYPE<'i>, /* The user-supplied minor token value. This
** is the value of the token */
}
/* The state of the parser is completely contained in an instance of
** the following structure */
#[expect(non_camel_case_types)]
pub struct yyParser<'input> {
yyidx: usize, /* Index to top element of the stack */
#[cfg(feature = "YYTRACKMAXSTACKDEPTH")]
yyhwm: usize, /* High-water mark of the stack */
//#[cfg(not(feature = "YYNOERRORRECOVERY"))]
    yyerrcnt: i32, /* Shifts left before leaving error-recovery mode */
%% /* A place to hold %extra_context */
yystack: Vec<yyStackEntry<'input>>, /* The parser's stack */
}
use std::cmp::Ordering;
use std::ops::Neg;
impl<'input> yyParser<'input> {
fn shift(&self, shift: i8) -> usize {
assert!(shift <= 1);
match shift.cmp(&0) {
Ordering::Equal => self.yyidx,
Ordering::Greater => self.yyidx + shift as usize,
Ordering::Less => self.yyidx.checked_sub(shift.neg() as usize).unwrap(),
}
}
fn yyidx_shift(&mut self, shift: i8) {
match shift.cmp(&0) {
Ordering::Greater => self.yyidx += shift as usize,
Ordering::Less => self.yyidx = self.yyidx.checked_sub(shift.neg() as usize).unwrap(),
Ordering::Equal => {}
}
}
fn yy_move(&mut self, shift: i8) -> yyStackEntry<'input> {
use std::mem::take;
let idx = self.shift(shift);
take(&mut self.yystack[idx])
}
fn push(&mut self, entry: yyStackEntry<'input>) {
if self.yyidx == self.yystack.len() {
self.yystack.push(entry);
} else {
self.yystack[self.yyidx] = entry;
}
}
}
use std::ops::{Index, IndexMut};
impl<'input> Index<i8> for yyParser<'input> {
type Output = yyStackEntry<'input>;
fn index(&self, shift: i8) -> &yyStackEntry<'input> {
let idx = self.shift(shift);
&self.yystack[idx]
}
}
impl<'input> IndexMut<i8> for yyParser<'input> {
fn index_mut(&mut self, shift: i8) -> &mut yyStackEntry<'input> {
let idx = self.shift(shift);
&mut self.yystack[idx]
}
}
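/* Illustrative usage of the Index/IndexMut impls above (sketch): the i8 index
** is an offset relative to the top of the stack, so `self[0]` is the top
** entry and `self[-1]` the one just below it, e.g.
**
**     let top_state = self[0].stateno;
**     let prev_major = self[-1].major;
*/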
#[cfg(not(feature = "NDEBUG"))]
use log::{debug, error, log_enabled, Level::Debug};
static TARGET: &str = "Parse";
/* For tracing shifts, the names of all terminals and nonterminals
** are required. The following table supplies these names */
#[cfg(any(feature = "YYCOVERAGE", not(feature = "NDEBUG")))]
%%
/* For tracing reduce actions, the names of all rules are required.
*/
#[cfg(not(feature = "NDEBUG"))]
#[rustfmt::skip]
#[expect(non_upper_case_globals)]
static yyRuleName: [&str; YYNRULE] = [
%%
];
/*
** Try to increase the size of the parser stack. Return the number
** of errors. Return 0 on success.
*/
impl yyParser<'_> {
fn yy_grow_stack_if_needed(&mut self) -> bool {
false
}
fn yy_grow_stack_for_push(&mut self) -> bool {
        // Unlike the C template, yystack is not pre-filled with zero-initialized entries.
if self.yyidx == self.yystack.len() {
self.yystack.push(yyStackEntry::default());
} else if self.yyidx + 1 == self.yystack.len() {
self.yystack.push(yyStackEntry::default());
}
false
}
}
/* Initialize a new parser.
*/
impl yyParser<'_> {
pub fn new(
%% /* Optional %extra_context parameter */
) -> yyParser {
let mut p = yyParser {
yyidx: 0,
#[cfg(feature = "YYTRACKMAXSTACKDEPTH")]
yyhwm: 0,
yystack: Vec::new(),
//#[cfg(not(feature = "YYNOERRORRECOVERY"))]
yyerrcnt: -1,
%% /* Optional %extra_context store */
};
p.push(yyStackEntry::default());
p
}
}
/*
** Pop the parser's stack once.
*/
impl yyParser<'_> {
fn yy_pop_parser_stack(&mut self) {
use std::mem::take;
let _yytos = take(&mut self.yystack[self.yyidx]);
self.yyidx = self.yyidx.checked_sub(1).unwrap();
//assert_eq!(self.yyidx+1, self.yystack.len());
#[cfg(not(feature = "NDEBUG"))]
{
debug!(
target: TARGET,
"Popping {}", yyTokenName[_yytos.major as usize]
);
}
}
}
/*
** Clear all secondary memory allocations from the parser
*/
impl yyParser<'_> {
#[expect(non_snake_case)]
pub fn ParseFinalize(&mut self) {
while self.yyidx > 0 {
self.yy_pop_parser_stack();
}
// TODO check all elements remaining in yystack are yyinit()
}
}
/*
** Return the peak depth of the stack for a parser.
*/
#[cfg(feature = "YYTRACKMAXSTACKDEPTH")]
impl yyParser<'_> {
#[expect(non_snake_case)]
pub fn ParseStackPeak(&self) -> usize {
self.yyhwm
}
fn yyhwm_incr(&mut self) {
if self.yyidx > self.yyhwm {
self.yyhwm += 1;
assert_eq!(self.yyhwm, self.yyidx);
}
}
}
#[cfg(not(feature = "YYTRACKMAXSTACKDEPTH"))]
impl yyParser<'_> {
#[inline]
fn yyhwm_incr(&mut self) {}
}
/* This array of booleans keeps track of the parser statement
** coverage. The element yycoverage[X][Y] is set when the parser
** is in state X and has a lookahead token Y. In a well-tested
** system, every element of this matrix should end up being set.
*/
#[cfg(feature = "YYCOVERAGE")]
static yycoverage: [[bool; YYNTOKEN]; YYNSTATE] = [];
/*
** Write into out a description of every state/lookahead combination that
**
** (1) has not been used by the parser, and
** (2) is not a syntax error.
**
** Return the number of missed state/lookahead combinations.
*/
#[cfg(feature = "YYCOVERAGE")]
fn ParseCoverage(/*FILE *out*/) -> i32 {
//int stateno, iLookAhead, i;
let mut nMissed = 0;
/*for(stateno=0; stateno<YYNSTATE; stateno++){
i = yy_shift_ofst[stateno];
for(iLookAhead=0; iLookAhead<YYNTOKEN; iLookAhead++){
if( yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
if( yycoverage[stateno][iLookAhead]==0 ) nMissed++;
if( out ){
fprintf(out,"State %d lookahead %s %s\n", stateno,
yyTokenName[iLookAhead],
yycoverage[stateno][iLookAhead] ? "ok" : "missed");
}
}
}*/
return nMissed;
}
/*
** Find the appropriate action for a parser given the terminal
** look-ahead token iLookAhead.
*/
#[expect(non_snake_case)]
fn yy_find_shift_action(
mut iLookAhead: YYCODETYPE, /* The look-ahead token */
stateno: YYACTIONTYPE, /* Current state number */
) -> YYACTIONTYPE {
if stateno > YY_MAX_SHIFT {
return stateno;
}
assert!(stateno <= YY_SHIFT_COUNT);
#[cfg(feature = "YYCOVERAGE")]
{
//yycoverage[stateno][iLookAhead] = true;
}
loop {
let mut i = yy_shift_ofst[stateno as usize] as usize;
assert!(i <= YY_ACTTAB_COUNT!());
assert!(i + usize::from(YYNTOKEN) <= yy_lookahead.len());
assert_ne!(iLookAhead, YYNOCODE);
assert!((iLookAhead as YYACTIONTYPE) < YYNTOKEN);
i += iLookAhead as usize;
if yy_lookahead[i] != iLookAhead {
if YYFALLBACK {
let iFallback = yyFallback[iLookAhead as usize]; /* Fallback token */
if iFallback != 0 {
#[cfg(not(feature = "NDEBUG"))]
{
debug!(
target: TARGET,
"FALLBACK {} => {}",
yyTokenName[iLookAhead as usize],
yyTokenName[iFallback as usize]
);
}
assert_eq!(yyFallback[iFallback as usize], 0); /* Fallback loop must terminate */
iLookAhead = iFallback;
continue;
}
}
if YYWILDCARD > 0 {
let j = i - iLookAhead as usize + YYWILDCARD as usize;
if yy_lookahead[j] == YYWILDCARD && iLookAhead > 0 {
#[cfg(not(feature = "NDEBUG"))]
{
debug!(
target: TARGET,
"WILDCARD {} => {}",
yyTokenName[iLookAhead as usize],
yyTokenName[YYWILDCARD as usize]
);
}
return yy_action[j];
}
} /* YYWILDCARD */
return yy_default[stateno as usize];
} else {
return yy_action[i];
}
}
}
/*
** Find the appropriate action for a parser given the non-terminal
** look-ahead token iLookAhead.
*/
#[expect(non_snake_case)]
fn yy_find_reduce_action(
stateno: YYACTIONTYPE, /* Current state number */
iLookAhead: YYCODETYPE, /* The look-ahead token */
) -> YYACTIONTYPE {
if YYERRORSYMBOL > 0 {
if stateno > YY_REDUCE_COUNT {
return yy_default[stateno as usize];
}
} else {
assert!(stateno <= YY_REDUCE_COUNT);
}
let mut i: i32 = yy_reduce_ofst[stateno as usize].into();
assert_ne!(iLookAhead, YYNOCODE);
i += i32::from(iLookAhead);
if YYERRORSYMBOL > 0 {
if !(0..YY_ACTTAB_COUNT!()).contains(&i) || yy_lookahead[i as usize] != iLookAhead {
return yy_default[stateno as usize];
}
} else {
assert!((0..YY_ACTTAB_COUNT!()).contains(&i));
assert_eq!(yy_lookahead[i as usize], iLookAhead);
}
yy_action[i as usize]
}
/******** Begin %stack_overflow code ******************************************
%%
******** End %stack_overflow code ********************************************/
/*
** Print tracing information for a SHIFT action
*/
impl yyParser<'_> {
#[expect(non_snake_case)]
#[cfg(feature = "NDEBUG")]
fn yyTraceShift(&self, _: YYACTIONTYPE, _: &str) {
}
#[expect(non_snake_case)]
#[cfg(not(feature = "NDEBUG"))]
fn yyTraceShift(&self, yyNewState: YYACTIONTYPE, zTag: &str) {
let yytos = &self[0];
if yyNewState < YYNSTATE {
debug!(
target: TARGET,
"{} '{}', go to state {}", zTag, yyTokenName[yytos.major as usize], yyNewState
);
} else {
debug!(
target: TARGET,
"{} '{}', pending reduce {:?}",
zTag,
yyTokenName[yytos.major as usize],
yyNewState.checked_sub(YY_MIN_REDUCE)
);
}
}
}
/*
** Perform a shift action.
*/
impl<'input> yyParser<'input> {
#[expect(non_snake_case)]
fn yy_shift(
&mut self,
mut yyNewState: YYACTIONTYPE, /* The new state to shift in */
yyMajor: YYCODETYPE, /* The major token to shift in */
yyMinor: ParseTOKENTYPE<'input>, /* The minor token to shift in */
) {
self.yyidx_shift(1);
self.yyhwm_incr();
if self.yy_grow_stack_if_needed() {
return;
}
if yyNewState > YY_MAX_SHIFT {
yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE;
}
let yytos = yyStackEntry {
stateno: yyNewState,
major: yyMajor,
minor: YYMINORTYPE::yy0(yyMinor),
};
self.push(yytos);
self.yyTraceShift(yyNewState, "Shift");
}
}
/* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side
** of that rule */
#[expect(non_upper_case_globals)]
static yyRuleInfoLhs: [YYCODETYPE; YYNRULE] = [
%%
];
/* For rule J, yyRuleInfoNRhs[J] contains the negative of the number
** of symbols on the right-hand side of that rule. */
#[expect(non_upper_case_globals)]
static yyRuleInfoNRhs: [i8; YYNRULE] = [
%%
];
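/* Illustrative sketch (hypothetical rule): for "expr ::= expr PLUS expr",
** yyRuleInfoLhs[J] holds the symbol code for "expr" and yyRuleInfoNRhs[J] is -3,
** so yy_reduce() rewinds the stack index by two slots (yysize + 1) and rewrites
** that slot, replacing the three RHS entries with a single "expr" entry. */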
/*
** Perform a reduce action and the shift that must immediately
** follow the reduce.
**
** The yy_look_ahead and yy_lookahead_token parameters provide reduce actions
** access to the lookahead token (if any). yy_look_ahead will be YYNOCODE
** if the lookahead token has already been consumed. As this procedure is
** only called from one place, optimizing compilers will in-line it, which
** means that the extra parameters have no performance impact.
*/
impl yyParser<'_> {
fn yy_reduce(
&mut self,
yyruleno: YYACTIONTYPE, /* Number of the rule by which to reduce */
yy_look_ahead: YYCODETYPE, /* Lookahead token, or YYNOCODE if none */
yy_lookahead_token: &ParseTOKENTYPE, /* Value of the lookahead token */
) -> Result<YYACTIONTYPE, ParseError> {
let _ = yy_look_ahead;
let _ = yy_lookahead_token;
let yylhsminor: YYMINORTYPE<'_>;
match yyruleno {
/* Beginning here are the reduction cases. A typical example
** follows:
** case 0:
** #line <lineno> <grammarfile>
** { ... } // User supplied code
** #line <lineno> <thisfile>
** break;
*/
/********** Begin reduce actions **********************************************/
%%
/********** End reduce actions ************************************************/
};
let yygoto: YYCODETYPE = yyRuleInfoLhs[yyruleno as usize]; /* The next state */
let yysize: i8 = yyRuleInfoNRhs[yyruleno as usize]; /* Amount to pop the stack */
let yyact: YYACTIONTYPE = yy_find_reduce_action(self[yysize].stateno, yygoto); /* The next action */
/* There are no SHIFTREDUCE actions on nonterminals because the table
** generator has simplified them to pure REDUCE actions. */
assert!(!(yyact > YY_MAX_SHIFT && yyact <= YY_MAX_SHIFTREDUCE));
/* It is not possible for a REDUCE to be followed by an error */
assert_ne!(yyact, YY_ERROR_ACTION);
self.yyidx_shift(yysize + 1);
{
let yymsp = &mut self[0];
yymsp.stateno = yyact;
yymsp.major = yygoto;
}
self.yyTraceShift(yyact, "... then shift");
Ok(yyact)
}
}
/*
** The following code executes when the parse fails
*/
impl yyParser<'_> {
#[cfg(not(feature = "YYNOERRORRECOVERY"))]
fn yy_parse_failed(&mut self) {
#[cfg(not(feature = "NDEBUG"))]
{
error!(target: TARGET, "Fail!");
}
while self.yyidx > 0 {
self.yy_pop_parser_stack();
}
/* Here code is inserted which will be executed whenever the
** parser fails */
/************ Begin %parse_failure code ***************************************/
%%
/************ End %parse_failure code *****************************************/
}
#[cfg(feature = "YYNOERRORRECOVERY")]
fn yy_parse_failed(&mut self) {}
}
/*
** The following code executes when a syntax error first occurs.
*/
impl yyParser<'_> {
fn yy_syntax_error(
&mut self,
yymajor: YYCODETYPE, /* The major type of the error token */
yyminor: &ParseTOKENTYPE, /* The minor type of the error token */
) {
/************ Begin %syntax_error code ****************************************/
%%
/************ End %syntax_error code ******************************************/
}
}
/*
** The following is executed when the parser accepts
*/
impl yyParser<'_> {
fn yy_accept(&mut self) {
#[cfg(not(feature = "NDEBUG"))]
{
debug!(target: TARGET, "Accept!");
}
if cfg!(not(feature = "YYNOERRORRECOVERY")) {
self.yyerrcnt = -1;
}
assert_eq!(self.yyidx, 0);
/* Here code is inserted which will be executed whenever the
** parser accepts */
/*********** Begin %parse_accept code *****************************************/
%%
/*********** End %parse_accept code *******************************************/
}
}
/* The main parser program.
** `Parse` is called once for each input token on a yyParser created with
** yyParser::new() (which also receives the %extra_context value, if any,
** and makes it available to the action routines in the grammar).
**
** Inputs:
** <ul>
** <li> The parser itself (self).
** <li> The major token code number.
** <li> The minor (semantic) token value.
** </ul>
**
** Outputs:
** Ok(()) on success, or a ParseError propagated from a reduce action.
*/
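/* Illustrative caller sketch (hypothetical lexer and token names; it assumes
** the classic lemon convention that a token whose major code is 0 marks the
** end of input):
**
**     let mut p = yyParser::new(ctx);          // ctx: the %extra_context value, if any
**     while let Some((tt, val)) = lexer.next() {
**         p.Parse(tt, val)?;                   // feed one token at a time
**     }
**     p.Parse(eof_token, eof_value)?;          // major code 0 => end of input
**     p.ParseFinalize();                       // pop anything left on the stack
*/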
impl<'input> yyParser<'input> {
#[expect(non_snake_case)]
pub fn Parse(
&mut self,
yymajor: TokenType, /* The major token code number */
yyminor: ParseTOKENTYPE<'input>, /* The value for the token */
) -> Result<(), ParseError> {
let mut yymajor = yymajor as YYCODETYPE;
//#[cfg(all(not(feature = "YYERRORSYMBOL"), not(feature = "YYNOERRORRECOVERY")))]
let mut yyendofinput: bool = false; /* True if we are at the end of input */
//#[cfg(feature = "YYERRORSYMBOL")]
let mut yyerrorhit: bool = false; /* True if yymajor has invoked an error */
//assert_ne!( self[0], null );
if YYERRORSYMBOL == 0 && cfg!(not(feature = "YYNOERRORRECOVERY")) {
yyendofinput = yymajor == 0;
}
let mut yyact: YYACTIONTYPE = self[0].stateno; /* The parser action. */
#[cfg(not(feature = "NDEBUG"))]
{
if yyact < YY_MIN_REDUCE {
debug!(
target: TARGET,
"Input '{}' in state {}", yyTokenName[yymajor as usize], yyact
);
} else {
debug!(
target: TARGET,
"Input '{}' with pending reduce {}",
yyTokenName[yymajor as usize],
yyact - YY_MIN_REDUCE
);
}
}
loop {
assert_eq!(yyact, self[0].stateno);
yyact = yy_find_shift_action(yymajor, yyact);
if yyact >= YY_MIN_REDUCE {
let yyruleno = yyact - YY_MIN_REDUCE; /* Reduce by this rule */
#[cfg(not(feature = "NDEBUG"))]
{
assert!((yyruleno as usize) < yyRuleName.len());
let yysize = yyRuleInfoNRhs[yyruleno as usize];
let action = if yyruleno < YYNRULE_WITH_ACTION {
""
} else {
" without external action"
};
if yysize != 0 {
debug!(
target: TARGET,
"Reduce {} [{}]{}, pop back to state {}.",
yyruleno,
yyRuleName[yyruleno as usize],
action,
self[yysize].stateno
);
} else {
debug!(
target: TARGET,
"Reduce {} [{}]{}.", yyruleno, yyRuleName[yyruleno as usize], action
);
}
}
/* Check that the stack is large enough to grow by a single entry
** if the RHS of the rule is empty. This ensures that there is room
** enough on the stack to push the LHS value */
if yyRuleInfoNRhs[yyruleno as usize] == 0 {
self.yyhwm_incr();
if self.yy_grow_stack_for_push() {
break;
}
}
yyact = self.yy_reduce(yyruleno, yymajor, &yyminor)?;
} else if yyact <= YY_MAX_SHIFTREDUCE {
self.yy_shift(yyact, yymajor, yyminor);
if cfg!(not(feature = "YYNOERRORRECOVERY")) {
self.yyerrcnt -= 1;
}
break;
} else if yyact == YY_ACCEPT_ACTION {
self.yyidx_shift(-1);
self.yy_accept();
return Ok(());
} else {
assert_eq!(yyact, YY_ERROR_ACTION);
#[cfg(not(feature = "NDEBUG"))]
{
debug!(target: TARGET, "Syntax Error!");
}
if YYERRORSYMBOL > 0 {
/* A syntax error has occurred.
** The response to an error depends upon whether or not the
** grammar defines an error token "ERROR".
**
** This is what we do if the grammar does define ERROR:
**
** * Call the %syntax_error function.
**
** * Begin popping the stack until we enter a state where
** it is legal to shift the error symbol, then shift
** the error symbol.
**
** * Set the error count to three.
**
** * Begin accepting and shifting new tokens. No new error
** processing will occur until three tokens have been
** shifted successfully.
**
*/
if self.yyerrcnt < 0 {
self.yy_syntax_error(yymajor, &yyminor);
}
let yymx = self[0].major;
if yymx == YYERRORSYMBOL || yyerrorhit {
#[cfg(not(feature = "NDEBUG"))]
{
debug!(
target: TARGET,
"Discard input token {}", yyTokenName[yymajor as usize]
);
}
yymajor = YYNOCODE;
} else {
while self.yyidx > 0 {
yyact = yy_find_reduce_action(self[0].stateno, YYERRORSYMBOL);
if yyact <= YY_MAX_SHIFTREDUCE {
break;
}
self.yy_pop_parser_stack();
}
if self.yyidx <= 0 || yymajor == 0 {
self.yy_parse_failed();
if cfg!(not(feature = "YYNOERRORRECOVERY")) {
self.yyerrcnt = -1;
}
yymajor = YYNOCODE;
} else if yymx != YYERRORSYMBOL {
self.yy_shift(yyact, YYERRORSYMBOL, yyminor);
}
}
self.yyerrcnt = 3;
yyerrorhit = true;
if yymajor == YYNOCODE {
break;
}
yyact = self[0].stateno;
} else if cfg!(feature = "YYNOERRORRECOVERY") {
/* If the YYNOERRORRECOVERY macro is defined, then do not attempt to
** do any kind of error recovery. Instead, simply invoke the syntax
** error routine and continue going as if nothing had happened.
**
** Applications can set this macro (for example inside %include) if
** they intend to abandon the parse upon the first syntax error seen.
*/
self.yy_syntax_error(yymajor, &yyminor);
break;
} else {
/* YYERRORSYMBOL is not defined */
/* This is what we do if the grammar does not define ERROR:
**
** * Report an error message, and throw away the input token.
**
** * If the input token is $, then fail the parse.
**
** As before, subsequent error messages are suppressed until
** three input tokens have been successfully shifted.
*/
if self.yyerrcnt <= 0 {
self.yy_syntax_error(yymajor, &yyminor);
}
self.yyerrcnt = 3;
if yyendofinput {
self.yy_parse_failed();
if cfg!(not(feature = "YYNOERRORRECOVERY")) {
self.yyerrcnt = -1;
}
}
break;
}
}
if self.yyidx <= 0 {
break;
}
}
#[cfg(not(feature = "NDEBUG"))]
{
if log_enabled!(target: TARGET, Debug) {
let msg = self.yystack[1..=self.yyidx]
.iter()
.map(|entry| yyTokenName[entry.major as usize])
.collect::<Vec<&str>>()
.join(" ");
debug!(target: TARGET, "Return. Stack=[{}]", msg);
}
}
return Ok(());
}
/*
** Return the fallback token corresponding to canonical token iToken, or
** 0 if iToken has no fallback.
*/
pub fn parse_fallback(i_token: YYCODETYPE) -> YYCODETYPE {
if YYFALLBACK {
return yyFallback[i_token as usize];
}
0
}
}