mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-03 00:14:21 +01:00
rewrite grammar generator and add fuzz test for arithmetic expressions
This commit is contained in:
127
Cargo.lock
generated
127
Cargo.lock
generated
@@ -24,10 +24,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
"zerocopy",
|
||||
"zerocopy 0.7.35",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -60,7 +60,7 @@ version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18a1e15a87b13ae79e04e07b3714fc41d5f6993dff11662fdbe0b207c6ad0fe0"
|
||||
dependencies = [
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -472,6 +472,8 @@ dependencies = [
|
||||
"env_logger 0.10.2",
|
||||
"limbo_core",
|
||||
"log",
|
||||
"rand 0.9.0",
|
||||
"rand_chacha 0.9.0",
|
||||
"rexpect",
|
||||
"rusqlite",
|
||||
"rustyline",
|
||||
@@ -977,10 +979,22 @@ dependencies = [
|
||||
"cfg-if",
|
||||
"js-sys",
|
||||
"libc",
|
||||
"wasi",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi 0.13.3+wasi-0.2.2",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.31.1"
|
||||
@@ -1400,7 +1414,7 @@ dependencies = [
|
||||
"itoa",
|
||||
"nom",
|
||||
"ordered-float",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
"ryu",
|
||||
"serde_json",
|
||||
]
|
||||
@@ -1557,7 +1571,7 @@ dependencies = [
|
||||
"chrono",
|
||||
"criterion",
|
||||
"fallible-iterator 0.3.0",
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
"hex",
|
||||
"indexmap",
|
||||
"io-uring",
|
||||
@@ -1579,7 +1593,7 @@ dependencies = [
|
||||
"pest_derive",
|
||||
"polling",
|
||||
"pprof",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"rstest",
|
||||
@@ -1647,8 +1661,8 @@ dependencies = [
|
||||
"limbo_core",
|
||||
"log",
|
||||
"notify",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
"rand 0.8.5",
|
||||
"rand_chacha 0.3.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
@@ -1680,7 +1694,7 @@ dependencies = [
|
||||
"limbo_ext",
|
||||
"quickcheck",
|
||||
"quickcheck_macros",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1798,7 +1812,7 @@ checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"wasi",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
@@ -2067,7 +2081,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2176,7 +2190,7 @@ version = "0.2.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
|
||||
dependencies = [
|
||||
"zerocopy",
|
||||
"zerocopy 0.7.35",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2307,7 +2321,7 @@ checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
|
||||
dependencies = [
|
||||
"env_logger 0.8.4",
|
||||
"log",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2347,8 +2361,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core",
|
||||
"rand_chacha 0.3.1",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94"
|
||||
dependencies = [
|
||||
"rand_chacha 0.9.0",
|
||||
"rand_core 0.9.0",
|
||||
"zerocopy 0.8.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2358,7 +2383,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core 0.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2367,7 +2402,17 @@ version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff"
|
||||
dependencies = [
|
||||
"getrandom 0.3.1",
|
||||
"zerocopy 0.8.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2405,7 +2450,7 @@ version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
"libredox",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
@@ -2847,7 +2892,7 @@ checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"fastrand",
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
"windows-sys 0.59.0",
|
||||
@@ -3079,7 +3124,7 @@ version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3119,6 +3164,15 @@ version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.13.3+wasi-0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
|
||||
dependencies = [
|
||||
"wit-bindgen-rt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.100"
|
||||
@@ -3454,6 +3508,15 @@ version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen-rt"
|
||||
version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
|
||||
dependencies = [
|
||||
"bitflags 2.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "write16"
|
||||
version = "1.0.0"
|
||||
@@ -3497,7 +3560,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"zerocopy-derive",
|
||||
"zerocopy-derive 0.7.35",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.8.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a367f292d93d4eab890745e75a778da40909cab4d6ff8173693812f79c4a2468"
|
||||
dependencies = [
|
||||
"zerocopy-derive 0.8.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3511,6 +3583,17 @@ dependencies = [
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.8.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3931cb58c62c13adec22e38686b559c86a30565e16ad6e8510a337cedc611e1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerofrom"
|
||||
version = "0.1.5"
|
||||
|
||||
@@ -29,7 +29,9 @@ rusqlite = { version = "0.29", features = ["bundled"] }
|
||||
tempfile = "3.0.7"
|
||||
log = "0.4.22"
|
||||
assert_cmd = "^2"
|
||||
rand_chacha = "0.9.0"
|
||||
rand = "0.9.0"
|
||||
|
||||
# rexpect does not support windows.
|
||||
[target.'cfg(not(windows))'.dependencies]
|
||||
rexpect = "0.6.0"
|
||||
rexpect = "0.6.0"
|
||||
|
||||
385
tests/fuzz/grammar_generator.rs
Normal file
385
tests/fuzz/grammar_generator.rs
Normal file
@@ -0,0 +1,385 @@
|
||||
/// Grammar generator is a helper to build a probabilistic grammar and generate random strings from it.
|
||||
/// Grammar consists of terminal (characters) and symbols (non-terminal with some expansion rule)
|
||||
///
|
||||
/// Currently supported expansion rules are:
|
||||
/// 1. Symbol -> [Str]: generate terminals which form fixed length string with constant prefix and random suffix
|
||||
/// 2. Symbol -> [Int]: generate terminals which form integer from specified range
|
||||
/// 3. Symbol -> (Inner)?: generate expansion for Inner symbol with some probability
|
||||
/// 4. Symbol -> (Inner){n..m}: generate k expansions for Inner symbol where k \in [n..m) with uniform distribution
|
||||
/// (note, that every repetition will be expanded independently)
|
||||
/// 5. Symbol -> Inner1 Inner2 .. Inner[n]: concatenate expansions from inner symbols and insert separator string between them
|
||||
/// 6. Symbol -> Choice1 | Choice2 | .. | Choice[n]: pick one choice at random according to the choices' weights and generate expansion for it
|
||||
///
|
||||
/// (this is more or less [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) with very minor differences)
|
||||
///
|
||||
/// The idea behind this code is to provide a way to "build" grammar generator with all these rules and their dependencies and after that
|
||||
/// we can randomly sample strings from this generator easily.
|
||||
use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
|
||||
|
||||
use rand::Rng;
|
||||
use rand_chacha::ChaCha8Rng;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum SymbolType {
|
||||
Str {
|
||||
fixed_prefix: String,
|
||||
random_length: usize,
|
||||
},
|
||||
Int {
|
||||
range: Range<i32>,
|
||||
},
|
||||
Optional {
|
||||
value: SymbolHandle,
|
||||
prob: f64,
|
||||
},
|
||||
Repeat {
|
||||
value: SymbolHandle,
|
||||
range: Range<usize>,
|
||||
separator: String,
|
||||
},
|
||||
Concat {
|
||||
values: Vec<SymbolHandle>,
|
||||
separator: String,
|
||||
},
|
||||
Choice {
|
||||
values: Vec<(SymbolHandle, f64)>,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn const_str(s: &str) -> SymbolType {
|
||||
SymbolType::Str {
|
||||
fixed_prefix: s.to_string(),
|
||||
random_length: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rand_str(fixed_prefix: &str, random_length: usize) -> SymbolType {
|
||||
SymbolType::Str {
|
||||
fixed_prefix: fixed_prefix.to_string(),
|
||||
random_length,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rand_int(range: Range<i32>) -> SymbolType {
|
||||
SymbolType::Int { range }
|
||||
}
|
||||
|
||||
/// Identifier of a symbol inside a [`GrammarGenerator`].
///
/// Handles are cheap to copy and are used as keys of the generator's symbol table.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SymbolHandle(i32);
|
||||
pub struct SymbolDefinitionBuilder {
|
||||
generator: GrammarGenerator,
|
||||
handle: SymbolHandle,
|
||||
symbol: Option<SymbolType>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum GrammarFrontierNode {
|
||||
Handle(SymbolHandle),
|
||||
String(String),
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GrammarGenerator(Rc<RefCell<GrammarGeneratorInner>>);
|
||||
|
||||
struct GrammarGeneratorInner {
|
||||
last_symbol_id: i32,
|
||||
symbols: HashMap<SymbolHandle, SymbolType>,
|
||||
}
|
||||
|
||||
impl GrammarGenerator {
|
||||
pub fn new() -> Self {
|
||||
GrammarGenerator(Rc::new(RefCell::new(GrammarGeneratorInner {
|
||||
last_symbol_id: 0,
|
||||
symbols: HashMap::new(),
|
||||
})))
|
||||
}
|
||||
pub fn create_handle(&self) -> (SymbolHandle, SymbolDefinitionBuilder) {
|
||||
let handle = SymbolHandle(self.0.borrow().last_symbol_id);
|
||||
self.0.borrow_mut().last_symbol_id += 1;
|
||||
|
||||
let builder = SymbolDefinitionBuilder {
|
||||
generator: self.clone(),
|
||||
handle,
|
||||
symbol: None,
|
||||
};
|
||||
(handle, builder)
|
||||
}
|
||||
pub fn create(&self) -> SymbolDefinitionBuilder {
|
||||
let (_, builder) = self.create_handle();
|
||||
builder
|
||||
}
|
||||
pub fn register(&self, handle: SymbolHandle, value: SymbolType) {
|
||||
let result = self.0.borrow_mut().symbols.insert(handle, value);
|
||||
assert!(result.is_none(), "handle can be registered only once");
|
||||
}
|
||||
|
||||
// this helper runs DFS for directed graph and set is_recursive[v] = true for all reachable from root vertices
|
||||
// if path of infinite lengths exists for v
|
||||
fn is_recursive_from_root(
|
||||
&self,
|
||||
root: SymbolHandle,
|
||||
is_recursive: &mut HashMap<SymbolHandle, bool>,
|
||||
) -> bool {
|
||||
if let Some(_) = is_recursive.get(&root) {
|
||||
is_recursive.insert(root, true);
|
||||
return true;
|
||||
}
|
||||
is_recursive.insert(root, false);
|
||||
let symbols = &self.0.borrow().symbols;
|
||||
let recursive = match symbols.get(&root).expect("symbol must be registered") {
|
||||
SymbolType::Str { .. } | SymbolType::Int { .. } => false,
|
||||
SymbolType::Optional { value, .. } | SymbolType::Repeat { value, .. } => {
|
||||
self.is_recursive_from_root(*value, is_recursive)
|
||||
}
|
||||
SymbolType::Concat { values, .. } => {
|
||||
let mut recursive = false;
|
||||
for value in values.iter() {
|
||||
recursive |= self.is_recursive_from_root(*value, is_recursive);
|
||||
}
|
||||
recursive
|
||||
}
|
||||
SymbolType::Choice { values, .. } => {
|
||||
let mut recursive = false;
|
||||
for (value, _) in values.iter() {
|
||||
recursive |= self.is_recursive_from_root(*value, is_recursive);
|
||||
}
|
||||
recursive
|
||||
}
|
||||
};
|
||||
is_recursive.insert(root, recursive);
|
||||
recursive
|
||||
}
|
||||
|
||||
// we generate random sample from grammar in BFS fashion instead of DFS because in such a way we can force abort generation of string in more fair fashion
|
||||
// the problem with probabilistic grammar, is that it's recursive rules can have infinite (or very large) average length of expanded terminals
|
||||
// in order to fight with this problem, we provide length_limit_hint which will change logic of generation and start using only non-recursive rules (if this is possible) in case
|
||||
// when "frontier" of the generation already have >= length_limit_hint nodes
|
||||
pub fn generate(
|
||||
&self,
|
||||
rng: &mut ChaCha8Rng,
|
||||
root: SymbolHandle,
|
||||
length_limit_hint: usize,
|
||||
) -> String {
|
||||
let mut frontier = vec![GrammarFrontierNode::Handle(root)];
|
||||
|
||||
let mut is_recursive = HashMap::new();
|
||||
self.is_recursive_from_root(root, &mut is_recursive);
|
||||
|
||||
let symbols = &self.0.borrow().symbols;
|
||||
let terminals = loop {
|
||||
let mut next = Vec::new();
|
||||
let mut expanded = false;
|
||||
let limit_exceeded = frontier.len() >= length_limit_hint;
|
||||
for node in frontier.into_iter() {
|
||||
let GrammarFrontierNode::Handle(handle) = node else {
|
||||
next.push(node);
|
||||
continue;
|
||||
};
|
||||
|
||||
expanded = true;
|
||||
match symbols.get(&handle).expect("symbol must be registered") {
|
||||
SymbolType::Str {
|
||||
fixed_prefix,
|
||||
random_length,
|
||||
} => {
|
||||
let mut s = fixed_prefix.clone();
|
||||
for _ in 0..*random_length {
|
||||
s.push(rng.random_range('A'..'Z'));
|
||||
}
|
||||
next.push(GrammarFrontierNode::String(s));
|
||||
}
|
||||
SymbolType::Int { range } => {
|
||||
next.push(GrammarFrontierNode::String(
|
||||
rng.random_range(range.clone()).to_string(),
|
||||
));
|
||||
}
|
||||
SymbolType::Optional { value, prob } => {
|
||||
if !limit_exceeded && rng.random_bool(*prob) {
|
||||
next.push(GrammarFrontierNode::Handle(*value));
|
||||
}
|
||||
}
|
||||
SymbolType::Repeat {
|
||||
value,
|
||||
range,
|
||||
separator,
|
||||
} => {
|
||||
let repetitions = if !limit_exceeded {
|
||||
rng.random_range(range.clone())
|
||||
} else {
|
||||
range.start
|
||||
};
|
||||
for i in 0..repetitions {
|
||||
if i > 0 {
|
||||
next.push(GrammarFrontierNode::String(separator.to_string()));
|
||||
}
|
||||
next.push(GrammarFrontierNode::Handle(*value));
|
||||
}
|
||||
}
|
||||
SymbolType::Concat { values, separator } => {
|
||||
for (i, value) in values.iter().enumerate() {
|
||||
if i > 0 {
|
||||
next.push(GrammarFrontierNode::String(separator.to_string()));
|
||||
}
|
||||
next.push(GrammarFrontierNode::Handle(*value));
|
||||
}
|
||||
}
|
||||
SymbolType::Choice { values } => {
|
||||
let mut handles = if !limit_exceeded {
|
||||
values.clone()
|
||||
} else {
|
||||
values
|
||||
.iter()
|
||||
.filter(|x| is_recursive.get(&x.0) != Some(&true))
|
||||
.map(|x| *x)
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
if handles.len() == 0 {
|
||||
handles = values.clone();
|
||||
}
|
||||
|
||||
let sum: f64 = handles.iter().map(|x| x.1).sum();
|
||||
let mut sample = rng.random_range(0.0..sum);
|
||||
for (i, (handle, weight)) in handles.iter().enumerate() {
|
||||
sample -= weight;
|
||||
if sample > 0.0 && i < handles.len() - 1 {
|
||||
continue;
|
||||
}
|
||||
next.push(GrammarFrontierNode::Handle(*handle));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !expanded {
|
||||
break next;
|
||||
}
|
||||
frontier = next;
|
||||
};
|
||||
let mut result = String::new();
|
||||
for node in terminals {
|
||||
let GrammarFrontierNode::String(string) = node else {
|
||||
panic!("frontier in the end must contain only string nodes");
|
||||
};
|
||||
result.push_str(&string);
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolDefinitionBuilder {
|
||||
pub fn use_symbol(self, symbol: SymbolType) -> Self {
|
||||
assert!(self.symbol.is_none(), "symbol must be unset");
|
||||
Self {
|
||||
symbol: Some(symbol),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn concat(self, separator: &str) -> Self {
|
||||
assert!(self.symbol.is_none(), "symbol must be unset");
|
||||
Self {
|
||||
symbol: Some(SymbolType::Concat {
|
||||
values: vec![],
|
||||
separator: separator.to_string(),
|
||||
}),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn push(mut self, handle: SymbolHandle) -> Self {
|
||||
let Some(SymbolType::Concat {
|
||||
mut values,
|
||||
separator,
|
||||
}) = self.symbol.take()
|
||||
else {
|
||||
panic!("symbol must be set to Concat type");
|
||||
};
|
||||
values.push(handle);
|
||||
Self {
|
||||
symbol: Some(SymbolType::Concat { values, separator }),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn push_symbol(self, symbol: SymbolType) -> Self {
|
||||
let (handle, builder) = self.generator.create_handle();
|
||||
builder.use_symbol(symbol).build();
|
||||
self.push(handle)
|
||||
}
|
||||
pub fn push_str(self, s: &str) -> Self {
|
||||
self.push_symbol(const_str(s))
|
||||
}
|
||||
pub fn choice(self) -> Self {
|
||||
assert!(self.symbol.is_none(), "symbol must be unset");
|
||||
Self {
|
||||
symbol: Some(SymbolType::Choice { values: vec![] }),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn option_w(mut self, handle: SymbolHandle, weight: f64) -> Self {
|
||||
let Some(SymbolType::Choice { mut values }) = self.symbol.take() else {
|
||||
panic!("symbol must be set to Choice type");
|
||||
};
|
||||
values.push((handle, weight));
|
||||
Self {
|
||||
symbol: Some(SymbolType::Choice { values }),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn option(self, handle: SymbolHandle) -> Self {
|
||||
self.option_w(handle, 1.0)
|
||||
}
|
||||
pub fn option_symbol_w(self, symbol: SymbolType, weight: f64) -> Self {
|
||||
let (handle, builder) = self.generator.create_handle();
|
||||
builder.use_symbol(symbol).build();
|
||||
self.option_w(handle, weight)
|
||||
}
|
||||
pub fn option_symbol(self, symbol: SymbolType) -> Self {
|
||||
self.option_symbol_w(symbol, 1.0)
|
||||
}
|
||||
pub fn option_str(self, s: &str) -> Self {
|
||||
self.option_symbol(const_str(s))
|
||||
}
|
||||
pub fn options_symbol<const N: usize>(mut self, symbols: [SymbolType; N]) -> Self {
|
||||
for symbol in symbols {
|
||||
self = self.option_symbol(symbol)
|
||||
}
|
||||
self
|
||||
}
|
||||
pub fn options_str<const N: usize>(mut self, strs: [&str; N]) -> Self {
|
||||
for s in strs {
|
||||
self = self.option_str(s)
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn repeat(self, range: Range<usize>, separator: &str) -> Self {
|
||||
let symbol = self.symbol.expect("symbol must be set");
|
||||
let (handle, builder) = self.generator.create_handle();
|
||||
builder.use_symbol(symbol).build();
|
||||
Self {
|
||||
symbol: Some(SymbolType::Repeat {
|
||||
value: handle,
|
||||
range,
|
||||
separator: separator.to_string(),
|
||||
}),
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn optional(self, prob: f64) -> Self {
|
||||
let symbol = self.symbol.expect("symbol must be set");
|
||||
let (handle, builder) = self.generator.create_handle();
|
||||
builder.use_symbol(symbol).build();
|
||||
Self {
|
||||
symbol: Some(SymbolType::Optional {
|
||||
value: handle,
|
||||
prob,
|
||||
}),
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build(self) -> SymbolHandle {
|
||||
let symbol = self.symbol.expect("symbol must be set");
|
||||
self.generator.register(self.handle, symbol);
|
||||
self.handle
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,16 @@
|
||||
pub mod grammar_generator;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{rc::Rc, sync::Arc};
|
||||
|
||||
use limbo_core::Database;
|
||||
use rand::SeedableRng;
|
||||
use rand_chacha::ChaCha8Rng;
|
||||
use rusqlite::params;
|
||||
|
||||
use crate::grammar_generator::{rand_int, GrammarGenerator};
|
||||
|
||||
fn sqlite_exec_row(conn: &rusqlite::Connection, query: &str) -> Vec<rusqlite::types::Value> {
|
||||
let mut stmt = conn.prepare(&query).unwrap();
|
||||
let mut rows = stmt.query(params![]).unwrap();
|
||||
@@ -49,19 +55,63 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
pub fn arithmetic_expression_fuzz() {
    // Differential fuzz test: random arithmetic expressions must evaluate to the
    // same value in limbo and in SQLite.
    let grammar = GrammarGenerator::new();

    // Handles are reserved first because the rules below are mutually recursive through `expr`.
    let (expr, expr_builder) = grammar.create_handle();
    let (bin_op, bin_op_builder) = grammar.create_handle();
    let (unary_op, unary_op_builder) = grammar.create_handle();
    let (paren, paren_builder) = grammar.create_handle();

    // paren := "(" expr ")"
    paren_builder
        .concat("")
        .push_str("(")
        .push(expr)
        .push_str(")")
        .build();

    // unary_op := ("~" | "+" | "-") expr
    let unary_operator = grammar
        .create()
        .choice()
        .options_str(["~", "+", "-"])
        .build();
    unary_op_builder
        .concat(" ")
        .push(unary_operator)
        .push(expr)
        .build();

    // bin_op := expr <operator> expr
    let binary_operator = grammar
        .create()
        .choice()
        .options_str(["+", "-", "*", "/", "%", "&", "|", "<<", ">>"])
        .build();
    bin_op_builder
        .concat(" ")
        .push(expr)
        .push(binary_operator)
        .push(expr)
        .build();

    // expr := unary_op | bin_op | paren | integer in -10..10, all equally weighted
    expr_builder
        .choice()
        .option_w(unary_op, 1.0)
        .option_w(bin_op, 1.0)
        .option_w(paren, 1.0)
        .option_symbol_w(rand_int(-10..10), 1.0)
        .build();

    let sql = grammar
        .create()
        .concat(" ")
        .push_str("SELECT")
        .push(expr)
        .build();

    let io = Arc::new(limbo_core::PlatformIO::new().unwrap());
    let limbo_db = Database::open_file(io, ":memory:").unwrap();
    let limbo_conn = limbo_db.connect();
    let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap();

    let probe = "SELECT 1 = 1.0";
    println!("column: {:?}", sqlite_exec_row(&sqlite_conn, probe));
    println!("column: {:?}", limbo_exec_row(&limbo_conn, probe));

    // Fixed seed keeps the generated query sequence reproducible between runs.
    let mut rng = ChaCha8Rng::seed_from_u64(0);
    let iterations = 16 * 1024;
    for _ in 0..iterations {
        let query = grammar.generate(&mut rng, sql, 50);
        let limbo = limbo_exec_row(&limbo_conn, &query);
        let sqlite = sqlite_exec_row(&sqlite_conn, &query);
        assert_eq!(
            limbo, sqlite,
            "query: {}, limbo: {:?}, sqlite: {:?}",
            query, limbo, sqlite
        );
    }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user