From 2c958d7e2d8d0188c56c68d00c74d220eef5e541 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 2 Feb 2025 14:11:41 +0400 Subject: [PATCH 1/9] derive Debug trait for limbo step result --- core/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index fdced3f2f..0addaffc5 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -494,7 +494,7 @@ impl Statement { } } -#[derive(PartialEq)] +#[derive(Debug, PartialEq)] pub enum StepResult<'a> { Row(Row<'a>), IO, @@ -503,7 +503,7 @@ pub enum StepResult<'a> { Busy, } -#[derive(PartialEq)] +#[derive(Debug, PartialEq)] pub struct Row<'a> { pub values: Vec>, } From f716919b10209c5ffa704edcc824b4814e475fce Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 2 Feb 2025 14:12:12 +0400 Subject: [PATCH 2/9] setup basic playground for fuzzing against sqlite --- tests/Cargo.toml | 6 ++++- tests/fuzz/mod.rs | 67 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 tests/fuzz/mod.rs diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 473fed1cd..9deb78859 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -5,7 +5,7 @@ authors.workspace = true edition.workspace = true license.workspace = true repository.workspace = true -description = "Integration tests" +description = "Integration & fuzz tests" [lib] path = "lib.rs" @@ -14,6 +14,10 @@ path = "lib.rs" name = "integration_tests" path = "integration/mod.rs" +[[test]] +name = "fuzz_tests" +path = "fuzz/mod.rs" + [dependencies] anyhow = "1.0.75" clap = { version = "4.5", features = ["derive"] } diff --git a/tests/fuzz/mod.rs b/tests/fuzz/mod.rs new file mode 100644 index 000000000..57c7d1381 --- /dev/null +++ b/tests/fuzz/mod.rs @@ -0,0 +1,67 @@ +#[cfg(test)] +mod tests { + use std::{rc::Rc, sync::Arc}; + + use limbo_core::Database; + use rusqlite::params; + + fn sqlite_exec_row(conn: &rusqlite::Connection, query: &str) -> Vec { + let mut stmt = 
conn.prepare(&query).unwrap(); + let mut rows = stmt.query(params![]).unwrap(); + let mut columns = Vec::new(); + let row = rows.next().unwrap().unwrap(); + for i in 0.. { + let column: rusqlite::types::Value = match row.get(i) { + Ok(column) => column, + Err(rusqlite::Error::InvalidColumnIndex(_)) => break, + Err(err) => panic!("unexpected rusqlite error: {}", err), + }; + columns.push(column); + } + assert!(rows.next().unwrap().is_none()); + + columns + } + + fn limbo_exec_row( + conn: &Rc, + query: &str, + ) -> Vec { + let mut stmt = conn.prepare(query).unwrap(); + let result = stmt.step().unwrap(); + let row = loop { + match result { + limbo_core::StepResult::Row(row) => break row, + limbo_core::StepResult::IO => continue, + r => panic!("unexpected result {:?}: expecting single row", r), + } + }; + row.values + .iter() + .map(|x| match x { + limbo_core::Value::Null => rusqlite::types::Value::Null, + limbo_core::Value::Integer(x) => rusqlite::types::Value::Integer(*x), + limbo_core::Value::Float(x) => rusqlite::types::Value::Real(*x), + limbo_core::Value::Text(x) => rusqlite::types::Value::Text((*x).clone()), + limbo_core::Value::Blob(x) => rusqlite::types::Value::Blob((*x).clone()), + }) + .collect() + } + + #[test] + pub fn kek() { + let io = Arc::new(limbo_core::PlatformIO::new().unwrap()); + let limbo_db = Database::open_file(io, ":memory:").unwrap(); + let limbo_conn = limbo_db.connect(); + let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); + + println!( + "column: {:?}", + sqlite_exec_row(&sqlite_conn, "SELECT 1 = 1.0") + ); + println!( + "column: {:?}", + limbo_exec_row(&limbo_conn, "SELECT 1 = 1.0") + ); + } +} From 91fcb67b061d2cc505ece9a8736108abe209f73a Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 2 Feb 2025 18:39:24 +0400 Subject: [PATCH 3/9] rewrite grammar generator and add fuzz test for arithmetic expressions --- Cargo.lock | 127 +++++++++-- tests/Cargo.toml | 4 +- tests/fuzz/grammar_generator.rs | 385 
++++++++++++++++++++++++++++++++ tests/fuzz/mod.rs | 68 +++++- 4 files changed, 552 insertions(+), 32 deletions(-) create mode 100644 tests/fuzz/grammar_generator.rs diff --git a/Cargo.lock b/Cargo.lock index 23a263f9c..884a59ef2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,10 +24,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", - "getrandom", + "getrandom 0.2.15", "once_cell", "version_check", - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -60,7 +60,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18a1e15a87b13ae79e04e07b3714fc41d5f6993dff11662fdbe0b207c6ad0fe0" dependencies = [ - "rand", + "rand 0.8.5", ] [[package]] @@ -472,6 +472,8 @@ dependencies = [ "env_logger 0.10.2", "limbo_core", "log", + "rand 0.9.0", + "rand_chacha 0.9.0", "rexpect", "rusqlite", "rustyline", @@ -977,10 +979,22 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets 0.52.6", +] + [[package]] name = "gimli" version = "0.31.1" @@ -1400,7 +1414,7 @@ dependencies = [ "itoa", "nom", "ordered-float", - "rand", + "rand 0.8.5", "ryu", "serde_json", ] @@ -1557,7 +1571,7 @@ dependencies = [ "chrono", "criterion", "fallible-iterator 0.3.0", - "getrandom", + "getrandom 0.2.15", "hex", "indexmap", "io-uring", @@ -1579,7 +1593,7 @@ dependencies = [ "pest_derive", "polling", "pprof", - "rand", + "rand 0.8.5", "regex", "regex-syntax", "rstest", @@ -1647,8 +1661,8 @@ dependencies = [ "limbo_core", "log", "notify", - "rand", - "rand_chacha", + "rand 0.8.5", + "rand_chacha 0.3.1", 
"serde", "serde_json", "tempfile", @@ -1680,7 +1694,7 @@ dependencies = [ "limbo_ext", "quickcheck", "quickcheck_macros", - "rand", + "rand 0.8.5", ] [[package]] @@ -1798,7 +1812,7 @@ checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", "log", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.52.0", ] @@ -2067,7 +2081,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -2176,7 +2190,7 @@ version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -2307,7 +2321,7 @@ checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" dependencies = [ "env_logger 0.8.4", "log", - "rand", + "rand 0.8.5", ] [[package]] @@ -2347,8 +2361,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.0", + "zerocopy 0.8.14", ] [[package]] @@ -2358,7 +2383,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.0", ] [[package]] @@ -2367,7 +2402,17 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", +] + +[[package]] +name = "rand_core" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff" +dependencies = [ + "getrandom 0.3.1", + "zerocopy 0.8.14", ] [[package]] @@ -2405,7 +2450,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom", + "getrandom 0.2.15", "libredox", "thiserror 1.0.69", ] @@ -2847,7 +2892,7 @@ checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704" dependencies = [ "cfg-if", "fastrand", - "getrandom", + "getrandom 0.2.15", "once_cell", "rustix", "windows-sys 0.59.0", @@ -3079,7 +3124,7 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" dependencies = [ - "getrandom", + "getrandom 0.2.15", ] [[package]] @@ -3119,6 +3164,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -3454,6 +3508,15 @@ version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "write16" version = "1.0.0" @@ -3497,7 +3560,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a367f292d93d4eab890745e75a778da40909cab4d6ff8173693812f79c4a2468" +dependencies = [ + "zerocopy-derive 0.8.14", ] [[package]] @@ -3511,6 +3583,17 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "zerocopy-derive" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3931cb58c62c13adec22e38686b559c86a30565e16ad6e8510a337cedc611e1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.96", +] + [[package]] name = "zerofrom" version = "0.1.5" diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 9deb78859..3ee12139f 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -29,7 +29,9 @@ rusqlite = { version = "0.29", features = ["bundled"] } tempfile = "3.0.7" log = "0.4.22" assert_cmd = "^2" +rand_chacha = "0.9.0" +rand = "0.9.0" # rexpect does not support windows. 
[target.'cfg(not(windows))'.dependencies] -rexpect = "0.6.0" \ No newline at end of file +rexpect = "0.6.0" diff --git a/tests/fuzz/grammar_generator.rs b/tests/fuzz/grammar_generator.rs new file mode 100644 index 000000000..6b6a692d6 --- /dev/null +++ b/tests/fuzz/grammar_generator.rs @@ -0,0 +1,385 @@ +/// Grammar generator is a helper to build a probabilistic grammar and generate random strings from it +/// Grammar consists of terminals (characters) and symbols (non-terminals with some expansion rule) +/// +/// Currently supported expansion rules are: +/// 1. Symbol -> [Str]: generate terminals which form a fixed-length string with a constant prefix and a random suffix +/// 2. Symbol -> [Int]: generate terminals which form an integer from the specified range +/// 3. Symbol -> (Inner)?: generate expansion for Inner symbol with some probability +/// 4. Symbol -> (Inner){n..m}: generate k expansions for Inner symbol where k \in [n..m) with uniform distribution +/// (note that every repetition will be expanded independently) +/// 5. Symbol -> Inner1 Inner2 .. Inner[n]: concatenate expansions from inner symbols and insert separator string between them +/// 6. Symbol -> Choice1 | Choice2 | .. | Choice[n]: pick a random choice according to the weights and generate expansion for it +/// +/// (this is more or less a [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) with very minor differences) +/// +/// The idea behind this code is to provide a way to "build" a grammar generator with all these rules and their dependencies, after which +/// we can easily sample random strings from this generator. 
+use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc}; + +use rand::Rng; +use rand_chacha::ChaCha8Rng; + +#[derive(Clone, Debug)] +pub enum SymbolType { + Str { + fixed_prefix: String, + random_length: usize, + }, + Int { + range: Range, + }, + Optional { + value: SymbolHandle, + prob: f64, + }, + Repeat { + value: SymbolHandle, + range: Range, + separator: String, + }, + Concat { + values: Vec, + separator: String, + }, + Choice { + values: Vec<(SymbolHandle, f64)>, + }, +} + +pub fn const_str(s: &str) -> SymbolType { + SymbolType::Str { + fixed_prefix: s.to_string(), + random_length: 0, + } +} + +pub fn rand_str(fixed_prefix: &str, random_length: usize) -> SymbolType { + SymbolType::Str { + fixed_prefix: fixed_prefix.to_string(), + random_length, + } +} + +pub fn rand_int(range: Range) -> SymbolType { + SymbolType::Int { range } +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct SymbolHandle(i32); +pub struct SymbolDefinitionBuilder { + generator: GrammarGenerator, + handle: SymbolHandle, + symbol: Option, +} + +#[derive(Debug)] +enum GrammarFrontierNode { + Handle(SymbolHandle), + String(String), +} + +#[derive(Clone)] +pub struct GrammarGenerator(Rc>); + +struct GrammarGeneratorInner { + last_symbol_id: i32, + symbols: HashMap, +} + +impl GrammarGenerator { + pub fn new() -> Self { + GrammarGenerator(Rc::new(RefCell::new(GrammarGeneratorInner { + last_symbol_id: 0, + symbols: HashMap::new(), + }))) + } + pub fn create_handle(&self) -> (SymbolHandle, SymbolDefinitionBuilder) { + let handle = SymbolHandle(self.0.borrow().last_symbol_id); + self.0.borrow_mut().last_symbol_id += 1; + + let builder = SymbolDefinitionBuilder { + generator: self.clone(), + handle, + symbol: None, + }; + (handle, builder) + } + pub fn create(&self) -> SymbolDefinitionBuilder { + let (_, builder) = self.create_handle(); + builder + } + pub fn register(&self, handle: SymbolHandle, value: SymbolType) { + let result = 
self.0.borrow_mut().symbols.insert(handle, value); + assert!(result.is_none(), "handle can be registered only once"); + } + + // this helper runs DFS for directed graph and set is_recursive[v] = true for all reachable from root vertices + // if path of infinite lengths exists for v + fn is_recursive_from_root( + &self, + root: SymbolHandle, + is_recursive: &mut HashMap, + ) -> bool { + if let Some(_) = is_recursive.get(&root) { + is_recursive.insert(root, true); + return true; + } + is_recursive.insert(root, false); + let symbols = &self.0.borrow().symbols; + let recursive = match symbols.get(&root).expect("symbol must be registered") { + SymbolType::Str { .. } | SymbolType::Int { .. } => false, + SymbolType::Optional { value, .. } | SymbolType::Repeat { value, .. } => { + self.is_recursive_from_root(*value, is_recursive) + } + SymbolType::Concat { values, .. } => { + let mut recursive = false; + for value in values.iter() { + recursive |= self.is_recursive_from_root(*value, is_recursive); + } + recursive + } + SymbolType::Choice { values, .. 
} => { + let mut recursive = false; + for (value, _) in values.iter() { + recursive |= self.is_recursive_from_root(*value, is_recursive); + } + recursive + } + }; + is_recursive.insert(root, recursive); + recursive + } + + // we generate random sample from grammar in BFS fashion instead of DFS because in such a way we can force abort generation of string in more fair fashion + // the problem with probabilistic grammar, is that it's recursive rules can have infinite (or very large) average length of expanded terminals + // in order to fight with this problem, we provide length_limit_hint which will change logic of generation and start using only non-recursive rules (if this is possible) in case + // when "frontier" of the generation already have >= length_limit_hint nodes + pub fn generate( + &self, + rng: &mut ChaCha8Rng, + root: SymbolHandle, + length_limit_hint: usize, + ) -> String { + let mut frontier = vec![GrammarFrontierNode::Handle(root)]; + + let mut is_recursive = HashMap::new(); + self.is_recursive_from_root(root, &mut is_recursive); + + let symbols = &self.0.borrow().symbols; + let terminals = loop { + let mut next = Vec::new(); + let mut expanded = false; + let limit_exceeded = frontier.len() >= length_limit_hint; + for node in frontier.into_iter() { + let GrammarFrontierNode::Handle(handle) = node else { + next.push(node); + continue; + }; + + expanded = true; + match symbols.get(&handle).expect("symbol must be registered") { + SymbolType::Str { + fixed_prefix, + random_length, + } => { + let mut s = fixed_prefix.clone(); + for _ in 0..*random_length { + s.push(rng.random_range('A'..'Z')); + } + next.push(GrammarFrontierNode::String(s)); + } + SymbolType::Int { range } => { + next.push(GrammarFrontierNode::String( + rng.random_range(range.clone()).to_string(), + )); + } + SymbolType::Optional { value, prob } => { + if !limit_exceeded && rng.random_bool(*prob) { + next.push(GrammarFrontierNode::Handle(*value)); + } + } + SymbolType::Repeat { + value, + 
range, + separator, + } => { + let repetitions = if !limit_exceeded { + rng.random_range(range.clone()) + } else { + range.start + }; + for i in 0..repetitions { + if i > 0 { + next.push(GrammarFrontierNode::String(separator.to_string())); + } + next.push(GrammarFrontierNode::Handle(*value)); + } + } + SymbolType::Concat { values, separator } => { + for (i, value) in values.iter().enumerate() { + if i > 0 { + next.push(GrammarFrontierNode::String(separator.to_string())); + } + next.push(GrammarFrontierNode::Handle(*value)); + } + } + SymbolType::Choice { values } => { + let mut handles = if !limit_exceeded { + values.clone() + } else { + values + .iter() + .filter(|x| is_recursive.get(&x.0) != Some(&true)) + .map(|x| *x) + .collect::>() + }; + if handles.len() == 0 { + handles = values.clone(); + } + + let sum: f64 = handles.iter().map(|x| x.1).sum(); + let mut sample = rng.random_range(0.0..sum); + for (i, (handle, weight)) in handles.iter().enumerate() { + sample -= weight; + if sample > 0.0 && i < handles.len() - 1 { + continue; + } + next.push(GrammarFrontierNode::Handle(*handle)); + break; + } + } + } + } + if !expanded { + break next; + } + frontier = next; + }; + let mut result = String::new(); + for node in terminals { + let GrammarFrontierNode::String(string) = node else { + panic!("frontier in the end must contain only string nodes"); + }; + result.push_str(&string); + } + result + } +} + +impl SymbolDefinitionBuilder { + pub fn use_symbol(self, symbol: SymbolType) -> Self { + assert!(self.symbol.is_none(), "symbol must be unset"); + Self { + symbol: Some(symbol), + ..self + } + } + pub fn concat(self, separator: &str) -> Self { + assert!(self.symbol.is_none(), "symbol must be unset"); + Self { + symbol: Some(SymbolType::Concat { + values: vec![], + separator: separator.to_string(), + }), + ..self + } + } + pub fn push(mut self, handle: SymbolHandle) -> Self { + let Some(SymbolType::Concat { + mut values, + separator, + }) = self.symbol.take() + else { + 
panic!("symbol must be set to Concat type"); + }; + values.push(handle); + Self { + symbol: Some(SymbolType::Concat { values, separator }), + ..self + } + } + pub fn push_symbol(self, symbol: SymbolType) -> Self { + let (handle, builder) = self.generator.create_handle(); + builder.use_symbol(symbol).build(); + self.push(handle) + } + pub fn push_str(self, s: &str) -> Self { + self.push_symbol(const_str(s)) + } + pub fn choice(self) -> Self { + assert!(self.symbol.is_none(), "symbol must be unset"); + Self { + symbol: Some(SymbolType::Choice { values: vec![] }), + ..self + } + } + pub fn option_w(mut self, handle: SymbolHandle, weight: f64) -> Self { + let Some(SymbolType::Choice { mut values }) = self.symbol.take() else { + panic!("symbol must be set to Choice type"); + }; + values.push((handle, weight)); + Self { + symbol: Some(SymbolType::Choice { values }), + ..self + } + } + pub fn option(self, handle: SymbolHandle) -> Self { + self.option_w(handle, 1.0) + } + pub fn option_symbol_w(self, symbol: SymbolType, weight: f64) -> Self { + let (handle, builder) = self.generator.create_handle(); + builder.use_symbol(symbol).build(); + self.option_w(handle, weight) + } + pub fn option_symbol(self, symbol: SymbolType) -> Self { + self.option_symbol_w(symbol, 1.0) + } + pub fn option_str(self, s: &str) -> Self { + self.option_symbol(const_str(s)) + } + pub fn options_symbol(mut self, symbols: [SymbolType; N]) -> Self { + for symbol in symbols { + self = self.option_symbol(symbol) + } + self + } + pub fn options_str(mut self, strs: [&str; N]) -> Self { + for s in strs { + self = self.option_str(s) + } + self + } + + pub fn repeat(self, range: Range, separator: &str) -> Self { + let symbol = self.symbol.expect("symbol must be set"); + let (handle, builder) = self.generator.create_handle(); + builder.use_symbol(symbol).build(); + Self { + symbol: Some(SymbolType::Repeat { + value: handle, + range, + separator: separator.to_string(), + }), + ..self + } + } + + pub fn 
optional(self, prob: f64) -> Self { + let symbol = self.symbol.expect("symbol must be set"); + let (handle, builder) = self.generator.create_handle(); + builder.use_symbol(symbol).build(); + Self { + symbol: Some(SymbolType::Optional { + value: handle, + prob, + }), + ..self + } + } + + pub fn build(self) -> SymbolHandle { + let symbol = self.symbol.expect("symbol must be set"); + self.generator.register(self.handle, symbol); + self.handle + } +} diff --git a/tests/fuzz/mod.rs b/tests/fuzz/mod.rs index 57c7d1381..4c4944c93 100644 --- a/tests/fuzz/mod.rs +++ b/tests/fuzz/mod.rs @@ -1,10 +1,16 @@ +pub mod grammar_generator; + #[cfg(test)] mod tests { use std::{rc::Rc, sync::Arc}; use limbo_core::Database; + use rand::SeedableRng; + use rand_chacha::ChaCha8Rng; use rusqlite::params; + use crate::grammar_generator::{rand_int, GrammarGenerator}; + fn sqlite_exec_row(conn: &rusqlite::Connection, query: &str) -> Vec { let mut stmt = conn.prepare(&query).unwrap(); let mut rows = stmt.query(params![]).unwrap(); @@ -49,19 +55,63 @@ mod tests { } #[test] - pub fn kek() { + pub fn arithmetic_expression_fuzz() { + let g = GrammarGenerator::new(); + let (expr, expr_builder) = g.create_handle(); + let (bin_op, bin_op_builder) = g.create_handle(); + let (unary_op, unary_op_builder) = g.create_handle(); + let (paren, paren_builder) = g.create_handle(); + + paren_builder + .concat("") + .push_str("(") + .push(expr) + .push_str(")") + .build(); + + unary_op_builder + .concat(" ") + .push(g.create().choice().options_str(["~", "+", "-"]).build()) + .push(expr) + .build(); + + bin_op_builder + .concat(" ") + .push(expr) + .push( + g.create() + .choice() + .options_str(["+", "-", "*", "/", "%", "&", "|", "<<", ">>"]) + .build(), + ) + .push(expr) + .build(); + + expr_builder + .choice() + .option_w(unary_op, 1.0) + .option_w(bin_op, 1.0) + .option_w(paren, 1.0) + .option_symbol_w(rand_int(-10..10), 1.0) + .build(); + + let sql = g.create().concat(" 
").push_str("SELECT").push(expr).build(); + let io = Arc::new(limbo_core::PlatformIO::new().unwrap()); let limbo_db = Database::open_file(io, ":memory:").unwrap(); let limbo_conn = limbo_db.connect(); let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); - println!( - "column: {:?}", - sqlite_exec_row(&sqlite_conn, "SELECT 1 = 1.0") - ); - println!( - "column: {:?}", - limbo_exec_row(&limbo_conn, "SELECT 1 = 1.0") - ); + let mut rng = ChaCha8Rng::seed_from_u64(0); + for _ in 0..16 * 1024 { + let query = g.generate(&mut rng, sql, 50); + let limbo = limbo_exec_row(&limbo_conn, &query); + let sqlite = sqlite_exec_row(&sqlite_conn, &query); + assert_eq!( + limbo, sqlite, + "query: {}, limbo: {:?}, sqlite: {:?}", + query, limbo, sqlite + ); + } } } From 9cc6cc99d4110805f895291f8363cdd82fc5b527 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 2 Feb 2025 18:42:40 +0400 Subject: [PATCH 4/9] add examples found by fuzzer --- tests/fuzz/mod.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/fuzz/mod.rs b/tests/fuzz/mod.rs index 4c4944c93..3dc40f7b4 100644 --- a/tests/fuzz/mod.rs +++ b/tests/fuzz/mod.rs @@ -54,6 +54,27 @@ mod tests { .collect() } + #[test] + pub fn arithmetic_expression_fuzz_ex1() { + let io = Arc::new(limbo_core::PlatformIO::new().unwrap()); + let limbo_db = Database::open_file(io, ":memory:").unwrap(); + let limbo_conn = limbo_db.connect(); + let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); + + for query in [ + "SELECT ~1 >> 1536", + "SELECT ~ + 3 << - ~ (~ (8)) - + -1 - 3 >> 3 + -6 * (-7 * 9 >> - 2)", + ] { + let limbo = limbo_exec_row(&limbo_conn, query); + let sqlite = sqlite_exec_row(&sqlite_conn, query); + assert_eq!( + limbo, sqlite, + "query: {}, limbo: {:?}, sqlite: {:?}", + query, limbo, sqlite + ); + } + } + #[test] pub fn arithmetic_expression_fuzz() { let g = GrammarGenerator::new(); From 43c9fc3c5c9a5997ac4302ee7c884414a2c60c47 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin 
Date: Sun, 2 Feb 2025 19:24:22 +0400 Subject: [PATCH 5/9] fix binary shift implementation --- core/vdbe/insn.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 0d5c84a7d..fd033b11a 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -880,16 +880,7 @@ pub fn exec_shift_left(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue } fn compute_shl(lhs: i64, rhs: i64) -> i64 { - if rhs == 0 { - lhs - } else if rhs >= 64 || rhs <= -64 { - 0 - } else if rhs < 0 { - // if negative do right shift - lhs >> (-rhs) - } else { - lhs << rhs - } + compute_shr(lhs, -rhs) } pub fn exec_shift_right(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { @@ -927,11 +918,15 @@ pub fn exec_shift_right(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValu } } +// compute binary shift to the right if rhs >= 0 and binary shift to the left - if rhs < 0 +// note, that binary shift to the right is sign-extended fn compute_shr(lhs: i64, rhs: i64) -> i64 { if rhs == 0 { lhs - } else if rhs >= 64 || rhs <= -64 { + } else if rhs >= 64 && lhs >= 0 || rhs <= -64 { 0 + } else if rhs >= 64 && lhs < 0 { + -1 } else if rhs < 0 { // if negative do left shift lhs << (-rhs) From 300f278ff302757f49e09a6c99c1040ad2a6fe2c Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 2 Feb 2025 19:34:15 +0400 Subject: [PATCH 6/9] use TempDatabase from commons in tests/ --- tests/Cargo.toml | 4 --- tests/integration/common.rs | 7 ++++ .../fuzz/grammar_generator.rs | 0 tests/{ => integration}/fuzz/mod.rs | 32 ++++++++++++------- tests/integration/mod.rs | 1 + 5 files changed, 29 insertions(+), 15 deletions(-) rename tests/{ => integration}/fuzz/grammar_generator.rs (100%) rename tests/{ => integration}/fuzz/mod.rs (85%) diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 3ee12139f..f3e67d73a 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -14,10 +14,6 @@ path = "lib.rs" name = "integration_tests" path = 
"integration/mod.rs" -[[test]] -name = "fuzz_tests" -path = "fuzz/mod.rs" - [dependencies] anyhow = "1.0.75" clap = { version = "4.5", features = ["derive"] } diff --git a/tests/integration/common.rs b/tests/integration/common.rs index 07c840b23..9fbb9eb41 100644 --- a/tests/integration/common.rs +++ b/tests/integration/common.rs @@ -12,6 +12,13 @@ pub struct TempDatabase { #[allow(dead_code, clippy::arc_with_non_send_sync)] impl TempDatabase { + pub fn new_empty() -> Self { + let mut path = TempDir::new().unwrap().into_path(); + path.push("test.db"); + let io: Arc = Arc::new(limbo_core::PlatformIO::new().unwrap()); + + Self { path, io } + } pub fn new(table_sql: &str) -> Self { let mut path = TempDir::new().unwrap().into_path(); path.push("test.db"); diff --git a/tests/fuzz/grammar_generator.rs b/tests/integration/fuzz/grammar_generator.rs similarity index 100% rename from tests/fuzz/grammar_generator.rs rename to tests/integration/fuzz/grammar_generator.rs diff --git a/tests/fuzz/mod.rs b/tests/integration/fuzz/mod.rs similarity index 85% rename from tests/fuzz/mod.rs rename to tests/integration/fuzz/mod.rs index 3dc40f7b4..91e8c5f2f 100644 --- a/tests/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -2,14 +2,25 @@ pub mod grammar_generator; #[cfg(test)] mod tests { - use std::{rc::Rc, sync::Arc}; + use std::rc::Rc; - use limbo_core::Database; use rand::SeedableRng; use rand_chacha::ChaCha8Rng; use rusqlite::params; - use crate::grammar_generator::{rand_int, GrammarGenerator}; + use crate::{ + common::TempDatabase, + fuzz::grammar_generator::{rand_int, GrammarGenerator}, + }; + + fn rng_from_time() -> (ChaCha8Rng, u64) { + let seed = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(); + let rng = ChaCha8Rng::seed_from_u64(seed); + (rng, seed) + } fn sqlite_exec_row(conn: &rusqlite::Connection, query: &str) -> Vec { let mut stmt = conn.prepare(&query).unwrap(); @@ -56,9 +67,8 @@ mod tests { #[test] pub fn 
arithmetic_expression_fuzz_ex1() { - let io = Arc::new(limbo_core::PlatformIO::new().unwrap()); - let limbo_db = Database::open_file(io, ":memory:").unwrap(); - let limbo_conn = limbo_db.connect(); + let db = TempDatabase::new_empty(); + let limbo_conn = db.connect_limbo(); let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); for query in [ @@ -118,13 +128,13 @@ mod tests { let sql = g.create().concat(" ").push_str("SELECT").push(expr).build(); - let io = Arc::new(limbo_core::PlatformIO::new().unwrap()); - let limbo_db = Database::open_file(io, ":memory:").unwrap(); - let limbo_conn = limbo_db.connect(); + let db = TempDatabase::new_empty(); + let limbo_conn = db.connect_limbo(); let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); - let mut rng = ChaCha8Rng::seed_from_u64(0); - for _ in 0..16 * 1024 { + let (mut rng, seed) = rng_from_time(); + println!("seed: {}", seed); + for _ in 0..1024 { let query = g.generate(&mut rng, sql, 50); let limbo = limbo_exec_row(&limbo_conn, &query); let sqlite = sqlite_exec_row(&sqlite_conn, &query); diff --git a/tests/integration/mod.rs b/tests/integration/mod.rs index 221ca089f..01bb224dd 100644 --- a/tests/integration/mod.rs +++ b/tests/integration/mod.rs @@ -1,4 +1,5 @@ mod common; mod functions; +mod fuzz; mod pragma; mod query_processing; From 8d513b229f7ae05e5eb0495cb0c3c4ab30c8cdfe Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 2 Feb 2025 19:43:13 +0400 Subject: [PATCH 7/9] add simple tcl tests --- testing/select.test | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/testing/select.test b/testing/select.test index 13cff8892..934c4e879 100755 --- a/testing/select.test +++ b/testing/select.test @@ -123,3 +123,23 @@ do_execsql_test select-is-not-null { select 4 is not null, '4' is not null, 0 is not null, (1 / 2) is not null; } {0|0|0|0 1|1|1|1} + +do_execsql_test select_bin_shr { + select 997623670 >> 0, 997623670 >> 1, 997623670 >> 10, 997623670 >> 30; + select 
-997623670 >> 0, -997623670 >> 1, -997623670 >> 10, -997623670 >> 30; + select 997623670 << 0, 997623670 << -1, 997623670 << -10, 997623670 << -30; + select -997623670 << 0, -997623670 << -1, -997623670 << -10, -997623670 << -30; +} {997623670|498811835|974241|0 +-997623670|-498811835|-974242|-1 +997623670|498811835|974241|0 +-997623670|-498811835|-974242|-1} + +do_execsql_test select_bin_shl { + select 997623670 << 0, 997623670 << 1, 997623670 << 10, 997623670 << 30; + select -997623670 << 0, -997623670 << 1, -997623670 << 10, -997623670 << 30; + select 997623670 >> 0, 997623670 >> -1, 997623670 >> -10, 997623670 >> -30; + select -997623670 >> 0, -997623670 >> -1, -997623670 >> -10, -997623670 >> -30; +} {997623670|1995247340|1021566638080|1071190259091374080 +-997623670|-1995247340|-1021566638080|-1071190259091374080 +997623670|1995247340|1021566638080|1071190259091374080 +-997623670|-1995247340|-1021566638080|-1071190259091374080} From 3ff76e657e1a215b44b06cebc7af303695e16630 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 2 Feb 2025 19:55:04 +0400 Subject: [PATCH 8/9] allow a bit of dead code for now --- tests/integration/fuzz/grammar_generator.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/integration/fuzz/grammar_generator.rs b/tests/integration/fuzz/grammar_generator.rs index 6b6a692d6..013d1ee5b 100644 --- a/tests/integration/fuzz/grammar_generator.rs +++ b/tests/integration/fuzz/grammar_generator.rs @@ -28,10 +28,12 @@ pub enum SymbolType { Int { range: Range, }, + #[allow(dead_code)] Optional { value: SymbolHandle, prob: f64, }, + #[allow(dead_code)] Repeat { value: SymbolHandle, range: Range, @@ -53,6 +55,7 @@ pub fn const_str(s: &str) -> SymbolType { } } +#[allow(dead_code)] pub fn rand_str(fixed_prefix: &str, random_length: usize) -> SymbolType { SymbolType::Str { fixed_prefix: fixed_prefix.to_string(), @@ -323,6 +326,7 @@ impl SymbolDefinitionBuilder { ..self } } + #[allow(dead_code)] pub fn option(self, 
handle: SymbolHandle) -> Self { self.option_w(handle, 1.0) } @@ -337,6 +341,7 @@ impl SymbolDefinitionBuilder { pub fn option_str(self, s: &str) -> Self { self.option_symbol(const_str(s)) } + #[allow(dead_code)] pub fn options_symbol(mut self, symbols: [SymbolType; N]) -> Self { for symbol in symbols { self = self.option_symbol(symbol) @@ -349,7 +354,7 @@ impl SymbolDefinitionBuilder { } self } - + #[allow(dead_code)] pub fn repeat(self, range: Range, separator: &str) -> Self { let symbol = self.symbol.expect("symbol must be set"); let (handle, builder) = self.generator.create_handle(); @@ -363,7 +368,7 @@ impl SymbolDefinitionBuilder { ..self } } - + #[allow(dead_code)] pub fn optional(self, prob: f64) -> Self { let symbol = self.symbol.expect("symbol must be set"); let (handle, builder) = self.generator.create_handle(); From 41419ab11a8e9394baa9e51872b71d1c2c57521c Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Sun, 2 Feb 2025 20:12:56 +0400 Subject: [PATCH 9/9] add env logger and fix range --- tests/integration/fuzz/grammar_generator.rs | 2 +- tests/integration/fuzz/mod.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integration/fuzz/grammar_generator.rs b/tests/integration/fuzz/grammar_generator.rs index 013d1ee5b..e6f4b60bd 100644 --- a/tests/integration/fuzz/grammar_generator.rs +++ b/tests/integration/fuzz/grammar_generator.rs @@ -187,7 +187,7 @@ impl GrammarGenerator { } => { let mut s = fixed_prefix.clone(); for _ in 0..*random_length { - s.push(rng.random_range('A'..'Z')); + s.push(rng.random_range('A'..='Z')); } next.push(GrammarFrontierNode::String(s)); } diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 91e8c5f2f..e7baf481b 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -87,6 +87,7 @@ mod tests { #[test] pub fn arithmetic_expression_fuzz() { + let _ = env_logger::try_init(); let g = GrammarGenerator::new(); let (expr, expr_builder) = 
g.create_handle(); let (bin_op, bin_op_builder) = g.create_handle(); @@ -133,7 +134,7 @@ mod tests { let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); let (mut rng, seed) = rng_from_time(); - println!("seed: {}", seed); + log::info!("seed: {}", seed); for _ in 0..1024 { let query = g.generate(&mut rng, sql, 50); let limbo = limbo_exec_row(&limbo_conn, &query);