mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-17 15:14:20 +01:00
Merge 'Fix shr instruction' from Nikita Sivukhin
This PR fixes the implementation of the binary shift right/left instructions. Previously there was a minor incompatibility between the limbo and sqlite implementations when the second argument of a right shift was 64 or more and the first argument was negative. Because sqlite's right shift is sign-extending, `-1` should be returned in that case, whereas limbo returned zero. This PR fixes that bug and also introduces fuzz tests for arithmetic expressions. The fuzz tests were written with the help of `GrammarGenerator`, which makes it easy to define a probabilistic context-free grammar and then sample random strings from it. Closes #867
This commit is contained in:
127
Cargo.lock
generated
127
Cargo.lock
generated
@@ -24,10 +24,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
"zerocopy",
|
||||
"zerocopy 0.7.35",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -60,7 +60,7 @@ version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18a1e15a87b13ae79e04e07b3714fc41d5f6993dff11662fdbe0b207c6ad0fe0"
|
||||
dependencies = [
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -472,6 +472,8 @@ dependencies = [
|
||||
"env_logger 0.10.2",
|
||||
"limbo_core",
|
||||
"log",
|
||||
"rand 0.9.0",
|
||||
"rand_chacha 0.9.0",
|
||||
"rexpect",
|
||||
"rusqlite",
|
||||
"rustyline",
|
||||
@@ -977,10 +979,22 @@ dependencies = [
|
||||
"cfg-if",
|
||||
"js-sys",
|
||||
"libc",
|
||||
"wasi",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi 0.13.3+wasi-0.2.2",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.31.1"
|
||||
@@ -1400,7 +1414,7 @@ dependencies = [
|
||||
"itoa",
|
||||
"nom",
|
||||
"ordered-float",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
"ryu",
|
||||
"serde_json",
|
||||
]
|
||||
@@ -1557,7 +1571,7 @@ dependencies = [
|
||||
"chrono",
|
||||
"criterion",
|
||||
"fallible-iterator 0.3.0",
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
"hex",
|
||||
"indexmap",
|
||||
"io-uring",
|
||||
@@ -1579,7 +1593,7 @@ dependencies = [
|
||||
"pest_derive",
|
||||
"polling",
|
||||
"pprof",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"rstest",
|
||||
@@ -1647,8 +1661,8 @@ dependencies = [
|
||||
"limbo_core",
|
||||
"log",
|
||||
"notify",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
"rand 0.8.5",
|
||||
"rand_chacha 0.3.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
@@ -1680,7 +1694,7 @@ dependencies = [
|
||||
"limbo_ext",
|
||||
"quickcheck",
|
||||
"quickcheck_macros",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1798,7 +1812,7 @@ checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"wasi",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
@@ -2067,7 +2081,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2176,7 +2190,7 @@ version = "0.2.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
|
||||
dependencies = [
|
||||
"zerocopy",
|
||||
"zerocopy 0.7.35",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2307,7 +2321,7 @@ checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
|
||||
dependencies = [
|
||||
"env_logger 0.8.4",
|
||||
"log",
|
||||
"rand",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2347,8 +2361,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core",
|
||||
"rand_chacha 0.3.1",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94"
|
||||
dependencies = [
|
||||
"rand_chacha 0.9.0",
|
||||
"rand_core 0.9.0",
|
||||
"zerocopy 0.8.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2358,7 +2383,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core 0.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2367,7 +2402,17 @@ version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff"
|
||||
dependencies = [
|
||||
"getrandom 0.3.1",
|
||||
"zerocopy 0.8.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2405,7 +2450,7 @@ version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
"libredox",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
@@ -2847,7 +2892,7 @@ checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"fastrand",
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
"windows-sys 0.59.0",
|
||||
@@ -3079,7 +3124,7 @@ version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3119,6 +3164,15 @@ version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.13.3+wasi-0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
|
||||
dependencies = [
|
||||
"wit-bindgen-rt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.100"
|
||||
@@ -3454,6 +3508,15 @@ version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen-rt"
|
||||
version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
|
||||
dependencies = [
|
||||
"bitflags 2.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "write16"
|
||||
version = "1.0.0"
|
||||
@@ -3497,7 +3560,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"zerocopy-derive",
|
||||
"zerocopy-derive 0.7.35",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.8.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a367f292d93d4eab890745e75a778da40909cab4d6ff8173693812f79c4a2468"
|
||||
dependencies = [
|
||||
"zerocopy-derive 0.8.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3511,6 +3583,17 @@ dependencies = [
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.8.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3931cb58c62c13adec22e38686b559c86a30565e16ad6e8510a337cedc611e1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerofrom"
|
||||
version = "0.1.5"
|
||||
|
||||
@@ -494,7 +494,7 @@ impl Statement {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum StepResult<'a> {
|
||||
Row(Row<'a>),
|
||||
IO,
|
||||
@@ -503,7 +503,7 @@ pub enum StepResult<'a> {
|
||||
Busy,
|
||||
}
|
||||
|
||||
#[derive(PartialEq)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Row<'a> {
|
||||
pub values: Vec<Value<'a>>,
|
||||
}
|
||||
|
||||
@@ -880,16 +880,7 @@ pub fn exec_shift_left(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue
|
||||
}
|
||||
|
||||
fn compute_shl(lhs: i64, rhs: i64) -> i64 {
|
||||
if rhs == 0 {
|
||||
lhs
|
||||
} else if rhs >= 64 || rhs <= -64 {
|
||||
0
|
||||
} else if rhs < 0 {
|
||||
// if negative do right shift
|
||||
lhs >> (-rhs)
|
||||
} else {
|
||||
lhs << rhs
|
||||
}
|
||||
compute_shr(lhs, -rhs)
|
||||
}
|
||||
|
||||
pub fn exec_shift_right(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue {
|
||||
@@ -927,11 +918,15 @@ pub fn exec_shift_right(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValu
|
||||
}
|
||||
}
|
||||
|
||||
// compute binary shift to the right if rhs >= 0 and binary shift to the left - if rhs < 0
|
||||
// note, that binary shift to the right is sign-extended
|
||||
fn compute_shr(lhs: i64, rhs: i64) -> i64 {
|
||||
if rhs == 0 {
|
||||
lhs
|
||||
} else if rhs >= 64 || rhs <= -64 {
|
||||
} else if rhs >= 64 && lhs >= 0 || rhs <= -64 {
|
||||
0
|
||||
} else if rhs >= 64 && lhs < 0 {
|
||||
-1
|
||||
} else if rhs < 0 {
|
||||
// if negative do left shift
|
||||
lhs << (-rhs)
|
||||
|
||||
@@ -123,3 +123,23 @@ do_execsql_test select-is-not-null {
|
||||
select 4 is not null, '4' is not null, 0 is not null, (1 / 2) is not null;
|
||||
} {0|0|0|0
|
||||
1|1|1|1}
|
||||
|
||||
do_execsql_test select_bin_shr {
|
||||
select 997623670 >> 0, 997623670 >> 1, 997623670 >> 10, 997623670 >> 30;
|
||||
select -997623670 >> 0, -997623670 >> 1, -997623670 >> 10, -997623670 >> 30;
|
||||
select 997623670 << 0, 997623670 << -1, 997623670 << -10, 997623670 << -30;
|
||||
select -997623670 << 0, -997623670 << -1, -997623670 << -10, -997623670 << -30;
|
||||
} {997623670|498811835|974241|0
|
||||
-997623670|-498811835|-974242|-1
|
||||
997623670|498811835|974241|0
|
||||
-997623670|-498811835|-974242|-1}
|
||||
|
||||
do_execsql_test select_bin_shl {
|
||||
select 997623670 << 0, 997623670 << 1, 997623670 << 10, 997623670 << 30;
|
||||
select -997623670 << 0, -997623670 << 1, -997623670 << 10, -997623670 << 30;
|
||||
select 997623670 >> 0, 997623670 >> -1, 997623670 >> -10, 997623670 >> -30;
|
||||
select -997623670 >> 0, -997623670 >> -1, -997623670 >> -10, -997623670 >> -30;
|
||||
} {997623670|1995247340|1021566638080|1071190259091374080
|
||||
-997623670|-1995247340|-1021566638080|-1071190259091374080
|
||||
997623670|1995247340|1021566638080|1071190259091374080
|
||||
-997623670|-1995247340|-1021566638080|-1071190259091374080}
|
||||
|
||||
@@ -5,7 +5,7 @@ authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
description = "Integration tests"
|
||||
description = "Integration & fuzz tests"
|
||||
|
||||
[lib]
|
||||
path = "lib.rs"
|
||||
@@ -25,7 +25,9 @@ rusqlite = { version = "0.29", features = ["bundled"] }
|
||||
tempfile = "3.0.7"
|
||||
log = "0.4.22"
|
||||
assert_cmd = "^2"
|
||||
rand_chacha = "0.9.0"
|
||||
rand = "0.9.0"
|
||||
|
||||
# rexpect does not support windows.
|
||||
[target.'cfg(not(windows))'.dependencies]
|
||||
rexpect = "0.6.0"
|
||||
rexpect = "0.6.0"
|
||||
|
||||
@@ -12,6 +12,13 @@ pub struct TempDatabase {
|
||||
|
||||
#[allow(dead_code, clippy::arc_with_non_send_sync)]
|
||||
impl TempDatabase {
|
||||
pub fn new_empty() -> Self {
|
||||
let mut path = TempDir::new().unwrap().into_path();
|
||||
path.push("test.db");
|
||||
let io: Arc<dyn limbo_core::IO> = Arc::new(limbo_core::PlatformIO::new().unwrap());
|
||||
|
||||
Self { path, io }
|
||||
}
|
||||
pub fn new(table_sql: &str) -> Self {
|
||||
let mut path = TempDir::new().unwrap().into_path();
|
||||
path.push("test.db");
|
||||
|
||||
390
tests/integration/fuzz/grammar_generator.rs
Normal file
390
tests/integration/fuzz/grammar_generator.rs
Normal file
@@ -0,0 +1,390 @@
|
||||
/// Grammar generator is a helper to build a probabilistic grammar and generate random string from it
|
||||
/// Grammar consists of terminal (characters) and symbols (non-terminal with some expansion rule)
|
||||
///
|
||||
/// Currently supported expansion rules are:
|
||||
/// 1. Symbol -> [Str]: generate terminals which form fixed length string with constant prefix and random suffix
|
||||
/// 2. Symbol -> [Int]: generate terminals which form integer from specified range
|
||||
/// 3. Symbol -> (Inner)?: generate expansion for Inner symbol with some probability
|
||||
/// 4. Symbol -> (Inner){n..m}: generate k expansions for Inner symbol where k \in [n..m) with uniform distribution
|
||||
/// (note, that every repetition will be expanded independently)
|
||||
/// 5. Symbol -> Inner1 Inner2 .. Inner[n]: concatenate expansions from inner symbols and insert separator string between them
|
||||
/// 6. Symbol -> Choice1 | Choice2 | .. | Choice[n]: randomly pick one choice according to the weights and generate expansion for it
|
||||
///
|
||||
/// (this is more or less [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) with very minor differences)
|
||||
///
|
||||
/// The idea behind this code is to provide a way to "build" grammar generator with all these rules and their dependencies and after that
|
||||
/// we can randomly sample strings from this generator easily.
|
||||
use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
|
||||
|
||||
use rand::Rng;
|
||||
use rand_chacha::ChaCha8Rng;
|
||||
|
||||
/// Expansion rule attached to a grammar symbol.
#[derive(Clone, Debug)]
pub enum SymbolType {
    /// Terminal text: `fixed_prefix` followed by `random_length` random characters.
    Str {
        fixed_prefix: String,
        random_length: usize,
    },
    /// Terminal integer drawn from `range`.
    Int {
        range: Range<i32>,
    },
    /// Expands to `value` with probability `prob`, otherwise to nothing.
    #[allow(dead_code)]
    Optional {
        value: SymbolHandle,
        prob: f64,
    },
    /// Expands to a number of independent `value` expansions taken from `range`,
    /// joined by `separator`.
    #[allow(dead_code)]
    Repeat {
        value: SymbolHandle,
        range: Range<usize>,
        separator: String,
    },
    /// Expands to every symbol in `values`, in order, joined by `separator`.
    Concat {
        values: Vec<SymbolHandle>,
        separator: String,
    },
    /// Expands to exactly one of `values`; the `f64` is that alternative's weight.
    Choice {
        values: Vec<(SymbolHandle, f64)>,
    },
}
|
||||
|
||||
pub fn const_str(s: &str) -> SymbolType {
|
||||
SymbolType::Str {
|
||||
fixed_prefix: s.to_string(),
|
||||
random_length: 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn rand_str(fixed_prefix: &str, random_length: usize) -> SymbolType {
|
||||
SymbolType::Str {
|
||||
fixed_prefix: fixed_prefix.to_string(),
|
||||
random_length,
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds an `Int` rule that yields an integer taken from `range`.
pub fn rand_int(range: Range<i32>) -> SymbolType {
    SymbolType::Int { range }
}
|
||||
|
||||
/// Cheap, copyable identifier of a grammar symbol; wraps the numeric id assigned by
/// `GrammarGenerator::create_handle`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct SymbolHandle(i32);
|
||||
/// Builder that accumulates the definition of one symbol and registers it in the
/// generator when `build` is called.
pub struct SymbolDefinitionBuilder {
    // grammar the finished symbol is registered in
    generator: GrammarGenerator,
    // handle the finished symbol is registered under
    handle: SymbolHandle,
    // definition collected so far; `None` until one of the builder methods sets it
    symbol: Option<SymbolType>,
}
|
||||
|
||||
/// Element of the generation frontier: either a symbol that still has to be expanded
/// or a piece of output text that is already final.
#[derive(Debug)]
enum GrammarFrontierNode {
    Handle(SymbolHandle),
    String(String),
}
|
||||
|
||||
/// Shared handle to a grammar; clones refer to the same underlying symbol table
/// (the state lives behind `Rc<RefCell<..>>`).
#[derive(Clone)]
pub struct GrammarGenerator(Rc<RefCell<GrammarGeneratorInner>>);
|
||||
|
||||
/// Mutable state behind a `GrammarGenerator`.
struct GrammarGeneratorInner {
    // id that the next created handle will receive
    last_symbol_id: i32,
    // definitions of all registered symbols
    symbols: HashMap<SymbolHandle, SymbolType>,
}
|
||||
|
||||
impl GrammarGenerator {
|
||||
pub fn new() -> Self {
|
||||
GrammarGenerator(Rc::new(RefCell::new(GrammarGeneratorInner {
|
||||
last_symbol_id: 0,
|
||||
symbols: HashMap::new(),
|
||||
})))
|
||||
}
|
||||
pub fn create_handle(&self) -> (SymbolHandle, SymbolDefinitionBuilder) {
|
||||
let handle = SymbolHandle(self.0.borrow().last_symbol_id);
|
||||
self.0.borrow_mut().last_symbol_id += 1;
|
||||
|
||||
let builder = SymbolDefinitionBuilder {
|
||||
generator: self.clone(),
|
||||
handle,
|
||||
symbol: None,
|
||||
};
|
||||
(handle, builder)
|
||||
}
|
||||
pub fn create(&self) -> SymbolDefinitionBuilder {
|
||||
let (_, builder) = self.create_handle();
|
||||
builder
|
||||
}
|
||||
pub fn register(&self, handle: SymbolHandle, value: SymbolType) {
|
||||
let result = self.0.borrow_mut().symbols.insert(handle, value);
|
||||
assert!(result.is_none(), "handle can be registered only once");
|
||||
}
|
||||
|
||||
// this helper runs DFS for directed graph and set is_recursive[v] = true for all reachable from root vertices
|
||||
// if path of infinite lengths exists for v
|
||||
fn is_recursive_from_root(
|
||||
&self,
|
||||
root: SymbolHandle,
|
||||
is_recursive: &mut HashMap<SymbolHandle, bool>,
|
||||
) -> bool {
|
||||
if let Some(_) = is_recursive.get(&root) {
|
||||
is_recursive.insert(root, true);
|
||||
return true;
|
||||
}
|
||||
is_recursive.insert(root, false);
|
||||
let symbols = &self.0.borrow().symbols;
|
||||
let recursive = match symbols.get(&root).expect("symbol must be registered") {
|
||||
SymbolType::Str { .. } | SymbolType::Int { .. } => false,
|
||||
SymbolType::Optional { value, .. } | SymbolType::Repeat { value, .. } => {
|
||||
self.is_recursive_from_root(*value, is_recursive)
|
||||
}
|
||||
SymbolType::Concat { values, .. } => {
|
||||
let mut recursive = false;
|
||||
for value in values.iter() {
|
||||
recursive |= self.is_recursive_from_root(*value, is_recursive);
|
||||
}
|
||||
recursive
|
||||
}
|
||||
SymbolType::Choice { values, .. } => {
|
||||
let mut recursive = false;
|
||||
for (value, _) in values.iter() {
|
||||
recursive |= self.is_recursive_from_root(*value, is_recursive);
|
||||
}
|
||||
recursive
|
||||
}
|
||||
};
|
||||
is_recursive.insert(root, recursive);
|
||||
recursive
|
||||
}
|
||||
|
||||
// we generate random sample from grammar in BFS fashion instead of DFS because in such a way we can force abort generation of string in more fair fashion
|
||||
// the problem with probabilistic grammar, is that it's recursive rules can have infinite (or very large) average length of expanded terminals
|
||||
// in order to fight with this problem, we provide length_limit_hint which will change logic of generation and start using only non-recursive rules (if this is possible) in case
|
||||
// when "frontier" of the generation already have >= length_limit_hint nodes
|
||||
pub fn generate(
|
||||
&self,
|
||||
rng: &mut ChaCha8Rng,
|
||||
root: SymbolHandle,
|
||||
length_limit_hint: usize,
|
||||
) -> String {
|
||||
let mut frontier = vec![GrammarFrontierNode::Handle(root)];
|
||||
|
||||
let mut is_recursive = HashMap::new();
|
||||
self.is_recursive_from_root(root, &mut is_recursive);
|
||||
|
||||
let symbols = &self.0.borrow().symbols;
|
||||
let terminals = loop {
|
||||
let mut next = Vec::new();
|
||||
let mut expanded = false;
|
||||
let limit_exceeded = frontier.len() >= length_limit_hint;
|
||||
for node in frontier.into_iter() {
|
||||
let GrammarFrontierNode::Handle(handle) = node else {
|
||||
next.push(node);
|
||||
continue;
|
||||
};
|
||||
|
||||
expanded = true;
|
||||
match symbols.get(&handle).expect("symbol must be registered") {
|
||||
SymbolType::Str {
|
||||
fixed_prefix,
|
||||
random_length,
|
||||
} => {
|
||||
let mut s = fixed_prefix.clone();
|
||||
for _ in 0..*random_length {
|
||||
s.push(rng.random_range('A'..='Z'));
|
||||
}
|
||||
next.push(GrammarFrontierNode::String(s));
|
||||
}
|
||||
SymbolType::Int { range } => {
|
||||
next.push(GrammarFrontierNode::String(
|
||||
rng.random_range(range.clone()).to_string(),
|
||||
));
|
||||
}
|
||||
SymbolType::Optional { value, prob } => {
|
||||
if !limit_exceeded && rng.random_bool(*prob) {
|
||||
next.push(GrammarFrontierNode::Handle(*value));
|
||||
}
|
||||
}
|
||||
SymbolType::Repeat {
|
||||
value,
|
||||
range,
|
||||
separator,
|
||||
} => {
|
||||
let repetitions = if !limit_exceeded {
|
||||
rng.random_range(range.clone())
|
||||
} else {
|
||||
range.start
|
||||
};
|
||||
for i in 0..repetitions {
|
||||
if i > 0 {
|
||||
next.push(GrammarFrontierNode::String(separator.to_string()));
|
||||
}
|
||||
next.push(GrammarFrontierNode::Handle(*value));
|
||||
}
|
||||
}
|
||||
SymbolType::Concat { values, separator } => {
|
||||
for (i, value) in values.iter().enumerate() {
|
||||
if i > 0 {
|
||||
next.push(GrammarFrontierNode::String(separator.to_string()));
|
||||
}
|
||||
next.push(GrammarFrontierNode::Handle(*value));
|
||||
}
|
||||
}
|
||||
SymbolType::Choice { values } => {
|
||||
let mut handles = if !limit_exceeded {
|
||||
values.clone()
|
||||
} else {
|
||||
values
|
||||
.iter()
|
||||
.filter(|x| is_recursive.get(&x.0) != Some(&true))
|
||||
.map(|x| *x)
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
if handles.len() == 0 {
|
||||
handles = values.clone();
|
||||
}
|
||||
|
||||
let sum: f64 = handles.iter().map(|x| x.1).sum();
|
||||
let mut sample = rng.random_range(0.0..sum);
|
||||
for (i, (handle, weight)) in handles.iter().enumerate() {
|
||||
sample -= weight;
|
||||
if sample > 0.0 && i < handles.len() - 1 {
|
||||
continue;
|
||||
}
|
||||
next.push(GrammarFrontierNode::Handle(*handle));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !expanded {
|
||||
break next;
|
||||
}
|
||||
frontier = next;
|
||||
};
|
||||
let mut result = String::new();
|
||||
for node in terminals {
|
||||
let GrammarFrontierNode::String(string) = node else {
|
||||
panic!("frontier in the end must contain only string nodes");
|
||||
};
|
||||
result.push_str(&string);
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolDefinitionBuilder {
|
||||
pub fn use_symbol(self, symbol: SymbolType) -> Self {
|
||||
assert!(self.symbol.is_none(), "symbol must be unset");
|
||||
Self {
|
||||
symbol: Some(symbol),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn concat(self, separator: &str) -> Self {
|
||||
assert!(self.symbol.is_none(), "symbol must be unset");
|
||||
Self {
|
||||
symbol: Some(SymbolType::Concat {
|
||||
values: vec![],
|
||||
separator: separator.to_string(),
|
||||
}),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn push(mut self, handle: SymbolHandle) -> Self {
|
||||
let Some(SymbolType::Concat {
|
||||
mut values,
|
||||
separator,
|
||||
}) = self.symbol.take()
|
||||
else {
|
||||
panic!("symbol must be set to Concat type");
|
||||
};
|
||||
values.push(handle);
|
||||
Self {
|
||||
symbol: Some(SymbolType::Concat { values, separator }),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn push_symbol(self, symbol: SymbolType) -> Self {
|
||||
let (handle, builder) = self.generator.create_handle();
|
||||
builder.use_symbol(symbol).build();
|
||||
self.push(handle)
|
||||
}
|
||||
pub fn push_str(self, s: &str) -> Self {
|
||||
self.push_symbol(const_str(s))
|
||||
}
|
||||
pub fn choice(self) -> Self {
|
||||
assert!(self.symbol.is_none(), "symbol must be unset");
|
||||
Self {
|
||||
symbol: Some(SymbolType::Choice { values: vec![] }),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn option_w(mut self, handle: SymbolHandle, weight: f64) -> Self {
|
||||
let Some(SymbolType::Choice { mut values }) = self.symbol.take() else {
|
||||
panic!("symbol must be set to Choice type");
|
||||
};
|
||||
values.push((handle, weight));
|
||||
Self {
|
||||
symbol: Some(SymbolType::Choice { values }),
|
||||
..self
|
||||
}
|
||||
}
|
||||
#[allow(dead_code)]
|
||||
pub fn option(self, handle: SymbolHandle) -> Self {
|
||||
self.option_w(handle, 1.0)
|
||||
}
|
||||
pub fn option_symbol_w(self, symbol: SymbolType, weight: f64) -> Self {
|
||||
let (handle, builder) = self.generator.create_handle();
|
||||
builder.use_symbol(symbol).build();
|
||||
self.option_w(handle, weight)
|
||||
}
|
||||
pub fn option_symbol(self, symbol: SymbolType) -> Self {
|
||||
self.option_symbol_w(symbol, 1.0)
|
||||
}
|
||||
pub fn option_str(self, s: &str) -> Self {
|
||||
self.option_symbol(const_str(s))
|
||||
}
|
||||
#[allow(dead_code)]
|
||||
pub fn options_symbol<const N: usize>(mut self, symbols: [SymbolType; N]) -> Self {
|
||||
for symbol in symbols {
|
||||
self = self.option_symbol(symbol)
|
||||
}
|
||||
self
|
||||
}
|
||||
pub fn options_str<const N: usize>(mut self, strs: [&str; N]) -> Self {
|
||||
for s in strs {
|
||||
self = self.option_str(s)
|
||||
}
|
||||
self
|
||||
}
|
||||
#[allow(dead_code)]
|
||||
pub fn repeat(self, range: Range<usize>, separator: &str) -> Self {
|
||||
let symbol = self.symbol.expect("symbol must be set");
|
||||
let (handle, builder) = self.generator.create_handle();
|
||||
builder.use_symbol(symbol).build();
|
||||
Self {
|
||||
symbol: Some(SymbolType::Repeat {
|
||||
value: handle,
|
||||
range,
|
||||
separator: separator.to_string(),
|
||||
}),
|
||||
..self
|
||||
}
|
||||
}
|
||||
#[allow(dead_code)]
|
||||
pub fn optional(self, prob: f64) -> Self {
|
||||
let symbol = self.symbol.expect("symbol must be set");
|
||||
let (handle, builder) = self.generator.create_handle();
|
||||
builder.use_symbol(symbol).build();
|
||||
Self {
|
||||
symbol: Some(SymbolType::Optional {
|
||||
value: handle,
|
||||
prob,
|
||||
}),
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build(self) -> SymbolHandle {
|
||||
let symbol = self.symbol.expect("symbol must be set");
|
||||
self.generator.register(self.handle, symbol);
|
||||
self.handle
|
||||
}
|
||||
}
|
||||
149
tests/integration/fuzz/mod.rs
Normal file
149
tests/integration/fuzz/mod.rs
Normal file
@@ -0,0 +1,149 @@
|
||||
pub mod grammar_generator;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::rc::Rc;

    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;
    use rusqlite::params;

    use crate::{
        common::TempDatabase,
        fuzz::grammar_generator::{rand_int, GrammarGenerator},
    };

    /// Creates an RNG seeded with the current UNIX time in seconds and returns the seed with it,
    /// so a failing run can be reported together with the seed that produced it.
    fn rng_from_time() -> (ChaCha8Rng, u64) {
        let seed = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let rng = ChaCha8Rng::seed_from_u64(seed);
        (rng, seed)
    }

    /// Runs `query` against SQLite and returns the columns of its single result row.
    ///
    /// Panics if the query yields no rows or more than one row.
    fn sqlite_exec_row(conn: &rusqlite::Connection, query: &str) -> Vec<rusqlite::types::Value> {
        let mut stmt = conn.prepare(query).unwrap();
        let mut rows = stmt.query(params![]).unwrap();
        let mut columns = Vec::new();
        let row = rows.next().unwrap().unwrap();
        // read columns until rusqlite reports that the index is out of range
        for i in 0.. {
            let column: rusqlite::types::Value = match row.get(i) {
                Ok(column) => column,
                Err(rusqlite::Error::InvalidColumnIndex(_)) => break,
                Err(err) => panic!("unexpected rusqlite error: {}", err),
            };
            columns.push(column);
        }
        assert!(rows.next().unwrap().is_none());

        columns
    }

    /// Runs `query` against limbo and returns the first result row converted to rusqlite
    /// values, so it can be compared directly with `sqlite_exec_row`.
    ///
    /// Panics if stepping produces anything other than `Row` or `IO`.
    fn limbo_exec_row(
        conn: &Rc<limbo_core::Connection>,
        query: &str,
    ) -> Vec<rusqlite::types::Value> {
        let mut stmt = conn.prepare(query).unwrap();
        let row = loop {
            // step on every iteration: re-matching a stale `IO` result would spin forever
            // NOTE(review): this helper has no access to the database IO handle, so pending
            // IO is not driven here — confirm that re-stepping is enough for these queries
            let result = stmt.step().unwrap();
            match result {
                limbo_core::StepResult::Row(row) => break row,
                limbo_core::StepResult::IO => continue,
                r => panic!("unexpected result {:?}: expecting single row", r),
            }
        };
        row.values
            .iter()
            .map(|x| match x {
                limbo_core::Value::Null => rusqlite::types::Value::Null,
                limbo_core::Value::Integer(x) => rusqlite::types::Value::Integer(*x),
                limbo_core::Value::Float(x) => rusqlite::types::Value::Real(*x),
                limbo_core::Value::Text(x) => rusqlite::types::Value::Text((*x).clone()),
                limbo_core::Value::Blob(x) => rusqlite::types::Value::Blob((*x).clone()),
            })
            .collect()
    }

    /// Regression cases kept as fixed queries.
    #[test]
    pub fn arithmetic_expression_fuzz_ex1() {
        let db = TempDatabase::new_empty();
        let limbo_conn = db.connect_limbo();
        let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap();

        for query in [
            "SELECT ~1 >> 1536",
            "SELECT ~ + 3 << - ~ (~ (8)) - + -1 - 3 >> 3 + -6 * (-7 * 9 >> - 2)",
        ] {
            let limbo = limbo_exec_row(&limbo_conn, query);
            let sqlite = sqlite_exec_row(&sqlite_conn, query);
            assert_eq!(
                limbo, sqlite,
                "query: {}, limbo: {:?}, sqlite: {:?}",
                query, limbo, sqlite
            );
        }
    }

    /// Generates random arithmetic expressions and checks that limbo and SQLite agree on them.
    #[test]
    pub fn arithmetic_expression_fuzz() {
        let _ = env_logger::try_init();
        let g = GrammarGenerator::new();
        // handles are created up front because the rules reference each other recursively
        let (expr, expr_builder) = g.create_handle();
        let (bin_op, bin_op_builder) = g.create_handle();
        let (unary_op, unary_op_builder) = g.create_handle();
        let (paren, paren_builder) = g.create_handle();

        paren_builder
            .concat("")
            .push_str("(")
            .push(expr)
            .push_str(")")
            .build();

        unary_op_builder
            .concat(" ")
            .push(g.create().choice().options_str(["~", "+", "-"]).build())
            .push(expr)
            .build();

        bin_op_builder
            .concat(" ")
            .push(expr)
            .push(
                g.create()
                    .choice()
                    .options_str(["+", "-", "*", "/", "%", "&", "|", "<<", ">>"])
                    .build(),
            )
            .push(expr)
            .build();

        expr_builder
            .choice()
            .option_w(unary_op, 1.0)
            .option_w(bin_op, 1.0)
            .option_w(paren, 1.0)
            .option_symbol_w(rand_int(-10..10), 1.0)
            .build();

        let sql = g.create().concat(" ").push_str("SELECT").push(expr).build();

        let db = TempDatabase::new_empty();
        let limbo_conn = db.connect_limbo();
        let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap();

        let (mut rng, seed) = rng_from_time();
        log::info!("seed: {}", seed);
        for _ in 0..1024 {
            let query = g.generate(&mut rng, sql, 50);
            let limbo = limbo_exec_row(&limbo_conn, &query);
            let sqlite = sqlite_exec_row(&sqlite_conn, &query);
            // the seed is part of the message so that a failure can be reproduced even when
            // logging is not enabled
            assert_eq!(
                limbo, sqlite,
                "seed: {}, query: {}, limbo: {:?}, sqlite: {:?}",
                seed, query, limbo, sqlite
            );
        }
    }
}
|
||||
@@ -1,4 +1,5 @@
|
||||
mod common;
|
||||
mod functions;
|
||||
mod fuzz;
|
||||
mod pragma;
|
||||
mod query_processing;
|
||||
|
||||
Reference in New Issue
Block a user