Merge 'Introduce libFuzzer' from Levy A.

This PR introduces structured fuzzing with
[libFuzzer](https://llvm.org/docs/LibFuzzer.html). The expression target
implementation is not complete yet, but it has already found a compatibility
issue. More fuzzing targets should be moved from `tests/fuzz` to `fuzz` so
that they can benefit from more advanced fuzzing techniques.
- [x] Add fuzzing guide to `README.md`
   - Install `cargo-fuzz`.
   - Use the nightly version of cargo with the `fuzz` dev shell or use
rustup to switch versions.
   - Run `cargo fuzz run ...`
- [x] Add all binary operations.
# 🐞 Bugs
A compatibility issue was found when running `select ?` with a `NaN` value
bound to the parameter: SQLite returns `NULL`, while Limbo returns `NaN`
(reasonable, but incompatible).
```
thread '<unnamed>' panicked at fuzz_targets/expression.rs:130:5:
assertion `left == right` failed
  left: Null
 right: Float(NaN)
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
==59288== ERROR: libFuzzer: deadly signal
    #0 0x00010564c0f0 in __sanitizer_print_stack_trace+0x28 (librustc-nightly_rt.asan.dylib:arm64+0x5c0f0)
    #1 0x0001024e7b64 in fuzzer::PrintStackTrace()+0x30 (expression:arm64+0x101c53b64)
    #2 0x0001024da650 in fuzzer::Fuzzer::CrashCallback()+0x60 (expression:arm64+0x101c46650)
    #3 0x000195fa6de0 in _sigtramp+0x34 (libsystem_platform.dylib:arm64+0x3de0)
    #4 0x000195f6ff6c in pthread_kill+0x11c (libsystem_pthread.dylib:arm64+0x6f6c)
    #5 0x000195e7c904 in abort+0x7c (libsystem_c.dylib:arm64+0x79904)
    #6 0x000102580990 in std::sys::pal::unix::abort_internal::hd275d720c474f43c+0x8 (expression:arm64+0x101cec990)
    #7 0x000102621604 in std::process::abort::h62d9ecef2f17e944+0x8 (expression:arm64+0x101d8d604)
    #8 0x0001024d93bc in libfuzzer_sys::initialize::_$u7b$$u7b$closure$u7d$$u7d$::h3b4b43a8f9432830+0xb8 (expression:arm64+0x101c453bc)
    #9 0x000102577de0 in std::panicking::rust_panic_with_hook::h19683f6fd94fb24c+0x2b8 (expression:arm64+0x101ce3de0)
    #10 0x000102577970 in std::panicking::begin_panic_handler::_$u7b$$u7b$closure$u7d$$u7d$::h4e98e5e8777eac5e+0x8c (expression:arm64+0x101ce3970)
    #11 0x0001025754e0 in std::sys::backtrace::__rust_end_short_backtrace::h12a2d70ebc9128b2+0x8 (expression:arm64+0x101ce14e0)
    #12 0x000102577628 in rust_begin_unwind+0x1c (expression:arm64+0x101ce3628)
    #13 0x000102623340 in core::panicking::panic_fmt::h8c4d74b8e5179d60+0x1c (expression:arm64+0x101d8f340)
    #14 0x0001026236cc in core::panicking::assert_failed_inner::he8fd1f85d57f866a+0x104 (expression:arm64+0x101d8f6cc)
    #15 0x0001025c73dc in core::panicking::assert_failed::h3e7590b91d46bff9 panicking.rs:364
    #16 0x000100930910 in expression::do_fuzz::hfcf5c5e5fde1a31c expression.rs:130
    #17 0x0001009373fc in rust_fuzzer_test_input lib.rs:359
    #18 0x0001024d2f34 in std::panicking::try::do_call::hce6ebc856827ae8b+0xc4 (expression:arm64+0x101c3ef34)
    #19 0x0001024d9624 in __rust_try+0x18 (expression:arm64+0x101c45624)
    #20 0x0001024d896c in LLVMFuzzerTestOneInput+0x16c (expression:arm64+0x101c4496c)
    #21 0x0001024dc3cc in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long)+0x148 (expression:arm64+0x101c483cc)
    #22 0x0001024db8dc in fuzzer::Fuzzer::RunOne(unsigned char const*, unsigned long, bool, fuzzer::InputInfo*, bool, bool*)+0x58 (expression:arm64+0x101c478dc)
    #23 0x0001024dd920 in fuzzer::Fuzzer::MutateAndTestOne()+0x258 (expression:arm64+0x101c49920)
    #24 0x0001024de908 in fuzzer::Fuzzer::Loop(std::__1::vector<fuzzer::SizedFile, std::__1::allocator<fuzzer::SizedFile>>&)+0x38c (expression:arm64+0x101c4a908)
    #25 0x0001024fd120 in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long))+0x1bac (expression:arm64+0x101c69120)
    #26 0x00010250d884 in main+0x34 (expression:arm64+0x101c79884)
    #27 0x000195bf0270  (<unknown module>)

NOTE: libFuzzer has rudimentary signal handlers.
      Combine libFuzzer with AddressSanitizer or similar for better crash reports.
SUMMARY: libFuzzer: deadly signal
MS: 3 CopyPart-ShuffleBytes-CrossOver-; base unit: a36928cfe783d55be82d526168a2da57372fdfdc
0xff,0xfd,0xff,0x3f,0x87,0x0,0x6e,0x6f,0x77,0x48,0x48,0x48,0xff,0x48,0xff,0xff,0x5b,0xff,0x5b,
\377\375\377?\207\000nowHHH\377H\377\377[\377[
artifact_prefix='/Users/levy/Documents/limbo/fuzz/artifacts/expression/'; Test unit written to /Users/levy/Documents/limbo/fuzz/artifacts/expression/crash-63bfc8813b82bd8b97c557650289a6bc2c055ca5
Base64: //3/P4cAbm93SEhI/0j//1v/Ww==

────────────────────────────────────────────────────────────────────────────────

Failing input:

        artifacts/expression/crash-63bfc8813b82bd8b97c557650289a6bc2c055ca5

Output of `std::fmt::Debug`:

        Value(
            Real(
                NaN,
            ),
        )

Reproduce with:

        cargo fuzz run expression artifacts/expression/crash-63bfc8813b82bd8b97c557650289a6bc2c055ca5

Minimize test case with:

        cargo fuzz tmin expression artifacts/expression/crash-63bfc8813b82bd8b97c557650289a6bc2c055ca5

────────────────────────────────────────────────────────────────────────────────
```

Reviewed-by: Preston Thorpe (@PThorpe92)
Reviewed-by: Pere Diaz Bou <pere-altea@homail.com>

Closes #1116
This commit is contained in:
Pekka Enberg
2025-03-26 18:36:39 +02:00
6 changed files with 1960 additions and 2 deletions

View File

@@ -17,10 +17,10 @@
overlays = [ (import rust-overlay) ];
};
toolchain = break ((pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml).override {
toolchain = (pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml).override {
extensions = [ "rust-analyzer" "rust-src" ];
targets = [ "wasm32-unknown-unknown" ];
});
};
lib = pkgs.lib;
@@ -68,6 +68,13 @@
apple-sdk
];
};
devShells.fuzz = with pkgs; mkShell {
nativeBuildInputs = [
(pkgs.rust-bin.selectLatestNightlyWith (toolchain: toolchain.minimal))
] ++ lib.optionals pkgs.stdenv.isDarwin [
apple-sdk
];
};
}
);
}

3
fuzz/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
target
corpus
artifacts

1662
fuzz/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

23
fuzz/Cargo.toml Normal file
View File

@@ -0,0 +1,23 @@
# Manifest for the crate that holds Limbo's libFuzzer targets.
[package]
name = "limbo-fuzz"
version = "0.0.0"
authors = ["the Limbo authors"]
publish = false
edition = "2021"
[package.metadata]
# Presumably the marker `cargo fuzz` looks for to recognise a fuzz crate — TODO confirm against the cargo-fuzz docs.
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
arbitrary = { version = "1.4.1", features = ["derive"] }
# The engine under test, taken from this repository.
limbo_core = { path = "../core" }
# SQLite, used by the fuzz targets to compute the expected result.
rusqlite = { version = "0.34.0", features = ["bundled"] }
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
# One [[bin]] entry per fuzz target.
[[bin]]
name = "expression"
path = "fuzz_targets/expression.rs"

43
fuzz/README.md Normal file
View File

@@ -0,0 +1,43 @@
# Limbo Fuzzing
## Prerequisites
Ensure you have the following installed:
- Nightly Rust toolchain (required by `cargo-fuzz`; the Nix dev shell provides it)
- `cargo-fuzz` (install it using `cargo install cargo-fuzz`)
- Nix (if using a `flake.nix` setup)
## Using Nix
```sh
nix develop .#fuzz
```
This will set up the required environment with the nightly toolchain and
dependencies.
## Running the Fuzzer
If using Nix:
```sh
cargo fuzz run <fuzz_target>
```
If using `rustup` without Nix:
```sh
cargo +nightly fuzz run <fuzz_target>
```
This will compile the fuzz target and start fuzzing with `libFuzzer`.
## Example
Run the expression target with:
```sh
cargo fuzz run expression
```

View File

@@ -0,0 +1,220 @@
#![no_main]
use core::fmt;
use std::{error::Error, num::NonZero, sync::Arc};
use arbitrary::Arbitrary;
use libfuzzer_sys::{fuzz_target, Corpus};
use limbo_core::{OwnedValue, IO as _};
/// Declares a fieldless enum in which every variant is paired with a fixed
/// string literal.
///
/// For `enum Name { Variant => "text", ... }` (note the mandatory trailing
/// comma) the macro generates:
/// - the enum itself, deriving `PartialEq`, `Debug`, `Copy`, `Clone` and
///   `Arbitrary`;
/// - `Name::to_str`, returning the literal paired with the variant;
/// - a `Display` impl that writes exactly that literal.
macro_rules! str_enum {
    ($vis:vis enum $name:ident { $($variant:ident => $value:literal),*, }) => {
        #[derive(PartialEq, Debug, Copy, Clone, Arbitrary)]
        $vis enum $name {
            $($variant),*
        }

        impl $name {
            /// Returns the string literal associated with this variant.
            pub fn to_str(self) -> &'static str {
                match self {
                    $(Self::$variant => $value),*
                }
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.write_str(self.to_str())
            }
        }
    };
}
// Binary operators the fuzzer may place between two sub-expressions.
// Each variant is paired with the SQL token that `Expr::lower` splices
// between the two parenthesised operands.
str_enum! {
enum Binary {
Equal => "=",
Is => "IS",
NotEqual => "<>",
GreaterThan => ">",
GreaterThanOrEqual => ">=",
LessThan => "<",
LessThanOrEqual => "<=",
RightShift => ">>",
LeftShift => "<<",
BitwiseAnd => "&",
BitwiseOr => "|",
And => "AND",
Or => "OR",
Add => "+",
Subtract => "-",
Multiply => "*",
Divide => "/",
Mod => "%",
Concat => "||",
}
}
// Unary (prefix) operators the fuzzer may apply to a sub-expression.
// Each variant is paired with the SQL token that `Expr::lower` writes in
// front of the parenthesised operand.
// NOTE(review): `Not` renders as `~`, which is SQL's bitwise NOT rather than
// the logical `NOT` keyword — confirm the variant name is intentional.
str_enum! {
enum Unary {
Not => "~",
Negative => "-",
Positive => "+",
}
}
/// A literal value generated by the fuzzer.
///
/// It is bound as a query parameter in both engines and is also the type the
/// SQLite result is read back into.
#[derive(Arbitrary, Debug, Clone)]
enum Value {
/// No value (SQL `NULL`).
Null,
/// A 64-bit signed integer.
Integer(i64),
/// A 64-bit float; may be NaN or infinite, since it is generated arbitrarily.
Real(f64),
/// A text value.
Text(String),
/// A byte string.
Blob(Vec<u8>),
}
/// Converts a fuzzer [`Value`] into the value type used by `limbo_core`.
///
/// The conversion is used both when binding parameters to a Limbo statement
/// and when turning the SQLite result into the value the Limbo result is
/// compared against.
impl From<Value> for limbo_core::OwnedValue {
    fn from(value: Value) -> Self {
        match value {
            Value::Null => Self::Null,
            Value::Integer(v) => Self::Integer(v),
            // A NaN real is mapped to `Null` instead of `Float(NaN)`.
            Value::Real(v) if v.is_nan() => Self::Null,
            Value::Real(v) => Self::Float(v),
            Value::Text(v) => Self::from_text(&v),
            // `v` is already an owned `Vec<u8>`, so it is moved, not cloned.
            Value::Blob(v) => Self::from_blob(v),
        }
    }
}
/// Lets a fuzzer [`Value`] be bound as a parameter of a `rusqlite` statement.
impl rusqlite::ToSql for Value {
    /// Produces an owned `rusqlite` value holding a copy of `self`.
    /// This conversion never returns an error.
    fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
        use rusqlite::types::{ToSqlOutput, Value as SqlValue};

        let owned = match self {
            Self::Null => SqlValue::Null,
            Self::Integer(int) => SqlValue::Integer(*int),
            Self::Real(real) => SqlValue::Real(*real),
            Self::Text(text) => SqlValue::Text(text.clone()),
            Self::Blob(bytes) => SqlValue::Blob(bytes.clone()),
        };
        Ok(ToSqlOutput::Owned(owned))
    }
}
/// Lets a column of a `rusqlite` row be read back as a fuzzer [`Value`].
impl rusqlite::types::FromSql for Value {
    /// Copies the borrowed column value into an owned [`Value`].
    ///
    /// Text is decoded lossily, so invalid UTF-8 sequences are replaced
    /// and never reported as an error; this conversion never fails.
    fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult<Self> {
        use rusqlite::types::ValueRef;

        let converted = match value {
            ValueRef::Null => Self::Null,
            ValueRef::Integer(int) => Self::Integer(int),
            ValueRef::Real(real) => Self::Real(real),
            ValueRef::Text(bytes) => Self::Text(String::from_utf8_lossy(bytes).into_owned()),
            ValueRef::Blob(bytes) => Self::Blob(bytes.to_vec()),
        };
        Ok(converted)
    }
}
/// A randomly generated expression tree; this is the fuzz target's input type.
#[derive(Debug, Arbitrary)]
enum Expr {
/// A leaf: a literal value, emitted as a `?` placeholder when lowered.
Value(Value),
/// A binary operator applied to a left and a right sub-expression.
Binary(Binary, Box<Expr>, Box<Expr>),
/// A unary operator applied to one sub-expression.
Unary(Unary, Box<Expr>),
}
/// The result of lowering an [`Expr`] to SQL text.
#[derive(Debug)]
struct Output {
// SQL text of the expression, with a `?` placeholder for every literal.
query: String,
// Values for the placeholders, in the order they appear in `query`.
parameters: Vec<Value>,
// Operator nesting depth: 0 for a bare value, one more for each operator level.
depth: usize,
}
impl Expr {
    /// Lowers the expression tree into SQL text plus its bound parameters.
    ///
    /// Every literal becomes a `?` placeholder and is appended to
    /// `parameters` in left-to-right order. Every operand is wrapped in
    /// parentheses, so the emitted text does not rely on SQL operator
    /// precedence. `depth` is 0 for a bare value and grows by one per
    /// operator level (for binary operators, from the deeper operand).
    pub fn lower(&self) -> Output {
        match self {
            Self::Value(value) => Output {
                query: String::from("?"),
                parameters: vec![value.clone()],
                depth: 0,
            },
            Self::Unary(op, operand) => {
                let Output {
                    query,
                    parameters,
                    depth,
                } = operand.lower();
                Output {
                    query: format!("{op} ({query})"),
                    parameters,
                    depth: depth + 1,
                }
            }
            Self::Binary(op, lhs, rhs) => {
                let left = lhs.lower();
                let right = rhs.lower();
                // Left-hand parameters come first, matching placeholder order.
                let mut parameters = left.parameters;
                parameters.extend(right.parameters);
                Output {
                    query: format!("({}) {op} ({})", left.query, right.query),
                    parameters,
                    depth: left.depth.max(right.depth) + 1,
                }
            }
        }
    }
}
/// Evaluates `SELECT <expr>` in both SQLite (via `rusqlite`) and Limbo and
/// asserts that the two engines return the same value.
///
/// Returns `Ok(Corpus::Reject)` for expressions nested deeper than the
/// supported limit, and `Ok(Corpus::Keep)` when both results agree.
///
/// # Errors
/// Propagates any error returned by either engine while opening the
/// database, preparing the statement or stepping it.
///
/// # Panics
/// Panics when Limbo's row does not have exactly one column, when stepping
/// yields anything other than `IO` or `Row` before the first row, or when
/// the two results differ.
fn do_fuzz(expr: Expr) -> Result<Corpus, Box<dyn Error>> {
    use limbo_core::StepResult;

    let lowered = expr.lower();

    // FIXME: `limbo_core::translate::expr::translate_expr` causes an overflow
    // if this is any higher.
    if lowered.depth > 153 {
        return Ok(Corpus::Reject);
    }

    let sql = format!("SELECT {}", lowered.query);

    // Reference result, computed by SQLite. The connection is scoped so it
    // is closed before Limbo runs.
    let expected = {
        let sqlite = rusqlite::Connection::open_in_memory()?;
        sqlite.query_row(
            &sql,
            rusqlite::params_from_iter(lowered.parameters.iter()),
            |row| row.get::<_, Value>(0),
        )?
    };

    // Result under test, computed by Limbo on an in-memory database.
    let found = {
        let io = Arc::new(limbo_core::MemoryIO::new());
        let db = limbo_core::Database::open_file(io.clone(), ":memory:", true)?;
        let conn = db.connect()?;

        let mut stmt = conn.prepare(sql)?;
        for (idx, value) in lowered.parameters.iter().enumerate() {
            // Parameter positions are 1-based.
            let position = NonZero::new(idx + 1).unwrap();
            stmt.bind_at(position, value.clone().into());
        }

        // Drive the statement until it yields its single row.
        loop {
            match stmt.step()? {
                StepResult::IO => io.run_once()?,
                StepResult::Row => {
                    let row = stmt.row().unwrap();
                    assert_eq!(row.count(), 1, "expr: {:?}", lowered);
                    break row.get_value(0).clone();
                }
                _ => unreachable!(),
            }
        }
    };

    assert_eq!(
        OwnedValue::from(expected.clone()),
        found,
        "with expression {:?} {}",
        lowered,
        // For two floats, also report how far apart they are.
        match (&expected, &found) {
            (Value::Real(a), OwnedValue::Float(b)) => format!("float diff: {:?}", (a - b).abs()),
            _ => String::new(),
        }
    );

    Ok(Corpus::Keep)
}
// libFuzzer entry point. An `Err` returned by `do_fuzz` is not treated as a
// failure: the input is simply kept in the corpus. Mismatches between the two
// engines surface as panics from the assertions inside `do_fuzz`.
fuzz_target!(|expr: Expr| -> Corpus { do_fuzz(expr).unwrap_or(Corpus::Keep) });