From 01a680b69e3ce271c395ba46bd25c46b969c92fc Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Wed, 4 Jun 2025 23:18:37 -0300 Subject: [PATCH] feat(fuzz)+fix: add schema fuzz testing and fix some bugs --- core/translate/mod.rs | 28 +--- core/vdbe/builder.rs | 16 +- fuzz/Cargo.lock | 57 ++++--- fuzz/Cargo.toml | 4 + fuzz/fuzz_targets/schema.rs | 310 ++++++++++++++++++++++++++++++++++++ 5 files changed, 370 insertions(+), 45 deletions(-) create mode 100644 fuzz/fuzz_targets/schema.rs diff --git a/core/translate/mod.rs b/core/translate/mod.rs index f945a276d..5d0a0875b 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -206,15 +206,7 @@ pub fn translate_inner( name: table_name.clone(), }); - program.cursor_loop(cursor_id, |program| { - let rowid = program.alloc_register(); - - // FIXME: Handle tables without rowid. - program.emit_insn(Insn::RowId { - cursor_id, - dest: rowid, - }); - + program.cursor_loop(cursor_id, |program, rowid| { let first_column = program.alloc_registers(column_count); let mut iter = first_column; @@ -340,14 +332,7 @@ pub fn translate_inner( name: sqlite_schema.name.clone(), }); - program.cursor_loop(cursor_id, |program| { - let rowid = program.alloc_register(); - - program.emit_insn(Insn::RowId { - cursor_id, - dest: rowid, - }); - + program.cursor_loop(cursor_id, |program, rowid| { let first_column = program.alloc_registers(5); for i in 0..5 { @@ -425,14 +410,7 @@ pub fn translate_inner( name: sqlite_schema.name.clone(), }); - program.cursor_loop(cursor_id, |program| { - let rowid = program.alloc_register(); - - program.emit_insn(Insn::RowId { - cursor_id, - dest: rowid, - }); - + program.cursor_loop(cursor_id, |program, rowid| { let first_column = program.alloc_registers(5); for i in 0..5 { diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 48bd62550..8254695d8 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -774,7 +774,7 @@ impl ProgramBuilder { } #[inline] - pub fn cursor_loop(&mut self, cursor_id: 
CursorID, f: impl Fn(&mut ProgramBuilder)) { + pub fn cursor_loop(&mut self, cursor_id: CursorID, f: impl Fn(&mut ProgramBuilder, usize)) { let loop_start = self.allocate_label(); let loop_end = self.allocate_label(); @@ -784,7 +784,19 @@ impl ProgramBuilder { }); self.preassign_label_to_next_insn(loop_start); - f(self); + let rowid = self.alloc_register(); + + self.emit_insn(Insn::RowId { + cursor_id, + dest: rowid, + }); + + self.emit_insn(Insn::IsNull { + reg: rowid, + target_pc: loop_end, + }); + + f(self, rowid); self.emit_insn(Insn::Next { cursor_id, diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 091feceb7..507975013 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -523,9 +523,9 @@ checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libmimalloc-sys" -version = "0.1.39" +version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23aa6811d3bd4deb8a84dde645f943476d13b248d818edcf8ce0b2f37f036b44" +checksum = "ec9d6fac27761dabcd4ee73571cdb06b7022dc99089acbe5435691edffaac0f4" dependencies = [ "cc", "libc", @@ -566,8 +566,9 @@ dependencies = [ [[package]] name = "limbo_core" -version = "0.0.19" +version = "0.0.21" dependencies = [ + "bitflags", "built", "cfg_block", "chrono", @@ -590,16 +591,18 @@ dependencies = [ "rand", "regex", "regex-syntax", - "rustix", + "rustix 1.0.7", "ryu", "strum", + "strum_macros", "thiserror 1.0.69", "tracing", + "uncased", ] [[package]] name = "limbo_ext" -version = "0.0.19" +version = "0.0.21" dependencies = [ "chrono", "getrandom 0.3.1", @@ -608,7 +611,7 @@ dependencies = [ [[package]] name = "limbo_macros" -version = "0.0.19" +version = "0.0.21" dependencies = [ "proc-macro2", "quote", @@ -617,7 +620,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3_parser" -version = "0.0.19" +version = "0.0.21" dependencies = [ "bitflags", "cc", @@ -636,7 +639,7 @@ dependencies = [ [[package]] name = "limbo_time" -version = "0.0.19" +version = 
"0.0.21" dependencies = [ "chrono", "limbo_ext", @@ -648,7 +651,7 @@ dependencies = [ [[package]] name = "limbo_uuid" -version = "0.0.19" +version = "0.0.21" dependencies = [ "limbo_ext", "mimalloc", @@ -661,6 +664,12 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "litemap" version = "0.7.5" @@ -691,21 +700,20 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "miette" -version = "7.5.0" +version = "7.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a955165f87b37fd1862df2a59547ac542c77ef6d17c666f619d1ad22dd89484" +checksum = "5f98efec8807c63c752b5bd61f862c165c115b0a35685bdcfd9238c7aeb592b7" dependencies = [ "cfg-if", "miette-derive", - "thiserror 1.0.69", "unicode-width", ] [[package]] name = "miette-derive" -version = "7.5.0" +version = "7.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf45bf44ab49be92fd1227a3be6fc6f617f1a337c06af54981048574d8783147" +checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b" dependencies = [ "proc-macro2", "quote", @@ -714,9 +722,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.43" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68914350ae34959d83f732418d51e2427a794055d0b9529f48259ac07af65633" +checksum = "995942f432bbb4822a7e9c3faa87a695185b0d09273ba85f097b54f4e458f2af" dependencies = [ "libmimalloc-sys", ] @@ -826,7 +834,7 @@ dependencies = [ "concurrent-queue", "hermit-abi", "pin-project-lite", - "rustix", + "rustix 0.38.44", "tracing", "windows-sys", ] @@ -949,7 +957,20 @@ dependencies = 
[ "bitflags", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.15", + "windows-sys", +] + +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.9.4", "windows-sys", ] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 69d6f438f..7c157b8eb 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -18,6 +18,10 @@ rusqlite = { version = "0.34.0", features = ["bundled"] } [workspace] members = ["."] +[[bin]] +name = "schema" +path = "fuzz_targets/schema.rs" + [[bin]] name = "expression" path = "fuzz_targets/expression.rs" diff --git a/fuzz/fuzz_targets/schema.rs b/fuzz/fuzz_targets/schema.rs new file mode 100644 index 000000000..15413b258 --- /dev/null +++ b/fuzz/fuzz_targets/schema.rs @@ -0,0 +1,310 @@ +#![no_main] +use core::fmt; +use std::{error::Error, num::NonZero, sync::Arc}; + +use arbitrary::Arbitrary; +use libfuzzer_sys::{fuzz_target, Corpus}; +use limbo_core::{Value, IO as _}; +use rusqlite::ffi::SQLITE_STATIC; + +#[derive(Debug, Clone, PartialEq, Eq)] +struct Id(String); + +impl<'a> Arbitrary<'a> for Id { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> { + let len: usize = u.int_in_range(1..=10)?; + let is_quoted = bool::arbitrary(u)?; + + let mut out = String::with_capacity(len + if is_quoted { 2 } else { 0 }); + + if is_quoted { + out.push('"'); + } + + for _ in 0..len { + out.push(u.choose(b"abcdefghijklnmopqrstuvwxyz")?.clone() as char); + } + + if is_quoted { + out.push('"'); + } + + Ok(Id(out)) + } +} + +impl fmt::Display for Id { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +#[derive(Debug, Arbitrary, Clone)] +enum Type { + None, + Integer, + Text, + Real, + Blob, + Custom(Id), +} + +impl fmt::Display for Type { + fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Type::None => Ok(()), + Type::Integer => write!(f, "INTEGER"), + Type::Text => write!(f, "TEXT"), + Type::Real => write!(f, "REAL"), + Type::Blob => write!(f, "BLOB"), + Type::Custom(id) => write!(f, "{}", id), + } + } +} + +#[derive(Debug, Arbitrary, Clone)] +struct ColumnDef { + name: Id, + r#type: Type, + unique: bool, +} + +impl fmt::Display for ColumnDef { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let ColumnDef { + name, + r#type, + unique, + } = self; + write!(f, "{name} {type}",)?; + + if *unique { + write!(f, " UNIQUE")?; + } + + // if *primary_key { + // write!(f, " PRIMARY KEY")?; + // } + + Ok(()) + } +} + +#[derive(Debug, Clone)] +struct Columns(Vec<ColumnDef>); + +impl<'a> Arbitrary<'a> for Columns { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> { + let len: usize = u.int_in_range(1..=4)?; + + let mut out: Vec<ColumnDef> = Vec::with_capacity(len); + + for i in 0..len { + out.push(ColumnDef { + name: Id(format!("c{i}")), + r#type: u.arbitrary()?, + unique: u.arbitrary()?, + }); + } + + Ok(Self(out)) + } +} + +impl fmt::Display for Columns { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for (i, column) in self.0.iter().enumerate() { + if i > 0 { + write!(f, ", ")? + } + + write!(f, "{column}")? 
+ } + + Ok(()) + } +} + +#[derive(Debug, Clone)] +struct TableDef { + name: Id, + columns: Columns, +} + +impl fmt::Display for TableDef { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let TableDef { name, columns } = self; + + write!(f, "CREATE TABLE {name} ( {columns} )") + } +} + +#[derive(Debug, Clone)] +struct IndexDef { + name: Id, + table: Id, + columns: Vec<Id>, +} + +impl fmt::Display for IndexDef { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let IndexDef { + name, + table, + columns, + } = self; + + todo!() + } +} + +#[derive(Debug)] +enum Op { + CreateTable(TableDef), + CreateIndex(IndexDef), + DropTable { table: Id }, + DropColumn { table: Id, column: Id }, + RenameTable { rename_from: Id, rename_to: Id }, +} + +impl fmt::Display for Op { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Op::CreateTable(table_def) => write!(f, "{table_def}"), + Op::CreateIndex(index_def) => write!(f, "{index_def}"), + Op::DropColumn { table, column } => { + write!(f, "ALTER TABLE {table} DROP COLUMN {column}") + } + Op::DropTable { table } => write!(f, "DROP TABLE {table}"), + Op::RenameTable { + rename_from, + rename_to, + } => write!(f, "ALTER TABLE {rename_from} RENAME TO {rename_to}"), + } + } +} + +#[derive(Debug)] +struct Ops(Vec<Op>); + +impl<'a> Arbitrary<'a> for Ops { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> { + let mut ops = Vec::new(); + let mut tables = Vec::new(); + + let mut drop_list = Vec::new(); + + let mut table_index: usize = 0; + + let num_ops = u.int_in_range(1..=10)?; + + for _ in 0..num_ops { + let op_type = if tables.is_empty() { + 0 + } else { + u.int_in_range(0..=2)? 
+ }; + + match op_type { + 0 => { + let table_def = TableDef { + name: { + let out = format!("t{table_index}"); + table_index += 1; + + Id(out) + }, + columns: u.arbitrary()?, + }; + + ops.push(Op::CreateTable(table_def.clone())); + + tables.push(table_def); + } + 1 => { + let index = u.choose_index(tables.len())?; + + let table = &tables[index]; + + let rename_to = Id(format!("t{table_index}")); + table_index += 1; + + ops.push(Op::RenameTable { + rename_from: table.name.clone(), + rename_to: rename_to.clone(), + }); + + tables.push(TableDef { + name: rename_to, + columns: table.columns.clone(), + }); + + tables.remove(index); + } + 2 => { + let index = u.choose_index(tables.len())?; + + let table = &tables[index]; + + if table.columns.0.len() == 1 { + let table = tables.remove(index); + + ops.push(Op::DropTable { + table: table.name.clone(), + }); + + drop_list.push(table.name); + } else { + let table = &mut tables[index]; + + let index = u.choose_index(table.columns.0.len())?; + + ops.push(Op::DropColumn { + table: table.name.clone(), + column: table.columns.0.remove(index).name, + }); + } + } + _ => panic!(), + } + } + + Ok(Self(ops)) + } +} + +fn do_fuzz(Ops(ops): Ops) -> Result<Corpus, Box<dyn Error>> { + dbg!(&ops); + + let rusqlite_conn = rusqlite::Connection::open_in_memory()?; + + let io = Arc::new(limbo_core::MemoryIO::new()); + let db = limbo_core::Database::open_file(io.clone(), ":memory:", true)?; + let limbo_conn = db.connect()?; + + for op in ops { + let sql = op.to_string(); + + dbg!(&sql); + + let expected = rusqlite_conn + .execute(&sql, ()) + .inspect_err(|_| { + dbg!(&sql); + }) + .unwrap(); + + let found = 'value: { + limbo_conn + .execute(&sql) + .inspect_err(|_| { + dbg!(&sql); + }) + .unwrap() + }; + } + + Ok(Corpus::Keep) +} + +fuzz_target!(|ops: Ops| -> Corpus { do_fuzz(ops).unwrap_or(Corpus::Keep) });