copy generation code from simulator

This commit is contained in:
pedrocarlo
2025-08-25 15:14:10 -03:00
parent b16f96b507
commit 0285bdd72c
25 changed files with 6490 additions and 3 deletions

View File

@@ -0,0 +1,296 @@
use turso_parser::ast::{
self, Expr, LikeOperator, Name, Operator, QualifiedName, Type, UnaryOperator,
};
use crate::{
generation::{
frequency, gen_random_text, one_of, pick, pick_index, Arbitrary, ArbitraryFrom,
ArbitrarySizedFrom,
},
model::table::SimValue,
};
/// A boxed value is generated by generating the inner `T` and moving it to the heap.
impl<T> Arbitrary for Box<T>
where
    T: Arbitrary,
{
    fn arbitrary<R: rand::Rng>(rng: &mut R) -> Self {
        let inner = T::arbitrary(rng);
        Box::new(inner)
    }
}
/// A boxed value is generated by delegating to `T` with the same context and size budget,
/// then moving the result to the heap.
impl<A, T> ArbitrarySizedFrom<A> for Box<T>
where
    T: ArbitrarySizedFrom<A>,
{
    fn arbitrary_sized_from<R: rand::Rng>(rng: &mut R, t: A, size: usize) -> Self {
        let inner = T::arbitrary_sized_from(rng, t, size);
        Box::new(inner)
    }
}
/// Generates `Some(T)` or `None` with equal probability.
///
/// The inner value is built lazily: it is only generated (and only draws randomness from
/// `rng`) when the coin flip selected `Some`. The previous `then_some` form always built a
/// `T` and threw it away on the `None` path.
impl<T> Arbitrary for Option<T>
where
    T: Arbitrary,
{
    fn arbitrary<R: rand::Rng>(rng: &mut R) -> Self {
        rng.random_bool(0.5).then(|| T::arbitrary(rng))
    }
}
/// Generates `Some(T)` or `None` with equal probability, forwarding the context `t` and the
/// size budget to `T` when a value is produced.
///
/// The inner value is built lazily: it is only generated (and only draws randomness from
/// `rng`) when the coin flip selected `Some`. The previous `then_some` form always built a
/// `T` and threw it away on the `None` path.
impl<A, T> ArbitrarySizedFrom<A> for Option<T>
where
    T: ArbitrarySizedFrom<A>,
{
    fn arbitrary_sized_from<R: rand::Rng>(rng: &mut R, t: A, size: usize) -> Self {
        rng.random_bool(0.5)
            .then(|| T::arbitrary_sized_from(rng, t, size))
    }
}
/// Generates a vector of zero to four elements, each produced from the same (copied)
/// context value `t`.
impl<A: Copy, T> ArbitraryFrom<A> for Vec<T>
where
    T: ArbitraryFrom<A>,
{
    fn arbitrary_from<R: rand::Rng>(rng: &mut R, t: A) -> Self {
        let count = rng.random_range(0..5);
        let mut items = Vec::new();
        for _ in 0..count {
            items.push(T::arbitrary_from(rng, t));
        }
        items
    }
}
// Freestyling generation
/// Generates a random expression whose nesting is bounded by `size`.
///
/// `frequency` chooses between a literal (weight 1) and a compound expression (weight
/// `size`). With `size == 0` the compound branch has weight zero, so only a literal can be
/// produced and the `size - 1` below is never evaluated at zero. Compound expressions are
/// currently limited to binary and unary expressions; each sub-expression is generated with
/// a budget of `size - 1`. The remaining `Expr` variants are kept as commented-out sketches.
impl ArbitrarySizedFrom<&SimulatorEnv> for Expr {
fn arbitrary_sized_from<R: rand::Rng>(rng: &mut R, t: &SimulatorEnv, size: usize) -> Self {
frequency(
vec![
// Leaf case: a literal value.
(
1,
Box::new(|rng| Expr::Literal(ast::Literal::arbitrary_from(rng, t))),
),
// Recursive case: weighted by the remaining size budget.
(
size,
Box::new(|rng| {
one_of(
vec![
// Box::new(|rng: &mut R| Expr::Between {
// lhs: Box::arbitrary_sized_from(rng, t, size - 1),
// not: rng.gen_bool(0.5),
// start: Box::arbitrary_sized_from(rng, t, size - 1),
// end: Box::arbitrary_sized_from(rng, t, size - 1),
// }),
Box::new(|rng: &mut R| {
Expr::Binary(
Box::arbitrary_sized_from(rng, t, size - 1),
Operator::arbitrary(rng),
Box::arbitrary_sized_from(rng, t, size - 1),
)
}),
// Box::new(|rng| Expr::Case {
// base: Option::arbitrary_from(rng, t),
// when_then_pairs: {
// let size = rng.gen_range(0..5);
// (0..size)
// .map(|_| (Self::arbitrary_from(rng, t), Self::arbitrary_from(rng, t)))
// .collect()
// },
// else_expr: Option::arbitrary_from(rng, t),
// }),
// Box::new(|rng| Expr::Cast {
// expr: Box::arbitrary_sized_from(rng, t),
// type_name: Option::arbitrary(rng),
// }),
// Box::new(|rng| Expr::Collate(Box::arbitrary_sized_from(rng, t), CollateName::arbitrary(rng).0)),
// Box::new(|rng| Expr::InList {
// lhs: Box::arbitrary_sized_from(rng, t),
// not: rng.gen_bool(0.5),
// rhs: Option::arbitrary_from(rng, t),
// }),
// Box::new(|rng| Expr::IsNull(Box::arbitrary_sized_from(rng, t))),
// Box::new(|rng| {
// // let op = LikeOperator::arbitrary_from(rng, t);
// let op = ast::LikeOperator::Like; // todo: remove this line when LikeOperator is implemented
// let escape = if matches!(op, LikeOperator::Like) {
// Option::arbitrary_sized_from(rng, t, size - 1)
// } else {
// None
// };
// Expr::Like {
// lhs: Box::arbitrary_sized_from(rng, t, size - 1),
// not: rng.gen_bool(0.5),
// op,
// rhs: Box::arbitrary_sized_from(rng, t, size - 1),
// escape,
// }
// }),
// Box::new(|rng| Expr::NotNull(Box::arbitrary_sized_from(rng, t))),
// // TODO: only supports one parenthesized expression
// Box::new(|rng| Expr::Parenthesized(vec![Expr::arbitrary_from(rng, t)])),
// Box::new(|rng| {
// let table_idx = pick_index(t.tables.len(), rng);
// let table = &t.tables[table_idx];
// let col_idx = pick_index(table.columns.len(), rng);
// let col = &table.columns[col_idx];
// Expr::Qualified(Name(table.name.clone()), Name(col.name.clone()))
// })
Box::new(|rng| {
Expr::Unary(
UnaryOperator::arbitrary_from(rng, t),
Box::arbitrary_sized_from(rng, t, size - 1),
)
}),
// TODO: skip Exists for now
// TODO: skip Function Call for now
// TODO: skip Function Call Star for now
// TODO: skip ID for now
// TODO: skip InSelect as still need to implement ArbitraryFrom for Select
// TODO: skip InTable
// TODO: skip Name
// TODO: Skip DoublyQualified for now
// TODO: skip Raise
// TODO: skip subquery
],
rng,
)
}),
),
],
rng,
)
}
}
impl Arbitrary for Operator {
fn arbitrary<R: rand::Rng>(rng: &mut R) -> Self {
let choices = [
Operator::Add,
Operator::And,
// Operator::ArrowRight, -- todo: not implemented in `binary_compare` yet
// Operator::ArrowRightShift, -- todo: not implemented in `binary_compare` yet
Operator::BitwiseAnd,
// Operator::BitwiseNot, -- todo: not implemented in `binary_compare` yet
Operator::BitwiseOr,
// Operator::Concat, -- todo: not implemented in `exec_concat`
Operator::Divide,
Operator::Equals,
Operator::Greater,
Operator::GreaterEquals,
Operator::Is,
Operator::IsNot,
Operator::LeftShift,
Operator::Less,
Operator::LessEquals,
Operator::Modulus,
Operator::Multiply,
Operator::NotEquals,
Operator::Or,
Operator::RightShift,
Operator::Subtract,
];
*pick(&choices, rng)
}
}
impl Arbitrary for Type {
fn arbitrary<R: rand::Rng>(rng: &mut R) -> Self {
let name = pick(&["INT", "INTEGER", "REAL", "TEXT", "BLOB", "ANY"], rng).to_string();
Self {
name,
size: None, // TODO: come back later here
}
}
}
/// Newtype around the name of a collation sequence.
struct CollateName(String);
/// Picks one of `BINARY`, `RTRIM` or `NOCASE` with equal probability.
impl Arbitrary for CollateName {
    fn arbitrary<R: rand::Rng>(rng: &mut R) -> Self {
        let collation = match rng.random_range(0..3) {
            0 => "BINARY",
            1 => "RTRIM",
            2 => "NOCASE",
            _ => unreachable!(),
        };
        CollateName(collation.to_string())
    }
}
/// Builds a qualified name that refers to a randomly chosen table of the environment.
///
/// Panics (through the empty-range sample in `pick_index`) when the environment has no tables.
impl ArbitraryFrom<&SimulatorEnv> for QualifiedName {
    fn arbitrary_from<R: rand::Rng>(rng: &mut R, t: &SimulatorEnv) -> Self {
        // TODO: for now just generate table name
        let tables = &t.tables;
        let chosen = &tables[pick_index(tables.len(), rng)];
        // TODO: for now forego alias
        Self {
            db_name: None,
            name: Name::new(&chosen.name),
            alias: None,
        }
    }
}
/// Picks one of the four pattern-matching operators with equal probability; the environment
/// is not consulted.
impl ArbitraryFrom<&SimulatorEnv> for LikeOperator {
    fn arbitrary_from<R: rand::Rng>(rng: &mut R, _t: &SimulatorEnv) -> Self {
        match rng.random_range(0..4) {
            0 => Self::Glob,
            1 => Self::Like,
            2 => Self::Match,
            3 => Self::Regexp,
            _ => unreachable!(),
        }
    }
}
// Current implementation does not take into account the columns affinity nor if table is Strict
/// Generates a numeric, string, blob or `NULL` literal.
///
/// A kind is drawn from five slots; the slot reserved for keywords is not implemented yet,
/// so drawing it simply triggers another draw.
impl ArbitraryFrom<&SimulatorEnv> for ast::Literal {
    fn arbitrary_from<R: rand::Rng>(rng: &mut R, _t: &SimulatorEnv) -> Self {
        loop {
            let literal = match rng.random_range(0..5) {
                0 => {
                    // Half of the numeric literals are integers, the other half floats.
                    let digits = if rng.random_bool(0.5) {
                        rng.random_range(i64::MIN..i64::MAX).to_string()
                    } else {
                        rng.random_range(-1e10..1e10).to_string()
                    };
                    ast::Literal::Numeric(digits)
                }
                1 => ast::Literal::String(format!("'{}'", gen_random_text(rng))),
                2 => ast::Literal::Blob(hex::encode(gen_random_text(rng).as_bytes())),
                4 => ast::Literal::Null,
                // TODO: skip Keyword (slot 3)
                // TODO: Ignore Date stuff for now
                _ => continue,
            };
            break literal;
        }
    }
}
// Creates a literal value
/// Turns one randomly picked value into a literal expression; an empty slice yields `NULL`
/// without consuming any randomness.
impl ArbitraryFrom<&Vec<&SimValue>> for ast::Expr {
    fn arbitrary_from<R: rand::Rng>(rng: &mut R, values: &Vec<&SimValue>) -> Self {
        // TODO: for now just convert the value to an ast::Literal
        let literal: ast::Literal = if values.is_empty() {
            ast::Literal::Null
        } else {
            (*pick(values, rng)).into()
        };
        Expr::Literal(literal)
    }
}
impl ArbitraryFrom<&SimulatorEnv> for UnaryOperator {
fn arbitrary_from<R: rand::Rng>(rng: &mut R, _t: &SimulatorEnv) -> Self {
let choice = rng.random_range(0..4);
match choice {
0 => Self::BitwiseNot,
1 => Self::Negative,
2 => Self::Not,
3 => Self::Positive,
_ => unreachable!(),
}
}
}

View File

@@ -0,0 +1,166 @@
use std::{iter::Sum, ops::SubAssign};
use anarchist_readable_name_generator_lib::readable_name_custom;
use rand::{distr::uniform::SampleUniform, Rng};
mod expr;
pub mod plan;
mod predicate;
pub mod property;
pub mod query;
pub mod table;
/// A boxed generator closure: given the random number generator it produces a `T`.
/// Used as the element type of the choice lists passed to `frequency` and `one_of`.
type ArbitraryFromFunc<'a, R, T> = Box<dyn Fn(&mut R) -> T + 'a>;
/// A fallible generator paired with the number of attempts it has left, as consumed by
/// `backtrack`.
type Choice<'a, R, T> = (usize, Box<dyn Fn(&mut R) -> Option<T> + 'a>);
/// Arbitrary trait for generating random values
/// An implementation of arbitrary is assumed to be a uniform sampling of
/// the possible values of the type, with a bias towards smaller values for
/// practicality.
pub trait Arbitrary {
/// Generates a value using only the randomness provided by `rng`.
fn arbitrary<R: Rng>(rng: &mut R) -> Self;
}
/// ArbitrarySized trait for generating random values of a specific size
/// An implementation of arbitrary_sized is assumed to be a uniform sampling of
/// the possible values of the type, with a bias towards smaller values for
/// practicality, but with the additional constraint that the generated value
/// must fit in the given size. This is useful for generating values that are
/// constrained by a specific size, such as integers or strings.
pub trait ArbitrarySized {
/// Generates a value that respects the `size` bound, using the randomness of `rng`.
fn arbitrary_sized<R: Rng>(rng: &mut R, size: usize) -> Self;
}
/// ArbitraryFrom trait for generating random values from a given value
/// ArbitraryFrom allows for constructing relations, where the generated
/// value is dependent on the given value. These relations could be constraints
/// such as generating an integer within an interval, or a value that fits in a table,
/// or a predicate satisfying a given table row.
pub trait ArbitraryFrom<T> {
/// Generates a value that depends on the context value `t`.
fn arbitrary_from<R: Rng>(rng: &mut R, t: T) -> Self;
}
/// ArbitrarySizedFrom trait for generating random values from a given value
/// ArbitrarySizedFrom allows for constructing relations, where the generated
/// value is dependent on the given value and a size constraint. These relations
/// could be constraints such as generating an integer within an interval,
/// or a value that fits in a table, or a predicate satisfying a given table row,
/// but with the additional constraint that the generated value must fit in the given size.
/// This is useful for generating values that are constrained by a specific size,
/// such as integers or strings, while still being dependent on the given value.
pub trait ArbitrarySizedFrom<T> {
/// Generates a value that depends on the context value `t` and respects the `size` bound.
fn arbitrary_sized_from<R: Rng>(rng: &mut R, t: T, size: usize) -> Self;
}
/// ArbitraryFromMaybe trait for fallibly generating random values from a given value
pub trait ArbitraryFromMaybe<T> {
/// Attempts to generate a value that depends on `t`; returns `None` when generation fails.
fn arbitrary_from_maybe<R: Rng>(rng: &mut R, t: T) -> Option<Self>
where
Self: Sized;
}
/// Frequency is a helper function for composing different generators with different frequency
/// of occurrences.
///
/// Each entry of `choices` pairs a weight with a generator; a generator is selected with a
/// probability proportional to its weight and then invoked with `rng`. Entries with a zero
/// weight are never selected.
///
/// The type signature for the `N` parameter is a bit complex, but it
/// roughly corresponds to a type that can be summed, compared, subtracted and sampled, which are
/// the operations we require for the implementation. `N::default()` is used as the zero weight.
///
/// # Panics
///
/// Panics when the weights do not sum to a value greater than zero (which includes an empty
/// `choices` list), because there is nothing that could be selected.
// todo: switch to a simpler type signature that can accommodate all integer and float types, which
// should be enough for our purposes.
pub(crate) fn frequency<
    T,
    R: Rng,
    N: Sum + PartialOrd + Copy + Default + SampleUniform + SubAssign,
>(
    choices: Vec<(N, ArbitraryFromFunc<R, T>)>,
    rng: &mut R,
) -> T {
    let zero = N::default();
    let total = choices.iter().map(|(weight, _)| *weight).sum::<N>();
    // Fail with a message that names the actual problem instead of the sampler's
    // generic "empty range" panic.
    assert!(
        total > zero,
        "frequency: the weights must sum to a positive value"
    );
    let mut choice = rng.random_range(zero..total);
    // With floating point weights the running subtraction can accumulate rounding error and
    // walk past the last entry; remember the last selectable generator so that this case
    // still yields a value instead of hitting `unreachable!`.
    let mut fallback = None;
    for (weight, f) in choices {
        if choice < weight {
            return f(rng);
        }
        choice -= weight;
        if weight > zero {
            fallback = Some(f);
        }
    }
    match fallback {
        Some(f) => f(rng),
        None => unreachable!("frequency: a positive total implies a positive weight"),
    }
}
/// one_of is a helper function for composing different generators with equal probability of occurrence.
pub(crate) fn one_of<T, R: Rng>(choices: Vec<ArbitraryFromFunc<R, T>>, rng: &mut R) -> T {
let index = rng.random_range(0..choices.len());
choices[index](rng)
}
/// backtrack composes several "failable" generators.
/// Every entry pairs a generator returning `Option<T>` with the number of attempts it is
/// allowed. A generator that still has attempts left is picked uniformly at random; on
/// failure its counter is decremented and another pick is made. The first success is
/// returned, and `None` is returned once every generator has run out of attempts.
pub(crate) fn backtrack<T, R: Rng>(mut choices: Vec<Choice<R, T>>, rng: &mut R) -> Option<T> {
    loop {
        // Indices of the generators that may still be tried
        let live: Vec<usize> = choices
            .iter()
            .enumerate()
            .filter(|(_, (retries, _))| *retries > 0)
            .map(|(index, _)| index)
            .collect();
        // If there are no more choices left, we give up
        if live.is_empty() {
            tracing::trace!("backtrack: no more choices left");
            return None;
        }
        // Run a one_of on the remaining choices
        let index = *pick(&live, rng);
        match (choices[index].1)(rng) {
            Some(value) => return Some(value),
            // A failed attempt uses up one retry of that generator
            None => choices[index].0 -= 1,
        }
    }
}
/// pick is a helper function for uniformly picking a random element from a slice
pub(crate) fn pick<'a, T, R: Rng>(choices: &'a [T], rng: &mut R) -> &'a T {
let index = rng.random_range(0..choices.len());
&choices[index]
}
/// pick_index draws a uniformly distributed index in `0..choices`; it is typically used to
/// pick a position in a slice whose element is looked up later.
/// Panics when `choices` is zero, because the range to sample from is empty.
pub(crate) fn pick_index<R: Rng>(choices: usize, rng: &mut R) -> usize {
    let candidates = 0..choices;
    rng.random_range(candidates)
}
/// pick_n_unique draws up to `n` distinct values from `range` by shuffling the whole range
/// and keeping its first `n` entries. When the range holds fewer than `n` values, all of
/// them are returned (in shuffled order).
/// The elements themselves are usize, typically representing indices.
pub(crate) fn pick_n_unique<R: Rng>(
    range: std::ops::Range<usize>,
    n: usize,
    rng: &mut R,
) -> Vec<usize> {
    use rand::seq::SliceRandom;
    let mut shuffled = range.collect::<Vec<usize>>();
    shuffled.shuffle(rng);
    shuffled.truncate(n);
    shuffled
}
/// gen_random_text produces text for table names, column names, text values etc.
/// In the common case it asks `anarchist_readable_name_generator_lib` for a readable name
/// and replaces every `-` with `_`. With a chance of 1 in 1000 it instead produces a long
/// text of 1024 to 2047 characters that cycles through the letters `A` to `Z`.
pub(crate) fn gen_random_text<T: Rng>(rng: &mut T) -> String {
    if !rng.random_ratio(1, 1000) {
        return readable_name_custom("_", rng).replace("-", "_");
    }
    // let max_size: u64 = 2 * 1024 * 1024 * 1024;
    let max_size: u64 = 2 * 1024;
    let size = rng.random_range(1024..max_size);
    (0..size)
        .map(|i| char::from((i % 26) as u8 + b'A'))
        .collect()
}

View File

@@ -0,0 +1,833 @@
use std::{
collections::HashSet,
fmt::{Debug, Display},
path::Path,
sync::Arc,
vec,
};
use serde::{Deserialize, Serialize};
use turso_core::{Connection, Result, StepResult};
use crate::{
generation::query::SelectFree,
model::{
query::{update::Update, Create, CreateIndex, Delete, Drop, Insert, Query, Select},
table::SimValue,
},
runner::{
env::{SimConnection, SimulationType, SimulatorTables},
io::SimulatorIO,
},
SimulatorEnv,
};
use crate::generation::{frequency, Arbitrary, ArbitraryFrom};
use super::property::{remaining, Property};
/// Outcome of executing a query: the returned rows (one `Vec<SimValue>` per row) or the
/// error the execution failed with.
pub(crate) type ResultSet = Result<Vec<Vec<SimValue>>>;
/// An ordered list of interaction groups (properties, standalone queries and faults).
/// The plan can be rendered as text through `Display` and is (de)serializable via serde.
#[derive(Clone, Serialize, Deserialize)]
pub(crate) struct InteractionPlan {
/// The groups of the plan, in the order they were generated.
pub(crate) plan: Vec<Interactions>,
}
impl InteractionPlan {
/// Compute via diff computes a plan from a given `.plan` file without the need to parse
/// sql. This is possible because there are two versions of the plan file, one that is human
/// readable and one that is serialized as JSON. Under watch mode, the users will be able to
/// delete interactions from the human readable file, and this function uses the JSON file as
/// a baseline to detect which interactions were deleted and constructs the plan from the
/// remaining interactions.
///
/// `plan_path` is the human readable file; the JSON baseline is read from the same path with
/// its extension replaced by `json`. The result holds one inner vector per remaining entry
/// of the plan.
///
/// Panics when either file cannot be read or the JSON cannot be deserialized.
pub(crate) fn compute_via_diff(plan_path: &Path) -> Vec<Vec<Interaction>> {
let interactions = std::fs::read_to_string(plan_path).unwrap();
let interactions = interactions.lines().collect::<Vec<_>>();
let plan: InteractionPlan = serde_json::from_str(
std::fs::read_to_string(plan_path.with_extension("json"))
.unwrap()
.as_str(),
)
.unwrap();
// Expand every entry of the baseline plan into its individual interactions.
let mut plan = plan
.plan
.into_iter()
.map(|i| i.interactions())
.collect::<Vec<_>>();
// `i` walks the lines of the human readable file, `j` walks the entries of the baseline.
let (mut i, mut j) = (0, 0);
while i < interactions.len() && j < plan.len() {
// Property delimiters and blank lines carry no interaction.
if interactions[i].starts_with("-- begin")
|| interactions[i].starts_with("-- end")
|| interactions[i].is_empty()
{
i += 1;
continue;
}
// interactions[i] is the i'th line in the human readable plan
// plan[j][k] is the k'th interaction in the j'th property
let mut k = 0;
while k < plan[j].len() {
if i >= interactions.len() {
// The readable file ended: drop everything after the current position.
let _ = plan.split_off(j + 1);
let _ = plan[j].split_off(k);
break;
}
// NOTE(review): this is logged at error level for every comparison, which looks like
// leftover debugging output.
tracing::error!("Comparing '{}' with '{}'", interactions[i], plan[j][k]);
if interactions[i].contains(plan[j][k].to_string().as_str()) {
i += 1;
k += 1;
} else {
// NOTE(review): the mismatching interaction is removed and then the function panics
// unconditionally; after the removal `plan[j][k]` refers to the next interaction, or
// is out of bounds when the removed one was the last. This looks like leftover
// debugging code that prevents deletions from being handled - confirm the intent.
plan[j].remove(k);
panic!("Comparing '{}' with '{}'", interactions[i], plan[j][k]);
}
}
// An entry that lost all of its interactions is dropped from the plan.
if plan[j].is_empty() {
plan.remove(j);
} else {
j += 1;
}
}
// Baseline entries that were never reached are discarded.
let _ = plan.split_off(j);
plan
}
}
/// Mutable state kept while a plan is being executed.
pub(crate) struct InteractionPlanState {
/// Result sets collected so far; assertions and assumptions receive this stack.
pub(crate) stack: Vec<ResultSet>,
// NOTE(review): presumably the index of the current entry of the plan - confirm at the use sites.
pub(crate) interaction_pointer: usize,
// NOTE(review): presumably the index inside the current entry - confirm at the use sites.
pub(crate) secondary_pointer: usize,
}
/// One entry of an interaction plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) enum Interactions {
/// A property, which expands into several interactions.
Property(Property),
/// A single standalone query.
Query(Query),
/// A single fault.
Fault(Fault),
}
impl Interactions {
    /// Returns the name of the property, or `None` for standalone queries and faults.
    pub(crate) fn name(&self) -> Option<&str> {
        if let Self::Property(property) = self {
            Some(property.name())
        } else {
            None
        }
    }
    /// Expands this entry into the individual interactions it consists of: a property yields
    /// its own list, while a query or a fault yields a single-element list holding a clone.
    pub(crate) fn interactions(&self) -> Vec<Interaction> {
        match self {
            Self::Query(query) => vec![Interaction::Query(query.clone())],
            Self::Fault(fault) => vec![Interaction::Fault(fault.clone())],
            Self::Property(property) => property.interactions(),
        }
    }
}
impl Interactions {
    /// Collects the dependencies reported by every query of this entry. Faults, and the
    /// non-query interactions of a property, contribute nothing.
    pub(crate) fn dependencies(&self) -> HashSet<String> {
        match self {
            Interactions::Query(query) => query.dependencies(),
            Interactions::Fault(_) => HashSet::new(),
            Interactions::Property(property) => {
                let mut collected = HashSet::new();
                for interaction in &property.interactions() {
                    if let Interaction::Query(q) = interaction {
                        collected.extend(q.dependencies());
                    }
                }
                collected
            }
        }
    }
    /// Collects, in order, what every query of this entry reports through `uses`. Faults,
    /// and the non-query interactions of a property, contribute nothing.
    pub(crate) fn uses(&self) -> Vec<String> {
        match self {
            Interactions::Query(query) => query.uses(),
            Interactions::Fault(_) => vec![],
            Interactions::Property(property) => {
                let mut collected = vec![];
                for interaction in &property.interactions() {
                    if let Interaction::Query(q) = interaction {
                        collected.extend(q.uses());
                    }
                }
                collected
            }
        }
    }
}
impl Display for InteractionPlan {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for interactions in &self.plan {
match interactions {
Interactions::Property(property) => {
let name = property.name();
writeln!(f, "-- begin testing '{name}'")?;
for interaction in property.interactions() {
write!(f, "\t")?;
match interaction {
Interaction::Query(query) => writeln!(f, "{query};")?,
Interaction::Assumption(assumption) => {
writeln!(f, "-- ASSUME {};", assumption.name)?
}
Interaction::Assertion(assertion) => {
writeln!(f, "-- ASSERT {};", assertion.name)?
}
Interaction::Fault(fault) => writeln!(f, "-- FAULT '{fault}';")?,
Interaction::FsyncQuery(query) => {
writeln!(f, "-- FSYNC QUERY;")?;
writeln!(f, "{query};")?;
writeln!(f, "{query};")?
}
Interaction::FaultyQuery(query) => {
writeln!(f, "{query}; -- FAULTY QUERY")?
}
}
}
writeln!(f, "-- end testing '{name}'")?;
}
Interactions::Fault(fault) => {
writeln!(f, "-- FAULT '{fault}'")?;
}
Interactions::Query(query) => {
writeln!(f, "{query};")?;
}
}
}
Ok(())
}
}
/// Counts how many queries of each kind a plan contains.
#[derive(Debug, Clone, Copy)]
pub(crate) struct InteractionStats {
    /// Number of select queries.
    pub(crate) read_count: usize,
    /// Number of insert queries.
    pub(crate) write_count: usize,
    /// Number of delete queries.
    pub(crate) delete_count: usize,
    /// Number of update queries.
    pub(crate) update_count: usize,
    /// Number of create (table) queries.
    pub(crate) create_count: usize,
    /// Number of create-index queries.
    pub(crate) create_index_count: usize,
    /// Number of drop queries.
    pub(crate) drop_count: usize,
    /// Number of begin queries.
    pub(crate) begin_count: usize,
    /// Number of commit queries.
    pub(crate) commit_count: usize,
    /// Number of rollback queries.
    pub(crate) rollback_count: usize,
}

/// Renders all counters on a single line as comma separated `Label: value` pairs.
impl Display for InteractionStats {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            read_count,
            write_count,
            delete_count,
            update_count,
            create_count,
            create_index_count,
            drop_count,
            begin_count,
            commit_count,
            rollback_count,
        } = self;
        write!(
            f,
            "Read: {}, Write: {}, Delete: {}, Update: {}, Create: {}, CreateIndex: {}, Drop: {}, Begin: {}, Commit: {}, Rollback: {}",
            read_count,
            write_count,
            delete_count,
            update_count,
            create_count,
            create_index_count,
            drop_count,
            begin_count,
            commit_count,
            rollback_count,
        )
    }
}
/// A single executable step of a plan.
#[derive(Debug)]
pub(crate) enum Interaction {
/// A query that is executed as-is.
Query(Query),
/// A check whose failure is reported as a failed assumption.
Assumption(Assertion),
/// A check whose failure is reported as a failed assertion.
Assertion(Assertion),
/// A fault to apply to the environment.
Fault(Fault),
/// Will attempt to run any random query. However, when the connection tries to sync it will
/// close all connections and reopen the database and assert that no data was lost
FsyncQuery(Query),
/// A query during whose execution I/O faults may be injected.
FaultyQuery(Query),
}
/// Renders a single interaction without a trailing newline. Plain and fsync queries are
/// rendered identically; the remaining kinds carry a marker describing what they are.
impl Display for Interaction {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Query(query) | Self::FsyncQuery(query) => write!(f, "{query}"),
            Self::FaultyQuery(query) => write!(f, "{query}; -- FAULTY QUERY"),
            Self::Fault(fault) => write!(f, "FAULT '{fault}'"),
            Self::Assumption(assumption) => write!(f, "ASSUME {}", assumption.name),
            Self::Assertion(assertion) => write!(f, "ASSERT {}", assertion.name),
        }
    }
}
/// Signature of an assertion/assumption check. It receives the stack of result sets and the
/// environment; the outer `Result` reports that the check itself could not be executed,
/// while the inner one carries `Ok(())` on success or the failure message.
type AssertionFunc = dyn Fn(&Vec<ResultSet>, &mut SimulatorEnv) -> Result<Result<(), String>>;
// NOTE(review): nothing in the visible code refers to this enum; it looks like a placeholder
// for a future, data-based representation of assertions - confirm before removing.
enum AssertionAST {
Pick(),
}
/// A named check that can be used either as an assertion or as an assumption.
pub(crate) struct Assertion {
    /// The check itself.
    pub(crate) func: Box<AssertionFunc>,
    pub(crate) name: String, // For display purposes in the plan
}

/// Only the name is printed, since the boxed closure cannot be formatted.
impl Debug for Assertion {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("Assertion");
        repr.field("name", &self.name);
        repr.finish()
    }
}
/// The faults a plan can apply to the environment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) enum Fault {
    /// Disconnect a single connection.
    Disconnect,
    /// Close every connection and open the database again.
    ReopenDatabase,
}

/// Renders the fault as the upper-case keyword used in the textual plan.
impl Display for Fault {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let keyword = match self {
            Fault::Disconnect => "DISCONNECT",
            Fault::ReopenDatabase => "REOPEN_DATABASE",
        };
        f.write_str(keyword)
    }
}
impl InteractionPlan {
    /// Creates a plan without any interactions.
    pub(crate) fn new() -> Self {
        Self { plan: Vec::new() }
    }
    /// Counts the queries of the plan by kind. Both standalone queries and the plain queries
    /// inside properties are counted; faults and every other kind of interaction are ignored.
    pub(crate) fn stats(&self) -> InteractionStats {
        /// Bumps the counter that corresponds to the kind of `query`.
        fn tally(query: &Query, stats: &mut InteractionStats) {
            let counter = match query {
                Query::Select(_) => &mut stats.read_count,
                Query::Insert(_) => &mut stats.write_count,
                Query::Delete(_) => &mut stats.delete_count,
                Query::Create(_) => &mut stats.create_count,
                Query::Drop(_) => &mut stats.drop_count,
                Query::Update(_) => &mut stats.update_count,
                Query::CreateIndex(_) => &mut stats.create_index_count,
                Query::Begin(_) => &mut stats.begin_count,
                Query::Commit(_) => &mut stats.commit_count,
                Query::Rollback(_) => &mut stats.rollback_count,
            };
            *counter += 1;
        }
        let mut stats = InteractionStats {
            read_count: 0,
            write_count: 0,
            delete_count: 0,
            update_count: 0,
            create_count: 0,
            create_index_count: 0,
            drop_count: 0,
            begin_count: 0,
            commit_count: 0,
            rollback_count: 0,
        };
        for entry in &self.plan {
            match entry {
                Interactions::Query(query) => tally(query, &mut stats),
                Interactions::Fault(_) => {}
                Interactions::Property(property) => {
                    property
                        .interactions()
                        .iter()
                        .filter_map(|interaction| match interaction {
                            Interaction::Query(query) => Some(query),
                            _ => None,
                        })
                        .for_each(|query| tally(query, &mut stats));
                }
            }
        }
        stats
    }
}
impl ArbitraryFrom<&mut SimulatorEnv> for InteractionPlan {
fn arbitrary_from<R: rand::Rng>(rng: &mut R, env: &mut SimulatorEnv) -> Self {
let mut plan = InteractionPlan::new();
let num_interactions = env.opts.max_interactions;
// First create at least one table
let create_query = Create::arbitrary(rng);
env.tables.push(create_query.table.clone());
plan.plan
.push(Interactions::Query(Query::Create(create_query)));
while plan.plan.len() < num_interactions {
tracing::debug!(
"Generating interaction {}/{}",
plan.plan.len(),
num_interactions
);
let interactions = Interactions::arbitrary_from(rng, (env, plan.stats()));
interactions.shadow(&mut env.tables);
plan.plan.push(interactions);
}
tracing::info!("Generated plan with {} interactions", plan.plan.len());
plan
}
}
impl Interaction {
/// Executes a `Query` interaction on `conn` and collects every returned row.
///
/// Returns the error reported by `conn.query`, or `LimboError::Busy` when stepping reports
/// that the database is busy.
///
/// Panics when the query fails with a parse error, when `conn.query` succeeds without
/// yielding a statement, when fetching a row or running pending I/O fails, or when called on
/// an interaction that is not a `Query`.
pub(crate) fn execute_query(&self, conn: &mut Arc<Connection>, _io: &SimulatorIO) -> ResultSet {
if let Self::Query(query) = self {
let query_str = query.to_string();
let rows = conn.query(&query_str);
if rows.is_err() {
let err = rows.err();
// Only the first 4096 bytes of the query text are logged.
tracing::debug!(
"Error running query '{}': {:?}",
&query_str[0..query_str.len().min(4096)],
err
);
// Generated queries are expected to always parse, so a parse error aborts the run.
if let Some(turso_core::LimboError::ParseError(e)) = err {
panic!("Unexpected parse error: {e}");
}
return Err(err.unwrap());
}
let rows = rows?;
assert!(rows.is_some());
let mut rows = rows.unwrap();
let mut out = Vec::new();
// NOTE(review): an `Err` returned by `step()` ends this loop silently and the rows gathered
// so far are returned as `Ok`; `execute_faulty_query` propagates such errors instead.
// Confirm that swallowing the error is intended here.
while let Ok(row) = rows.step() {
match row {
StepResult::Row => {
let row = rows.row().unwrap();
let mut r = Vec::new();
for v in row.get_values() {
let v = v.into();
r.push(v);
}
out.push(r);
}
StepResult::IO => {
rows.run_once().unwrap();
}
StepResult::Interrupt => {}
StepResult::Done => {
break;
}
StepResult::Busy => {
return Err(turso_core::LimboError::Busy);
}
}
}
Ok(out)
} else {
unreachable!("unexpected: this function should only be called on queries")
}
}
/// Runs the check of an `Assertion` interaction against the result stack.
///
/// A failed check and a check that could not be executed are both reported as a
/// `LimboError::InternalError` whose message names the assertion.
///
/// Panics when called on an interaction that is not an `Assertion`.
pub(crate) fn execute_assertion(
    &self,
    stack: &Vec<ResultSet>,
    env: &mut SimulatorEnv,
) -> Result<()> {
    let assertion = match self {
        Self::Assertion(assertion) => assertion,
        _ => {
            unreachable!("unexpected: this function should only be called on assertions")
        }
    };
    let failure = match (assertion.func)(stack, env) {
        Ok(Ok(())) => return Ok(()),
        Ok(Err(message)) => format!(
            "Assertion '{}' failed: {}",
            assertion.name, message
        ),
        Err(err) => format!(
            "Assertion '{}' execution error: {}",
            assertion.name, err
        ),
    };
    Err(turso_core::LimboError::InternalError(failure))
}
/// Runs the check of an `Assumption` interaction against the result stack.
///
/// A failed check and a check that could not be executed are both reported as a
/// `LimboError::InternalError` whose message names the assumption.
///
/// Panics when called on an interaction that is not an `Assumption`.
pub(crate) fn execute_assumption(
    &self,
    stack: &Vec<ResultSet>,
    env: &mut SimulatorEnv,
) -> Result<()> {
    let assumption = match self {
        Self::Assumption(assumption) => assumption,
        _ => {
            unreachable!("unexpected: this function should only be called on assumptions")
        }
    };
    let failure = match (assumption.func)(stack, env) {
        Ok(Ok(())) => return Ok(()),
        Ok(Err(message)) => format!(
            "Assumption '{}' failed: {}",
            assumption.name, message
        ),
        Err(err) => format!(
            "Assumption '{}' execution error: {}",
            assumption.name, err
        ),
    };
    Err(turso_core::LimboError::InternalError(failure))
}
/// Applies a `Fault` interaction to the environment.
///
/// `Disconnect` disconnects the connection at `conn_index` and marks its slot as
/// disconnected; it fails with an internal error when that connection is already
/// disconnected. `ReopenDatabase` reopens the database for all connections.
///
/// Panics when called on an interaction that is not a `Fault`.
pub(crate) fn execute_fault(&self, env: &mut SimulatorEnv, conn_index: usize) -> Result<()> {
    let fault = match self {
        Self::Fault(fault) => fault,
        _ => {
            unreachable!("unexpected: this function should only be called on faults")
        }
    };
    match fault {
        Fault::ReopenDatabase => reopen_database(env),
        Fault::Disconnect => {
            let connection = &mut env.connections[conn_index];
            if !connection.is_connected() {
                return Err(turso_core::LimboError::InternalError(
                    "connection already disconnected".into(),
                ));
            }
            connection.disconnect();
            *connection = SimConnection::Disconnected;
        }
    }
    Ok(())
}
/// Executes an `FsyncQuery` interaction: the query is stepped like a regular one, but when a
/// step waits on I/O while some file has a pending sync, the database is reopened instead of
/// running the pending I/O.
///
/// Returns the error reported by `conn.query`, or `LimboError::Busy` when stepping reports
/// that the database is busy.
///
/// Panics when `conn.query` succeeds without yielding a statement, when fetching a row or
/// running pending I/O fails, or when called on an interaction that is not an `FsyncQuery`.
pub(crate) fn execute_fsync_query(
&self,
conn: Arc<Connection>,
env: &mut SimulatorEnv,
) -> ResultSet {
if let Self::FsyncQuery(query) = self {
let query_str = query.to_string();
let rows = conn.query(&query_str);
if rows.is_err() {
let err = rows.err();
// Only the first 4096 bytes of the query text are logged.
tracing::debug!(
"Error running query '{}': {:?}",
&query_str[0..query_str.len().min(4096)],
err
);
return Err(err.unwrap());
}
let mut rows = rows.unwrap().unwrap();
let mut out = Vec::new();
// NOTE(review): an `Err` returned by `step()` ends this loop silently and the rows gathered
// so far are returned as `Ok` - confirm that this is intended.
while let Ok(row) = rows.step() {
match row {
StepResult::Row => {
let row = rows.row().unwrap();
let mut r = Vec::new();
for v in row.get_values() {
let v = v.into();
r.push(v);
}
out.push(r);
}
StepResult::IO => {
// A sync is considered in flight when any file has a sync completion registered.
let syncing = {
let files = env.io.files.borrow();
// TODO: currently assuming we only have 1 file that is syncing
files
.iter()
.any(|file| file.sync_completion.borrow().is_some())
};
if syncing {
reopen_database(env);
} else {
rows.run_once().unwrap();
}
}
StepResult::Done => {
break;
}
StepResult::Busy => {
return Err(turso_core::LimboError::Busy);
}
StepResult::Interrupt => {}
}
}
Ok(out)
} else {
unreachable!("unexpected: this function should only be called on queries")
}
}
/// Executes a `FaultyQuery` interaction, randomly asking the simulated I/O layer to inject
/// faults while the statement is being stepped.
///
/// Before every step a fault is requested with probability `current_prob` (initially 0.05),
/// unless some file has a pending sync. Every step that waits on I/O raises the probability
/// by `incr`; `incr` itself grows by 1% each time until the probability is capped at 1.0.
///
/// Returns the error reported by `conn.query`, any error produced while stepping or running
/// pending I/O, or `LimboError::Busy` when stepping reports that the database is busy.
///
/// Panics when the query fails with a parse error, when `conn.query` succeeds without
/// yielding a statement, when fetching a row fails, or when called on an interaction that is
/// not a `FaultyQuery`.
pub(crate) fn execute_faulty_query(
    &self,
    conn: &Arc<Connection>,
    env: &mut SimulatorEnv,
) -> ResultSet {
    use rand::Rng;
    if let Self::FaultyQuery(query) = self {
        let query_str = query.to_string();
        let rows = conn.query(&query_str);
        if rows.is_err() {
            let err = rows.err();
            // Only the first 4096 bytes of the query text are logged.
            tracing::debug!(
                "Error running query '{}': {:?}",
                &query_str[0..query_str.len().min(4096)],
                err
            );
            // Generated queries are expected to always parse, so a parse error aborts the run.
            if let Some(turso_core::LimboError::ParseError(e)) = err {
                panic!("Unexpected parse error: {e}");
            }
            return Err(err.unwrap());
        }
        let mut rows = rows.unwrap().unwrap();
        let mut out = Vec::new();
        let mut current_prob = 0.05;
        let mut incr = 0.001;
        loop {
            // A sync is considered in flight when any file has a sync completion registered.
            let syncing = {
                let files = env.io.files.borrow();
                files
                    .iter()
                    .any(|file| file.sync_completion.borrow().is_some())
            };
            // `random_bool` is the current name of the deprecated `gen_bool` and matches the
            // rest of the generation code.
            let inject_fault = env.rng.random_bool(current_prob);
            // TODO: avoid for now injecting faults when syncing
            if inject_fault && !syncing {
                env.io.inject_fault(true);
            }
            match rows.step()? {
                StepResult::Row => {
                    let row = rows.row().unwrap();
                    let mut r = Vec::new();
                    for v in row.get_values() {
                        let v = v.into();
                        r.push(v);
                    }
                    out.push(r);
                }
                StepResult::IO => {
                    rows.run_once()?;
                    // The longer the query keeps doing I/O, the likelier a fault becomes.
                    current_prob += incr;
                    if current_prob > 1.0 {
                        current_prob = 1.0;
                    } else {
                        incr *= 1.01;
                    }
                }
                StepResult::Done => {
                    break;
                }
                StepResult::Busy => {
                    return Err(turso_core::LimboError::Busy);
                }
                StepResult::Interrupt => {}
            }
        }
        Ok(out)
    } else {
        unreachable!("unexpected: this function should only be called on queries")
    }
}
}
/// Drops every connection and every open file of the environment, then opens the database
/// again and recreates the same number of connections.
///
/// Differential simulations get fresh SQLite connections; the other simulation types reopen
/// the database through `turso_core` and connect to it.
///
/// Panics when the database or any of the connections cannot be opened.
fn reopen_database(env: &mut SimulatorEnv) {
// 1. Close all connections without default checkpoint-on-close behavior
// to expose bugs related to how we handle WAL
let num_conns = env.connections.len();
env.connections.clear();
// Clear all open files
// TODO: for correct reporting of faults we should get all the recorded numbers and transfer to the new file
env.io.files.borrow_mut().clear();
// 2. Re-open database
match env.type_ {
SimulationType::Differential => {
for _ in 0..num_conns {
env.connections.push(SimConnection::SQLiteConnection(
rusqlite::Connection::open(env.get_db_path())
.expect("Failed to open SQLite connection"),
));
}
}
SimulationType::Default | SimulationType::Doublecheck => {
// The previous database handle is dropped before the file is opened again.
env.db = None;
// NOTE(review): the meaning of the two boolean arguments is not visible here - check
// the signature of `Database::open_file`.
let db = match turso_core::Database::open_file(
env.io.clone(),
env.get_db_path().to_str().expect("path should be 'to_str'"),
false,
true,
) {
Ok(db) => db,
Err(e) => {
tracing::error!(
"Failed to open database at {}: {}",
env.get_db_path().display(),
e
);
panic!("Failed to open database: {e}");
}
};
env.db = Some(db);
for _ in 0..num_conns {
env.connections.push(SimConnection::LimboConnection(
env.db.as_ref().expect("db to be Some").connect().unwrap(),
));
}
}
};
}
/// Generates a table creation whose table name is not used by any table of the environment,
/// regenerating until the name is free.
fn random_create<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Interactions {
    loop {
        let create = Create::arbitrary(rng);
        let name_taken = env.tables.iter().any(|t| t.name == create.table.name);
        if !name_taken {
            return Interactions::Query(Query::Create(create));
        }
    }
}
/// Generates a standalone select query for the environment.
fn random_read<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Interactions {
    let select = Select::arbitrary_from(rng, env);
    Interactions::Query(Query::Select(select))
}
/// Generates a standalone select query from the `SelectFree` generator.
fn random_expr<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Interactions {
    let select = SelectFree::arbitrary_from(rng, env).0;
    Interactions::Query(Query::Select(select))
}
/// Generates a standalone insert query for the environment.
fn random_write<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Interactions {
    let insert = Insert::arbitrary_from(rng, env);
    Interactions::Query(Query::Insert(insert))
}
/// Generates a standalone delete query for the environment.
fn random_delete<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Interactions {
    let delete = Delete::arbitrary_from(rng, env);
    Interactions::Query(Query::Delete(delete))
}
/// Generates a standalone update query for the environment.
fn random_update<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Interactions {
    let update = Update::arbitrary_from(rng, env);
    Interactions::Query(Query::Update(update))
}
/// Generates a standalone drop query for the environment.
fn random_drop<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Interactions {
    let drop = Drop::arbitrary_from(rng, env);
    Interactions::Query(Query::Drop(drop))
}
/// Generates an index creation whose index name is not yet present on the targeted table,
/// regenerating until that is the case. Returns `None` when the environment has no tables.
///
/// Panics when a generated index refers to a table that is not part of the environment.
fn random_create_index<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Option<Interactions> {
    if env.tables.is_empty() {
        return None;
    }
    loop {
        let create_index = CreateIndex::arbitrary_from(rng, env);
        let table = env
            .tables
            .iter()
            .find(|t| t.name == create_index.table_name)
            .expect("table should exist");
        let duplicate = table
            .indexes
            .iter()
            .any(|i| i == &create_index.index_name);
        if !duplicate {
            return Some(Interactions::Query(Query::CreateIndex(create_index)));
        }
    }
}
/// Picks a fault uniformly at random. Reopening the database is only a candidate when the
/// `disable_reopen_database` option is not set.
fn random_fault<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Interactions {
    let mut faults = vec![Fault::Disconnect];
    if !env.opts.disable_reopen_database {
        faults.push(Fault::ReopenDatabase);
    }
    let index = rng.random_range(0..faults.len());
    Interactions::Fault(faults[index].clone())
}
/// Generates the next entry of a plan.
///
/// The kind of entry is selected by `frequency`, weighted by how much of each kind of query
/// `remaining` still allows given the statistics of the plan so far. Drop queries are
/// currently disabled (weight zero), and faults always keep a weight of at least 1.
impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions {
    fn arbitrary_from<R: rand::Rng>(
        rng: &mut R,
        (env, stats): (&SimulatorEnv, InteractionStats),
    ) -> Self {
        let remaining_ = remaining(env, &stats);
        let property_weight = f64::min(remaining_.read, remaining_.write) + remaining_.create;
        let expr_weight = remaining_.read / 3.0;
        // remaining_.drop,
        let drop_weight = 0.0;
        let fault_weight = remaining_
            .read
            .min(remaining_.write)
            .min(remaining_.create)
            .max(1.0);
        frequency(
            vec![
                (
                    property_weight,
                    Box::new(|rng: &mut R| {
                        Interactions::Property(Property::arbitrary_from(rng, (env, &stats)))
                    }),
                ),
                (
                    remaining_.read,
                    Box::new(|rng: &mut R| random_read(rng, env)),
                ),
                (
                    expr_weight,
                    Box::new(|rng: &mut R| random_expr(rng, env)),
                ),
                (
                    remaining_.write,
                    Box::new(|rng: &mut R| random_write(rng, env)),
                ),
                (
                    remaining_.create,
                    Box::new(|rng: &mut R| random_create(rng, env)),
                ),
                (
                    remaining_.create_index,
                    Box::new(|rng: &mut R| {
                        // if no tables exist, we can't create an index, so fallback to creating a table
                        random_create_index(rng, env).unwrap_or_else(|| random_create(rng, env))
                    }),
                ),
                (
                    remaining_.delete,
                    Box::new(|rng: &mut R| random_delete(rng, env)),
                ),
                (
                    remaining_.update,
                    Box::new(|rng: &mut R| random_update(rng, env)),
                ),
                (
                    drop_weight,
                    Box::new(|rng: &mut R| random_drop(rng, env)),
                ),
                (
                    fault_weight,
                    Box::new(|rng: &mut R| random_fault(rng, env)),
                ),
            ],
            rng,
        )
    }
}

View File

@@ -0,0 +1,586 @@
//! Generators for [ast::Expr::Binary] predicates.
use turso_parser::ast::{self, Expr};
use crate::{
generation::{
backtrack, one_of, pick,
predicate::{CompoundPredicate, SimplePredicate},
table::{GTValue, LTValue, LikeValue},
ArbitraryFrom, ArbitraryFromMaybe as _,
},
model::{
query::predicate::Predicate,
table::{SimValue, Table, TableContext},
},
};
impl Predicate {
/// Generate an [ast::Expr::Binary] [Predicate] from a column and [SimValue]
pub fn from_column_binary<R: rand::Rng>(
rng: &mut R,
column_name: &str,
value: &SimValue,
) -> Predicate {
let expr = one_of(
vec![
Box::new(|_| {
Expr::Binary(
Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))),
ast::Operator::Equals,
Box::new(Expr::Literal(value.into())),
)
}),
Box::new(|rng| {
let gt_value = GTValue::arbitrary_from(rng, value).0;
Expr::Binary(
Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))),
ast::Operator::Greater,
Box::new(Expr::Literal(gt_value.into())),
)
}),
Box::new(|rng| {
let lt_value = LTValue::arbitrary_from(rng, value).0;
Expr::Binary(
Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))),
ast::Operator::Less,
Box::new(Expr::Literal(lt_value.into())),
)
}),
],
rng,
);
Predicate(expr)
}
/// Produces a true [ast::Expr::Binary] [Predicate] that is true for the provided row in the given table
pub fn true_binary<R: rand::Rng>(rng: &mut R, t: &Table, row: &[SimValue]) -> Predicate {
// Pick a column
let column_index = rng.random_range(0..t.columns.len());
let mut column = t.columns[column_index].clone();
let value = &row[column_index];
let mut table_name = t.name.clone();
if t.name.is_empty() {
// If the table name is empty, we cannot create a qualified expression
// so we use the column name directly
let mut splitted = column.name.split('.');
table_name = splitted
.next()
.expect("Column name should have a table prefix for a joined table")
.to_string();
column.name = splitted
.next()
.expect("Column name should have a column suffix for a joined table")
.to_string();
}
let expr = backtrack(
vec![
(
1,
Box::new(|_| {
Some(Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(&table_name),
ast::Name::new(&column.name),
)),
ast::Operator::Equals,
Box::new(Expr::Literal(value.into())),
))
}),
),
(
1,
Box::new(|rng| {
let v = SimValue::arbitrary_from(rng, &column.column_type);
if &v == value {
None
} else {
Some(Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(&table_name),
ast::Name::new(&column.name),
)),
ast::Operator::NotEquals,
Box::new(Expr::Literal(v.into())),
))
}
}),
),
(
1,
Box::new(|rng| {
let lt_value = LTValue::arbitrary_from(rng, value).0;
Some(Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(&table_name),
ast::Name::new(&column.name),
)),
ast::Operator::Greater,
Box::new(Expr::Literal(lt_value.into())),
))
}),
),
(
1,
Box::new(|rng| {
let gt_value = GTValue::arbitrary_from(rng, value).0;
Some(Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(&table_name),
ast::Name::new(&column.name),
)),
ast::Operator::Less,
Box::new(Expr::Literal(gt_value.into())),
))
}),
),
(
1,
Box::new(|rng| {
// TODO: generation for Like and Glob expressions should be extracted to different module
LikeValue::arbitrary_from_maybe(rng, value).map(|like| {
Expr::Like {
lhs: Box::new(ast::Expr::Qualified(
ast::Name::new(&table_name),
ast::Name::new(&column.name),
)),
not: false, // TODO: also generate this value eventually
op: ast::LikeOperator::Like,
rhs: Box::new(Expr::Literal(like.0.into())),
escape: None, // TODO: implement
}
})
}),
),
],
rng,
);
// Backtrack will always return Some here
Predicate(expr.unwrap())
}
/// Produces an [ast::Expr::Binary] [Predicate] that is false for the provided row in the given table
pub fn false_binary<R: rand::Rng>(rng: &mut R, t: &Table, row: &[SimValue]) -> Predicate {
// Pick a column
let column_index = rng.random_range(0..t.columns.len());
let mut column = t.columns[column_index].clone();
let mut table_name = t.name.clone();
let value = &row[column_index];
if t.name.is_empty() {
// If the table name is empty, we cannot create a qualified expression
// so we use the column name directly
let mut splitted = column.name.split('.');
table_name = splitted
.next()
.expect("Column name should have a table prefix for a joined table")
.to_string();
column.name = splitted
.next()
.expect("Column name should have a column suffix for a joined table")
.to_string();
}
let expr = one_of(
vec![
Box::new(|_| {
Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(&table_name),
ast::Name::new(&column.name),
)),
ast::Operator::NotEquals,
Box::new(Expr::Literal(value.into())),
)
}),
Box::new(|rng| {
let v = loop {
let v = SimValue::arbitrary_from(rng, &column.column_type);
if &v != value {
break v;
}
};
Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(&table_name),
ast::Name::new(&column.name),
)),
ast::Operator::Equals,
Box::new(Expr::Literal(v.into())),
)
}),
Box::new(|rng| {
let gt_value = GTValue::arbitrary_from(rng, value).0;
Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(&table_name),
ast::Name::new(&column.name),
)),
ast::Operator::Greater,
Box::new(Expr::Literal(gt_value.into())),
)
}),
Box::new(|rng| {
let lt_value = LTValue::arbitrary_from(rng, value).0;
Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(&table_name),
ast::Name::new(&column.name),
)),
ast::Operator::Less,
Box::new(Expr::Literal(lt_value.into())),
)
}),
],
rng,
);
Predicate(expr)
}
}
impl SimplePredicate {
/// Generates a true [ast::Expr::Binary] [SimplePredicate] from a [TableContext] for a row in the table
pub fn true_binary<R: rand::Rng, T: TableContext>(
rng: &mut R,
table: &T,
row: &[SimValue],
) -> Self {
// Pick a random column
let columns = table.columns().collect::<Vec<_>>();
let column_index = rng.random_range(0..columns.len());
let column = columns[column_index];
let column_value = &row[column_index];
let table_name = column.table_name;
// Avoid creation of NULLs
if row.is_empty() {
return SimplePredicate(Predicate(Expr::Literal(SimValue::TRUE.into())));
}
let expr = one_of(
vec![
Box::new(|_rng| {
Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(table_name),
ast::Name::new(&column.column.name),
)),
ast::Operator::Equals,
Box::new(Expr::Literal(column_value.into())),
)
}),
Box::new(|rng| {
let lt_value = LTValue::arbitrary_from(rng, column_value).0;
Expr::Binary(
Box::new(Expr::Qualified(
ast::Name::new(table_name),
ast::Name::new(&column.column.name),
)),
ast::Operator::Greater,
Box::new(Expr::Literal(lt_value.into())),
)
}),
Box::new(|rng| {
let gt_value = GTValue::arbitrary_from(rng, column_value).0;
Expr::Binary(
Box::new(Expr::Qualified(
ast::Name::new(table_name),
ast::Name::new(&column.column.name),
)),
ast::Operator::Less,
Box::new(Expr::Literal(gt_value.into())),
)
}),
],
rng,
);
SimplePredicate(Predicate(expr))
}
/// Generates a false [ast::Expr::Binary] [SimplePredicate] from a [TableContext] for a row in the table
pub fn false_binary<R: rand::Rng, T: TableContext>(
rng: &mut R,
table: &T,
row: &[SimValue],
) -> Self {
let columns = table.columns().collect::<Vec<_>>();
// Pick a random column
let column_index = rng.random_range(0..columns.len());
let column = columns[column_index];
let column_value = &row[column_index];
let table_name = column.table_name;
// Avoid creation of NULLs
if row.is_empty() {
return SimplePredicate(Predicate(Expr::Literal(SimValue::FALSE.into())));
}
let expr = one_of(
vec![
Box::new(|_rng| {
Expr::Binary(
Box::new(Expr::Qualified(
ast::Name::new(table_name),
ast::Name::new(&column.column.name),
)),
ast::Operator::NotEquals,
Box::new(Expr::Literal(column_value.into())),
)
}),
Box::new(|rng| {
let gt_value = GTValue::arbitrary_from(rng, column_value).0;
Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(table_name),
ast::Name::new(&column.column.name),
)),
ast::Operator::Greater,
Box::new(Expr::Literal(gt_value.into())),
)
}),
Box::new(|rng| {
let lt_value = LTValue::arbitrary_from(rng, column_value).0;
Expr::Binary(
Box::new(ast::Expr::Qualified(
ast::Name::new(table_name),
ast::Name::new(&column.column.name),
)),
ast::Operator::Less,
Box::new(Expr::Literal(lt_value.into())),
)
}),
],
rng,
);
SimplePredicate(Predicate(expr))
}
}
impl CompoundPredicate {
    /// Builds an `AND` chain (70% of the time) or an `OR` chain of one to three
    /// [SimplePredicate]s over a randomly picked row of `table`.
    ///
    /// The children are chosen so the chain is intended to evaluate to `predicate_value` for
    /// that row. When the table has no rows, the constant predicate for `predicate_value` is
    /// returned.
    pub fn from_table_binary<R: rand::Rng, T: TableContext>(
        rng: &mut R,
        table: &T,
        predicate_value: bool,
    ) -> Self {
        // Constant predicate carrying a given truth value.
        let constant = |truth: bool| {
            if truth {
                Predicate::true_()
            } else {
                Predicate::false_()
            }
        };

        // Cannot pick a row if the table is empty
        let rows = table.rows();
        if rows.is_empty() {
            return Self(constant(predicate_value));
        }
        let row = pick(rows, rng);
        let conjunction = rng.random_bool(0.7);

        // Truth value required of each child:
        //  * AND/true and OR/false need every child to equal the target value;
        //  * AND/false and OR/true only need one child to equal it, so the others are random.
        let child_truths: Vec<bool> = if conjunction == predicate_value {
            (0..rng.random_range(1..=3))
                .map(|_| predicate_value)
                .collect()
        } else {
            let mut flags: Vec<bool> = (0..rng.random_range(1..=3))
                .map(|_| rng.random_bool(0.5))
                .collect();
            // Make sure at least one child carries the deciding value
            if !flags.contains(&predicate_value) {
                let slot = rng.random_range(0..flags.len());
                flags[slot] = predicate_value;
            }
            flags
        };

        let combined = child_truths
            .iter()
            .map(|truth| SimplePredicate::arbitrary_from(rng, (table, row, *truth)).0)
            .reduce(|lhs, rhs| {
                let operator = if conjunction {
                    ast::Operator::And
                } else {
                    ast::Operator::Or
                };
                Predicate(Expr::Binary(Box::new(lhs.0), operator, Box::new(rhs.0)))
            })
            .unwrap_or(constant(predicate_value));
        Self(combined)
    }
}
#[cfg(test)]
mod tests {
    use rand::{Rng as _, SeedableRng as _};
    use rand_chacha::ChaCha8Rng;

    use crate::{
        generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _},
        model::{
            query::predicate::{expr_to_value, Predicate},
            table::{SimValue, Table},
        },
    };

    /// Number of random cases each fuzz test runs.
    const ITERATIONS: usize = 10000;

    /// Seed derived from the wall clock (seconds since the Unix epoch); it is printed in
    /// every assertion message so a failing run can be reproduced.
    fn get_seed() -> u64 {
        let now = std::time::SystemTime::now();
        now.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()
    }

    /// Generates a random table plus 1 to 9 random rows matching its column types.
    /// The rows are returned separately and are not stored in the table.
    fn random_table_with_rows(rng: &mut ChaCha8Rng) -> (Table, Vec<Vec<SimValue>>) {
        let table = Table::arbitrary(&mut *rng);
        let num_rows = rng.random_range(1..10);
        let rows: Vec<Vec<SimValue>> = (0..num_rows)
            .map(|_| {
                table
                    .columns
                    .iter()
                    .map(|c| SimValue::arbitrary_from(&mut *rng, &c.column_type))
                    .collect()
            })
            .collect();
        (table, rows)
    }

    #[test]
    fn fuzz_true_binary_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (table, values) = random_table_with_rows(&mut rng);
            let row = pick(&values, &mut rng);
            let predicate = Predicate::true_binary(&mut rng, &table, row);
            let value = expr_to_value(&predicate.0, row, &table);
            let holds = value.as_ref().is_some_and(|value| value.as_bool());
            assert!(
                holds,
                "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}"
            )
        }
    }

    #[test]
    fn fuzz_false_binary_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (table, values) = random_table_with_rows(&mut rng);
            let row = pick(&values, &mut rng);
            let predicate = Predicate::false_binary(&mut rng, &table, row);
            let value = expr_to_value(&predicate.0, row, &table);
            let holds = value.as_ref().is_some_and(|value| value.as_bool());
            assert!(
                !holds,
                "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}"
            )
        }
    }

    #[test]
    fn fuzz_true_binary_simple_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (mut table, values) = random_table_with_rows(&mut rng);
            table.rows.extend(values.clone());
            let row = pick(&table.rows, &mut rng);
            let predicate = SimplePredicate::true_binary(&mut rng, &table, row);
            // The predicate must hold for at least one of the generated rows.
            let result = values
                .iter()
                .map(|row| predicate.0.test(row, &table))
                .fold(false, |seen, hit| seen || hit);
            assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}")
        }
    }

    #[test]
    fn fuzz_false_binary_simple_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (mut table, values) = random_table_with_rows(&mut rng);
            table.rows.extend(values.clone());
            let row = pick(&table.rows, &mut rng);
            let predicate = SimplePredicate::false_binary(&mut rng, &table, row);
            // The predicate must fail for at least one of the generated rows.
            let result = values
                .iter()
                .map(|row| predicate.0.test(row, &table))
                .any(|res| !res);
            assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}")
        }
    }
}

View File

@@ -0,0 +1,378 @@
use rand::{seq::SliceRandom as _, Rng};
use turso_parser::ast::{self, Expr};
use crate::model::{
query::predicate::Predicate,
table::{SimValue, Table, TableContext},
};
use super::{one_of, ArbitraryFrom};
mod binary;
mod unary;
/// Newtype around a [Predicate] built by joining several [SimplePredicate]s with `AND`/`OR`
/// (see `CompoundPredicate::from_table_binary`).
#[derive(Debug)]
struct CompoundPredicate(Predicate);
/// Newtype around a [Predicate] made of a single unary or binary expression
/// (see the `true_*` / `false_*` constructors in the `binary` and `unary` submodules).
#[derive(Debug)]
struct SimplePredicate(Predicate);
impl<A: AsRef<[SimValue]>, T: TableContext> ArbitraryFrom<(&T, A, bool)> for SimplePredicate {
    /// Generates a simple predicate for `row` that should evaluate to `predicate_value`,
    /// choosing between the binary and the unary generators with equal probability.
    fn arbitrary_from<R: Rng>(rng: &mut R, (table, row, predicate_value): (&T, A, bool)) -> Self {
        let row = row.as_ref();
        // Pick the operator family: 0 selects binary, 1 selects unary
        let use_binary = match rng.random_range(0..2) {
            0 => true,
            1 => false,
            _ => unreachable!(),
        };
        match (predicate_value, use_binary) {
            (true, true) => SimplePredicate::true_binary(rng, table, row),
            (true, false) => SimplePredicate::true_unary(rng, table, row),
            (false, true) => SimplePredicate::false_binary(rng, table, row),
            (false, false) => SimplePredicate::false_unary(rng, table, row),
        }
    }
}
impl<T: TableContext> ArbitraryFrom<(&T, bool)> for CompoundPredicate {
    /// Delegates to [CompoundPredicate::from_table_binary].
    fn arbitrary_from<R: Rng>(rng: &mut R, (table, predicate_value): (&T, bool)) -> Self {
        Self::from_table_binary(rng, table, predicate_value)
    }
}
impl<T: TableContext> ArbitraryFrom<&T> for Predicate {
    /// Generates a parenthesized predicate over `table` whose target truth value is itself
    /// chosen at random (50/50).
    fn arbitrary_from<R: Rng>(rng: &mut R, table: &T) -> Self {
        let target_value = rng.random_bool(0.5);
        let generated =
            <Predicate as ArbitraryFrom<(&T, bool)>>::arbitrary_from(rng, (table, target_value));
        generated.parens()
    }
}
impl<T: TableContext> ArbitraryFrom<(&T, bool)> for Predicate {
    /// Generates a [CompoundPredicate] for the requested truth value and unwraps it.
    fn arbitrary_from<R: Rng>(rng: &mut R, (table, predicate_value): (&T, bool)) -> Self {
        let compound = CompoundPredicate::arbitrary_from(rng, (table, predicate_value));
        compound.0
    }
}
impl ArbitraryFrom<(&str, &SimValue)> for Predicate {
    /// Delegates to [Predicate::from_column_binary] for the given column name and value.
    fn arbitrary_from<R: Rng>(rng: &mut R, (column_name, value): (&str, &SimValue)) -> Self {
        Self::from_column_binary(rng, column_name, value)
    }
}
impl ArbitraryFrom<(&Table, &Vec<SimValue>)> for Predicate {
fn arbitrary_from<R: Rng>(rng: &mut R, (t, row): (&Table, &Vec<SimValue>)) -> Self {
// We want to produce a predicate that is true for the row
// We can do this by creating several predicates that
// are true, some that are false, combiend them in ways that correspond to the creation of a true predicate
// Produce some true and false predicates
let mut true_predicates = (1..=rng.random_range(1..=4))
.map(|_| Predicate::true_binary(rng, t, row))
.collect::<Vec<_>>();
let false_predicates = (0..=rng.random_range(0..=3))
.map(|_| Predicate::false_binary(rng, t, row))
.collect::<Vec<_>>();
// Start building a top level predicate from a true predicate
let mut result = true_predicates.pop().unwrap();
let mut predicates = true_predicates
.iter()
.map(|p| (true, p.clone()))
.chain(false_predicates.iter().map(|p| (false, p.clone())))
.collect::<Vec<_>>();
predicates.shuffle(rng);
while !predicates.is_empty() {
// Create a new predicate from at least 1 and at most 3 predicates
let context =
predicates[0..rng.random_range(0..=usize::min(3, predicates.len()))].to_vec();
// Shift `predicates` to remove the predicates in the context
predicates = predicates[context.len()..].to_vec();
// `result` is true, so we have the following three options to make a true predicate:
// T or F
// T or T
// T and T
result = one_of(
vec![
// T or (X1 or X2 or ... or Xn)
Box::new(|_| {
Predicate(Expr::Binary(
Box::new(result.0.clone()),
ast::Operator::Or,
Box::new(
context
.iter()
.map(|(_, p)| p.clone())
.reduce(|accum, curr| {
Predicate(Expr::Binary(
Box::new(accum.0),
ast::Operator::Or,
Box::new(curr.0),
))
})
.unwrap_or(Predicate::false_())
.0,
),
))
}),
// T or (T1 and T2 and ... and Tn)
Box::new(|_| {
Predicate(Expr::Binary(
Box::new(result.0.clone()),
ast::Operator::Or,
Box::new(
context
.iter()
.map(|(_, p)| p.clone())
.reduce(|accum, curr| {
Predicate(Expr::Binary(
Box::new(accum.0),
ast::Operator::And,
Box::new(curr.0),
))
})
.unwrap_or(Predicate::true_())
.0,
),
))
}),
// T and T
Box::new(|_| {
// Check if all the predicates in the context are true
if context.iter().all(|(b, _)| *b) {
// T and (X1 or X2 or ... or Xn)
Predicate(Expr::Binary(
Box::new(result.0.clone()),
ast::Operator::And,
Box::new(
context
.iter()
.map(|(_, p)| p.clone())
.reduce(|accum, curr| {
Predicate(Expr::Binary(
Box::new(accum.0),
ast::Operator::And,
Box::new(curr.0),
))
})
.unwrap_or(Predicate::true_())
.0,
),
))
}
// Check if there is at least one true predicate
else if context.iter().any(|(b, _)| *b) {
// T and (X1 or X2 or ... or Xn)
Predicate(Expr::Binary(
Box::new(result.0.clone()),
ast::Operator::And,
Box::new(
context
.iter()
.map(|(_, p)| p.clone())
.reduce(|accum, curr| {
Predicate(Expr::Binary(
Box::new(accum.0),
ast::Operator::Or,
Box::new(curr.0),
))
})
.unwrap_or(Predicate::false_())
.0,
),
))
// Predicate::And(vec![
// result.clone(),
// Predicate::Or(context.iter().map(|(_, p)| p.clone()).collect()),
// ])
} else {
// T and (X1 or X2 or ... or Xn or TRUE)
Predicate(Expr::Binary(
Box::new(result.0.clone()),
ast::Operator::And,
Box::new(
context
.iter()
.map(|(_, p)| p.clone())
.chain(std::iter::once(Predicate::true_()))
.reduce(|accum, curr| {
Predicate(Expr::Binary(
Box::new(accum.0),
ast::Operator::Or,
Box::new(curr.0),
))
})
.unwrap() // Chain guarantees at least one value
.0,
),
))
}
}),
],
rng,
);
}
result
}
}
#[cfg(test)]
mod tests {
    use rand::{Rng as _, SeedableRng as _};
    use rand_chacha::ChaCha8Rng;

    use crate::{
        generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _},
        model::{
            query::predicate::{expr_to_value, Predicate},
            table::{SimValue, Table},
        },
    };

    /// Number of random cases each fuzz test runs.
    const ITERATIONS: usize = 10000;

    /// Seed derived from the wall clock (seconds since the Unix epoch); it is printed in
    /// every assertion message so a failing run can be reproduced.
    fn get_seed() -> u64 {
        let now = std::time::SystemTime::now();
        now.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()
    }

    /// Generates a random table plus 1 to 9 random rows matching its column types.
    /// The rows are returned separately and are not stored in the table.
    fn random_table_with_rows(rng: &mut ChaCha8Rng) -> (Table, Vec<Vec<SimValue>>) {
        let table = Table::arbitrary(&mut *rng);
        let num_rows = rng.random_range(1..10);
        let rows: Vec<Vec<SimValue>> = (0..num_rows)
            .map(|_| {
                table
                    .columns
                    .iter()
                    .map(|c| SimValue::arbitrary_from(&mut *rng, &c.column_type))
                    .collect()
            })
            .collect();
        (table, rows)
    }

    #[test]
    fn fuzz_arbitrary_table_true_simple_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (table, values) = random_table_with_rows(&mut rng);
            let row = pick(&values, &mut rng);
            let predicate = SimplePredicate::arbitrary_from(&mut rng, (&table, row, true)).0;
            let value = expr_to_value(&predicate.0, row, &table);
            let holds = value.as_ref().is_some_and(|value| value.as_bool());
            assert!(
                holds,
                "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}"
            )
        }
    }

    #[test]
    fn fuzz_arbitrary_table_false_simple_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (table, values) = random_table_with_rows(&mut rng);
            let row = pick(&values, &mut rng);
            let predicate = SimplePredicate::arbitrary_from(&mut rng, (&table, row, false)).0;
            let value = expr_to_value(&predicate.0, row, &table);
            let holds = value.as_ref().is_some_and(|value| value.as_bool());
            assert!(
                !holds,
                "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}"
            )
        }
    }

    #[test]
    fn fuzz_arbitrary_row_table_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (table, values) = random_table_with_rows(&mut rng);
            let row = pick(&values, &mut rng);
            let predicate = Predicate::arbitrary_from(&mut rng, (&table, row));
            let value = expr_to_value(&predicate.0, row, &table);
            let holds = value.as_ref().is_some_and(|value| value.as_bool());
            assert!(
                holds,
                "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}"
            )
        }
    }

    #[test]
    fn fuzz_arbitrary_true_table_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (mut table, values) = random_table_with_rows(&mut rng);
            table.rows.extend(values.clone());
            let predicate = Predicate::arbitrary_from(&mut rng, (&table, true));
            // The predicate must hold for at least one of the generated rows.
            let result = values
                .iter()
                .map(|row| predicate.test(row, &table))
                .fold(false, |seen, hit| seen || hit);
            assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}")
        }
    }

    #[test]
    fn fuzz_arbitrary_false_table_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (mut table, values) = random_table_with_rows(&mut rng);
            table.rows.extend(values.clone());
            let predicate = Predicate::arbitrary_from(&mut rng, (&table, false));
            // The predicate must fail for at least one of the generated rows.
            let result = values
                .iter()
                .map(|row| predicate.test(row, &table))
                .any(|res| !res);
            assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}")
        }
    }
}

View File

@@ -0,0 +1,306 @@
//! Contains code regarding generation for [ast::Expr::Unary] Predicate
//! TODO: for now just generating [ast::Literal], but want to also generate Columns and any
//! arbitrary [ast::Expr]
use turso_parser::ast::{self, Expr};
use crate::{
generation::{backtrack, pick, predicate::SimplePredicate, ArbitraryFromMaybe},
model::{
query::predicate::Predicate,
table::{SimValue, TableContext},
},
};
/// Wrapper for a [SimValue] whose `as_bool()` is true.
pub struct TrueValue(pub SimValue);

impl ArbitraryFromMaybe<&SimValue> for TrueValue {
    /// Wraps a clone of `value` when `value.as_bool()` is true; otherwise yields `None`.
    /// The random generator is not used.
    fn arbitrary_from_maybe<R: rand::Rng>(_rng: &mut R, value: &SimValue) -> Option<Self>
    where
        Self: Sized,
    {
        if value.as_bool() {
            Some(Self(value.clone()))
        } else {
            None
        }
    }
}

impl ArbitraryFromMaybe<&Vec<&SimValue>> for TrueValue {
    /// Picks one of `values` at random and wraps it if it is true.
    /// An empty list yields `SimValue::TRUE`.
    fn arbitrary_from_maybe<R: rand::Rng>(rng: &mut R, values: &Vec<&SimValue>) -> Option<Self>
    where
        Self: Sized,
    {
        if values.is_empty() {
            Some(Self(SimValue::TRUE))
        } else {
            let chosen = pick(values, rng);
            Self::arbitrary_from_maybe(rng, *chosen)
        }
    }
}
/// Wrapper for a [SimValue] whose `as_bool()` is false.
pub struct FalseValue(pub SimValue);

impl ArbitraryFromMaybe<&SimValue> for FalseValue {
    /// Wraps a clone of `value` when `value.as_bool()` is false; otherwise yields `None`.
    /// The random generator is not used.
    fn arbitrary_from_maybe<R: rand::Rng>(_rng: &mut R, value: &SimValue) -> Option<Self>
    where
        Self: Sized,
    {
        if value.as_bool() {
            None
        } else {
            Some(Self(value.clone()))
        }
    }
}

impl ArbitraryFromMaybe<&Vec<&SimValue>> for FalseValue {
    /// Picks one of `values` at random and wraps it if it is false.
    /// An empty list yields `SimValue::FALSE`.
    fn arbitrary_from_maybe<R: rand::Rng>(rng: &mut R, values: &Vec<&SimValue>) -> Option<Self>
    where
        Self: Sized,
    {
        if values.is_empty() {
            Some(Self(SimValue::FALSE))
        } else {
            let chosen = pick(values, rng);
            Self::arbitrary_from_maybe(rng, *chosen)
        }
    }
}
/// Wrapper for a [SimValue] whose bitwise negation has a requested truth value.
/// The wrapped value is the original operand, not the negated result.
pub struct BitNotValue(pub SimValue);

impl ArbitraryFromMaybe<(&SimValue, bool)> for BitNotValue {
    /// Wraps a clone of `value` when applying `BitwiseNot` to it gives a value whose
    /// `as_bool()` equals `predicate`; otherwise yields `None`. The random generator is not used.
    fn arbitrary_from_maybe<R: rand::Rng>(
        _rng: &mut R,
        (value, predicate): (&SimValue, bool),
    ) -> Option<Self>
    where
        Self: Sized,
    {
        let negated = value.unary_exec(ast::UnaryOperator::BitwiseNot);
        if negated.as_bool() == predicate {
            Some(BitNotValue(value.clone()))
        } else {
            None
        }
    }
}

impl ArbitraryFromMaybe<(&Vec<&SimValue>, bool)> for BitNotValue {
    /// Picks one of `values` at random and applies the single-value check to it.
    /// An empty list yields `None`.
    fn arbitrary_from_maybe<R: rand::Rng>(
        rng: &mut R,
        (values, predicate): (&Vec<&SimValue>, bool),
    ) -> Option<Self>
    where
        Self: Sized,
    {
        if values.is_empty() {
            None
        } else {
            let chosen = pick(values, rng);
            Self::arbitrary_from_maybe(rng, (*chosen, predicate))
        }
    }
}
// TODO: add more complex generation that also uses column names here
impl SimplePredicate {
    /// Generates a unary [SimplePredicate] intended to be true, built from the value of a
    /// randomly chosen column of `row`.
    ///
    /// Two shapes are tried via `backtrack`: `+<value>` when the value is true and
    /// `NOT <value>` when it is false. If neither produces an expression, or `row` is empty,
    /// the literal `TRUE` is returned.
    ///
    /// # Panics
    /// Panics if `table` has no columns, or if `row` is shorter than the chosen column index.
    pub fn true_unary<R: rand::Rng, T: TableContext>(
        rng: &mut R,
        table: &T,
        row: &[SimValue],
    ) -> Self {
        // An empty row has no value to build from. This check must come before
        // `row[column_index]`, otherwise the indexing panics and the fallback is unreachable.
        if row.is_empty() {
            return SimplePredicate(Predicate(Expr::Literal(SimValue::TRUE.into())));
        }
        // Pick a random column; only the number of columns is needed here.
        let column_index = rng.random_range(0..table.columns().count());
        let column_value = &row[column_index];
        let num_retries = row.len();
        // NOTE: `Negative` and `BitwiseNot` (via `BitNotValue`) variants are intentionally
        // disabled for now; only `Positive` and `Not` are generated.
        let expr = backtrack(
            vec![
                (
                    num_retries,
                    Box::new(|rng| {
                        TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| {
                            assert!(value.0.as_bool());
                            // Positive is a no-op in Sqlite
                            Expr::unary(ast::UnaryOperator::Positive, Expr::Literal(value.0.into()))
                        })
                    }),
                ),
                (
                    num_retries,
                    Box::new(|rng| {
                        FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| {
                            assert!(!value.0.as_bool());
                            Expr::unary(ast::UnaryOperator::Not, Expr::Literal(value.0.into()))
                        })
                    }),
                ),
            ],
            rng,
        );
        // If cannot generate a value
        SimplePredicate(Predicate(
            expr.unwrap_or_else(|| Expr::Literal(SimValue::TRUE.into())),
        ))
    }

    /// Generates a unary [SimplePredicate] intended to be false, built from the value of a
    /// randomly chosen column of `row`.
    ///
    /// The only shape tried via `backtrack` is `NOT <value>` for a true value. If that
    /// produces no expression, or `row` is empty, the literal `FALSE` is returned.
    ///
    /// # Panics
    /// Panics if `table` has no columns, or if `row` is shorter than the chosen column index.
    pub fn false_unary<R: rand::Rng, T: TableContext>(
        rng: &mut R,
        table: &T,
        row: &[SimValue],
    ) -> Self {
        // An empty row has no value to build from. This check must come before
        // `row[column_index]`, otherwise the indexing panics and the fallback is unreachable.
        if row.is_empty() {
            return SimplePredicate(Predicate(Expr::Literal(SimValue::FALSE.into())));
        }
        // Pick a random column; only the number of columns is needed here.
        let column_index = rng.random_range(0..table.columns().count());
        let column_value = &row[column_index];
        let num_retries = row.len();
        // NOTE: `Positive`/`Negative` on a false value and `BitwiseNot` (via `BitNotValue`)
        // variants are intentionally disabled for now; only `Not` is generated.
        let expr = backtrack(
            vec![(
                num_retries,
                Box::new(|rng| {
                    TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| {
                        assert!(value.0.as_bool());
                        Expr::unary(ast::UnaryOperator::Not, Expr::Literal(value.0.into()))
                    })
                }),
            )],
            rng,
        );
        // If cannot generate a value
        SimplePredicate(Predicate(
            expr.unwrap_or_else(|| Expr::Literal(SimValue::FALSE.into())),
        ))
    }
}
#[cfg(test)]
mod tests {
    use rand::{Rng as _, SeedableRng as _};
    use rand_chacha::ChaCha8Rng;

    use crate::{
        generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _},
        model::table::{SimValue, Table},
    };

    /// Number of random cases each fuzz test runs.
    const ITERATIONS: usize = 10000;

    /// Seed derived from the wall clock (seconds since the Unix epoch); it is printed in
    /// every assertion message so a failing run can be reproduced.
    fn get_seed() -> u64 {
        let now = std::time::SystemTime::now();
        now.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()
    }

    /// Generates a random table plus 1 to 9 random rows matching its column types.
    /// The rows are returned separately and are not stored in the table.
    fn random_table_with_rows(rng: &mut ChaCha8Rng) -> (Table, Vec<Vec<SimValue>>) {
        let table = Table::arbitrary(&mut *rng);
        let num_rows = rng.random_range(1..10);
        let rows: Vec<Vec<SimValue>> = (0..num_rows)
            .map(|_| {
                table
                    .columns
                    .iter()
                    .map(|c| SimValue::arbitrary_from(&mut *rng, &c.column_type))
                    .collect()
            })
            .collect();
        (table, rows)
    }

    #[test]
    fn fuzz_true_unary_simple_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (mut table, values) = random_table_with_rows(&mut rng);
            table.rows.extend(values.clone());
            let row = pick(&table.rows, &mut rng);
            let predicate = SimplePredicate::true_unary(&mut rng, &table, row);
            // The predicate must hold for at least one of the generated rows.
            let result = values
                .iter()
                .map(|row| predicate.0.test(row, &table))
                .fold(false, |seen, hit| seen || hit);
            assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}")
        }
    }

    #[test]
    fn fuzz_false_unary_simple_predicate() {
        let seed = get_seed();
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        for _ in 0..ITERATIONS {
            let (mut table, values) = random_table_with_rows(&mut rng);
            table.rows.extend(values.clone());
            let row = pick(&table.rows, &mut rng);
            let predicate = SimplePredicate::false_unary(&mut rng, &table, row);
            // The predicate must fail for at least one of the generated rows.
            let result = values
                .iter()
                .map(|row| predicate.0.test(row, &table))
                .any(|res| !res);
            assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}")
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,447 @@
use crate::generation::{
gen_random_text, pick_n_unique, Arbitrary, ArbitraryFrom, ArbitrarySizedFrom,
};
use crate::model::query::predicate::Predicate;
use crate::model::query::select::{
CompoundOperator, CompoundSelect, Distinctness, FromClause, OrderBy, ResultColumn, SelectBody,
SelectInner,
};
use crate::model::query::update::Update;
use crate::model::query::{Create, CreateIndex, Delete, Drop, Insert, Query, Select};
use crate::model::table::{JoinTable, JoinType, JoinedTable, SimValue, Table, TableContext};
use crate::SimulatorEnv;
use itertools::Itertools;
use rand::Rng;
use turso_parser::ast::{Expr, SortOrder};
use super::property::Remaining;
use super::{backtrack, frequency, pick};
impl Arbitrary for Create {
    /// Builds a `Create` query around a freshly generated random table.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        let table = Table::arbitrary(rng);
        Self { table }
    }
}
impl ArbitraryFrom<&Vec<Table>> for FromClause {
    /// Generates a `FROM` clause over `tables`.
    ///
    /// A base table is picked at random, followed by up to two distinct other
    /// tables that are attached as inner joins. Each join's `ON` predicate is
    /// generated from the base table.
    fn arbitrary_from<R: Rng>(rng: &mut R, tables: &Vec<Table>) -> Self {
        // Most clauses have no join at all; one or two joins are rare.
        let num_joins = match rng.random_range(0..=100) {
            0..=90 => 0,
            91..=97 => 1,
            98..=100 => 2,
            _ => unreachable!(),
        };

        // Tables still available for joining; each picked table is removed so
        // no table appears twice in the clause.
        let mut remaining = tables.clone();
        let base = pick(&remaining, rng).clone();
        remaining.retain(|candidate| candidate.name != base.name);

        let mut table_context = JoinTable {
            tables: Vec::new(),
            rows: Vec::new(),
        };

        let mut joins = Vec::new();
        for _ in 0..num_joins {
            // Nothing left to join against: `remaining` only ever shrinks, so
            // later iterations could not produce a join either.
            if remaining.is_empty() {
                break;
            }
            let join_table = pick(&remaining, rng).clone();
            let joined_table_name = join_table.name.clone();
            remaining.retain(|candidate| candidate.name != join_table.name);

            // Extend the accumulated rows with the cartesian product of the new table's rows.
            let mut product = Vec::new();
            for (left, right) in table_context
                .rows
                .iter()
                .cartesian_product(join_table.rows.iter())
            {
                let mut combined = left.clone();
                combined.extend(right.clone());
                product.push(combined);
            }
            table_context.rows = product;
            // TODO: inefficient. use a Deque to push_front?
            table_context.tables.insert(0, join_table);

            for row in &base.rows {
                assert_eq!(
                    row.len(),
                    base.columns.len(),
                    "Row length does not match column length after join"
                );
            }

            let on = Predicate::arbitrary_from(rng, &base);
            joins.push(JoinedTable {
                table: joined_table_name,
                join_type: JoinType::Inner,
                on,
            });
        }

        FromClause {
            table: base.name.clone(),
            joins,
        }
    }
}
impl ArbitraryFrom<&SimulatorEnv> for SelectInner {
    /// Generates a `SELECT *` over a random `FROM` clause built from `env.tables`.
    ///
    /// The `WHERE` predicate is generated from the shadowed join table. An
    /// `ORDER BY` is attached with probability 0.3, and only when at least one
    /// column ends up being selected for it. `DISTINCT` is only considered when
    /// `env.opts.experimental_indexes` is set.
    fn arbitrary_from<R: Rng>(rng: &mut R, env: &SimulatorEnv) -> Self {
        let from = FromClause::arbitrary_from(rng, &env.tables);
        // todo: this is a temporary hack because env is not separated from the tables
        let mut tables = env.tables.clone();
        let join_table = from
            .shadow(&mut tables)
            .expect("Failed to shadow FromClause");
        let cuml_col_count = join_table.columns().count();

        let order_by = if !rng.random_bool(0.3) {
            None
        } else {
            // Joined tables first, then the base table.
            let mut order_by_table_candidates: Vec<_> =
                from.joins.iter().map(|j| j.table.clone()).collect();
            order_by_table_candidates.push(from.table.clone());

            // The product of two uniform samples skews the column count towards 0.
            let skew = rng.random::<f64>() * rng.random::<f64>();
            let order_by_col_count = (skew * (cuml_col_count as f64)) as usize;

            if order_by_col_count == 0 {
                None
            } else {
                let mut seen = std::collections::HashSet::new();
                let mut order_by_cols = Vec::new();
                while order_by_cols.len() < order_by_col_count {
                    let table_name = pick(&order_by_table_candidates, rng);
                    let table = tables.iter().find(|t| t.name == *table_name).unwrap();
                    let col = pick(&table.columns, rng);
                    let col_name = format!("{}.{}", table.name, col.name);
                    // Already ordering by this column: draw again.
                    if !seen.insert(col_name.clone()) {
                        continue;
                    }
                    let direction = if rng.random_bool(0.5) {
                        SortOrder::Asc
                    } else {
                        SortOrder::Desc
                    };
                    order_by_cols.push((col_name, direction));
                }
                Some(OrderBy {
                    columns: order_by_cols,
                })
            }
        };

        let distinctness = if env.opts.experimental_indexes {
            Distinctness::arbitrary(rng)
        } else {
            Distinctness::All
        };
        let where_clause = Predicate::arbitrary_from(rng, &join_table);

        SelectInner {
            distinctness,
            columns: vec![ResultColumn::Star],
            from: Some(from),
            where_clause,
            order_by,
        }
    }
}
impl ArbitrarySizedFrom<&SimulatorEnv> for SelectInner {
    /// Generates a `SelectInner` as `arbitrary_from` does, then replaces its
    /// `*` projection with `num_result_columns` distinct `table.column` names
    /// drawn from the tables referenced by the `FROM` clause.
    fn arbitrary_sized_from<R: Rng>(
        rng: &mut R,
        env: &SimulatorEnv,
        num_result_columns: usize,
    ) -> Self {
        let mut select_inner = SelectInner::arbitrary_from(rng, env);
        let select_from = select_inner.from.as_ref().unwrap();

        // Qualified names of every column reachable through the FROM clause:
        // joined tables first, then the base table.
        let mut flat_columns_names = Vec::new();
        let referenced_tables = select_from
            .joins
            .iter()
            .map(|j| &j.table)
            .chain(std::iter::once(&select_from.table));
        for table_name in referenced_tables {
            let table = env
                .tables
                .iter()
                .find(|table| table.name == *table_name)
                .unwrap();
            for column in table.columns.iter() {
                flat_columns_names.push(format!("{}.{}", table_name, column.name));
            }
        }

        let selected_columns = pick_unique(&flat_columns_names, num_result_columns, rng);
        select_inner.columns = selected_columns
            .into_iter()
            .map(ResultColumn::Column)
            .collect();
        select_inner
    }
}
impl Arbitrary for Distinctness {
    /// Draws uniformly from `0..=5`; the four outcomes `0..=3` yield `All`,
    /// the remaining two yield `Distinct`.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        if rng.random_range(0..=5) < 4 {
            Self::All
        } else {
            Self::Distinct
        }
    }
}
impl Arbitrary for CompoundOperator {
    /// Picks `Union` or `UnionAll` with equal probability.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        if rng.random_range(0..=1) == 0 {
            Self::Union
        } else {
            Self::UnionAll
        }
    }
}
/// Wrapper around [`Select`] whose generator produces a select built from a
/// single free-form expression (via `Select::expr`) rather than from a table,
/// which is useful for exercising arbitrary expressions without referring to
/// the tables.
pub(crate) struct SelectFree(pub(crate) Select);
impl ArbitraryFrom<&SimulatorEnv> for SelectFree {
    /// Generates an expression with a size budget of 8 and wraps it in a select.
    fn arbitrary_from<R: Rng>(rng: &mut R, env: &SimulatorEnv) -> Self {
        Self(Select::expr(Predicate(Expr::arbitrary_sized_from(
            rng, env, 8,
        ))))
    }
}
impl ArbitraryFrom<&SimulatorEnv> for Select {
    /// Generates a select statement, optionally compounded with one or two
    /// further selects when `env.opts.experimental_indexes` is enabled.
    ///
    /// All member selects project the same number of columns, chosen between 1
    /// and the smallest column count of any table in the environment.
    ///
    /// # Panics
    /// Panics if `env.tables` is empty.
    fn arbitrary_from<R: Rng>(rng: &mut R, env: &SimulatorEnv) -> Self {
        // Compounds are only generated with experimental indexes enabled;
        // otherwise the statement is a single select.
        let num_compound_selects = if !env.opts.experimental_indexes {
            0
        } else {
            match rng.random_range(0..=100) {
                0..=95 => 0,
                96..=99 => 1,
                100 => 2,
                _ => unreachable!(),
            }
        };

        let min_column_count_across_tables =
            env.tables.iter().map(|t| t.columns.len()).min().unwrap();
        let num_result_columns = rng.random_range(1..=min_column_count_across_tables);

        // All member selects are generated first; the compound operators are
        // drawn afterwards.
        let mut first = SelectInner::arbitrary_sized_from(rng, env, num_result_columns);
        let mut rest: Vec<SelectInner> = Vec::new();
        for _ in 0..num_compound_selects {
            rest.push(SelectInner::arbitrary_sized_from(
                rng,
                env,
                num_result_columns,
            ));
        }

        if !rest.is_empty() {
            // ORDER BY is not supported in compound selects yet
            first.order_by = None;
            rest.iter_mut().for_each(|s| s.order_by = None);
        }

        let mut compounds = Vec::with_capacity(rest.len());
        for select in rest {
            let operator = CompoundOperator::arbitrary(rng);
            compounds.push(CompoundSelect {
                operator,
                select: Box::new(select),
            });
        }

        Self {
            body: SelectBody {
                select: Box::new(first),
                compounds,
            },
            limit: None,
        }
    }
}
impl ArbitraryFrom<&SimulatorEnv> for Insert {
    /// Generates an `INSERT ... VALUES` into a random table of `env`.
    ///
    /// The `INSERT ... SELECT` generator is defined but currently not offered
    /// to `backtrack` (see the TODO below).
    fn arbitrary_from<R: Rng>(rng: &mut R, env: &SimulatorEnv) -> Self {
        // Inserts between 1 and 9 freshly generated rows into a random table.
        let gen_values = |rng: &mut R| {
            let table = pick(&env.tables, rng);
            let num_rows = rng.random_range(1..10);
            let mut values: Vec<Vec<SimValue>> = Vec::new();
            for _ in 0..num_rows {
                let row: Vec<SimValue> = table
                    .columns
                    .iter()
                    .map(|c| SimValue::arbitrary_from(rng, &c.column_type))
                    .collect();
                values.push(row);
            }
            Some(Insert::Values {
                table: table.name.clone(),
                values,
            })
        };

        let _gen_select = |rng: &mut R| {
            // The select source is the first table that has rows; bail out if none does.
            let select_table = env.tables.iter().find(|t| !t.rows.is_empty())?;
            let row = pick(&select_table.rows, rng);
            let predicate = Predicate::arbitrary_from(rng, (select_table, row));
            let select = Select::simple(select_table.name.clone(), predicate);
            // The insert target is picked independently of the source table.
            let target = pick(&env.tables, rng).name.clone();
            Some(Insert::Select {
                table: target,
                select: Box::new(select),
            })
        };

        // TODO: Add back gen_select when https://github.com/tursodatabase/turso/issues/2129 is fixed.
        // Backtrack here cannot return None
        backtrack(vec![(1, Box::new(gen_values))], rng).unwrap()
    }
}
impl ArbitraryFrom<&SimulatorEnv> for Delete {
    /// Generates a `DELETE` on a random table with a predicate generated from that table.
    fn arbitrary_from<R: Rng>(rng: &mut R, env: &SimulatorEnv) -> Self {
        let target = pick(&env.tables, rng);
        let predicate = Predicate::arbitrary_from(rng, target);
        Self {
            table: target.name.clone(),
            predicate,
        }
    }
}
impl ArbitraryFrom<&SimulatorEnv> for Drop {
    /// Generates a `DROP` for a randomly picked table of the environment.
    fn arbitrary_from<R: Rng>(rng: &mut R, env: &SimulatorEnv) -> Self {
        let table = pick(&env.tables, rng).name.clone();
        Self { table }
    }
}
impl ArbitraryFrom<&SimulatorEnv> for CreateIndex {
    /// Generates a `CREATE INDEX` over a random non-empty subset of the columns
    /// of a random table, each with a random sort order.
    ///
    /// # Panics
    /// Panics if the environment has no tables or the picked table has no columns.
    fn arbitrary_from<R: Rng>(rng: &mut R, env: &SimulatorEnv) -> Self {
        assert!(
            !env.tables.is_empty(),
            "Cannot create an index when no tables exist in the environment."
        );
        let table = pick(&env.tables, rng);
        assert!(
            !table.columns.is_empty(),
            "Cannot create an index on table '{}' as it has no columns.",
            table.name
        );

        let column_count = table.columns.len();
        let num_columns_to_pick = rng.random_range(1..=column_count);
        let picked_column_indices = pick_n_unique(0..column_count, num_columns_to_pick, rng);

        let mut columns: Vec<(String, SortOrder)> = Vec::new();
        for i in picked_column_indices {
            let order = if rng.random_bool(0.5) {
                SortOrder::Asc
            } else {
                SortOrder::Desc
            };
            columns.push((table.columns[i].name.clone(), order));
        }

        // The name is derived from the table name plus up to 8 random characters.
        let suffix: String = gen_random_text(rng).chars().take(8).collect();
        let index_name = format!("idx_{}_{}", table.name, suffix);

        CreateIndex {
            index_name,
            table_name: table.name.clone(),
            columns,
        }
    }
}
impl ArbitraryFrom<(&SimulatorEnv, &Remaining)> for Query {
/// Generates one query, choosing its kind through `frequency`.
///
/// Each candidate kind is weighted by the matching field of `remaining`.
/// Only `Create`, `Select`, `Insert`, `Update` and `Delete` are candidates
/// here; `Drop` and `CreateIndex` are not offered by this generator.
fn arbitrary_from<R: Rng>(rng: &mut R, (env, remaining): (&SimulatorEnv, &Remaining)) -> Self {
frequency(
vec![
(
remaining.create,
Box::new(|rng| Self::Create(Create::arbitrary(rng))),
),
(
remaining.read,
Box::new(|rng| Self::Select(Select::arbitrary_from(rng, env))),
),
(
remaining.write,
Box::new(|rng| Self::Insert(Insert::arbitrary_from(rng, env))),
),
(
remaining.update,
Box::new(|rng| Self::Update(Update::arbitrary_from(rng, env))),
),
(
// Deletes are weighted by the smaller of the write and delete weights.
// NOTE(review): presumably so deletes cannot outweigh inserts; confirm.
f64::min(remaining.write, remaining.delete),
Box::new(|rng| Self::Delete(Delete::arbitrary_from(rng, env))),
),
],
rng,
)
}
}
/// Picks `count` pairwise-distinct elements from `items` by repeated random
/// draws, returning owned copies in the order they were first drawn.
///
/// # Panics
/// Panics if `count` is larger than `items.len()`: that many distinct elements
/// cannot exist, and the draw loop would otherwise never terminate.
///
/// Note that the loop also cannot terminate if `items` holds fewer than `count`
/// *distinct* values (i.e. it contains duplicates); callers must rule that out.
fn pick_unique<T: ToOwned + PartialEq>(
    items: &[T],
    count: usize,
    rng: &mut impl rand::Rng,
) -> Vec<T::Owned>
where
    <T as ToOwned>::Owned: PartialEq,
{
    assert!(
        count <= items.len(),
        "cannot pick {count} unique items from a slice of length {}",
        items.len()
    );
    let mut picked: Vec<T::Owned> = Vec::with_capacity(count);
    while picked.len() < count {
        // Convert once and reuse the owned value for both the membership test and the push.
        let candidate = pick(items, rng).to_owned();
        if !picked.contains(&candidate) {
            picked.push(candidate);
        }
    }
    picked
}
impl ArbitraryFrom<&SimulatorEnv> for Update {
    /// Generates an `UPDATE` on a random table: a random non-empty set of
    /// distinct columns is assigned freshly generated values of the column's
    /// type, filtered by a predicate generated from the table.
    fn arbitrary_from<R: Rng>(rng: &mut R, env: &SimulatorEnv) -> Self {
        let table = pick(&env.tables, rng);
        let num_cols = rng.random_range(1..=table.columns.len());
        let columns = pick_unique(&table.columns, num_cols, rng);

        let mut set_values: Vec<(String, SimValue)> = Vec::with_capacity(columns.len());
        for column in &columns {
            let value = SimValue::arbitrary_from(rng, &column.column_type);
            set_values.push((column.name.clone(), value));
        }

        let predicate = Predicate::arbitrary_from(rng, table);
        Update {
            table: table.name.clone(),
            set_values,
            predicate,
        }
    }
}

View File

@@ -0,0 +1,258 @@
use std::collections::HashSet;
use rand::Rng;
use turso_core::Value;
use crate::generation::{gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom};
use crate::model::table::{Column, ColumnType, Name, SimValue, Table};
use super::ArbitraryFromMaybe;
impl Arbitrary for Name {
    /// Generates a readable name using `_` as the separator, with any `-`
    /// replaced by `_`.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        let raw = readable_name_custom("_", rng);
        let sanitized = raw.replace("-", "_");
        Self(sanitized)
    }
}
impl Arbitrary for Table {
fn arbitrary<R: Rng>(rng: &mut R) -> Self {
let name = Name::arbitrary(rng).0;
let columns = loop {
let large_table = rng.random_bool(0.1);
let column_size = if large_table {
rng.random_range(64..125) // todo: make this higher (128+)
} else {
rng.random_range(1..=10)
};
let columns = (1..=column_size)
.map(|_| Column::arbitrary(rng))
.collect::<Vec<_>>();
// TODO: see if there is a better way to detect duplicates here
let mut set = HashSet::with_capacity(columns.len());
set.extend(columns.iter());
// Has repeated column name inside so generate again
if set.len() != columns.len() {
continue;
}
break columns;
};
Table {
rows: Vec::new(),
name,
columns,
indexes: vec![],
}
}
}
impl Arbitrary for Column {
    /// Generates a column with a random name and type that is neither a
    /// primary key nor unique.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        // Fields are evaluated in order: the name is drawn before the type.
        Self {
            name: Name::arbitrary(rng).0,
            column_type: ColumnType::arbitrary(rng),
            primary: false,
            unique: false,
        }
    }
}
impl Arbitrary for ColumnType {
    /// Picks one of `Integer`, `Float`, `Text` or `Blob` at random.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        let variants = [Self::Integer, Self::Float, Self::Text, Self::Blob];
        pick(&variants, rng).to_owned()
    }
}
impl ArbitraryFrom<&Table> for Vec<SimValue> {
    /// Generates one row for `table`: a value per column, in column order,
    /// each generated from that column's type.
    fn arbitrary_from<R: Rng>(rng: &mut R, table: &Table) -> Self {
        table
            .columns
            .iter()
            .map(|column| SimValue::arbitrary_from(rng, &column.column_type))
            .collect()
    }
}
impl ArbitraryFrom<&Vec<&SimValue>> for SimValue {
    /// Returns a clone of a randomly picked element of `values`, or `Null`
    /// when `values` is empty.
    fn arbitrary_from<R: Rng>(rng: &mut R, values: &Vec<&Self>) -> Self {
        if values.is_empty() {
            Self(Value::Null)
        } else {
            // `pick` hands back a reference to the stored reference; unwrap one level.
            let &chosen = pick(values, rng);
            chosen.clone()
        }
    }
}
impl ArbitraryFrom<&ColumnType> for SimValue {
    /// Generates a random value of the given column type.
    ///
    /// Integers are drawn from `i64::MIN..i64::MAX`, floats from `-1e10..1e10`;
    /// text and blobs are built from `gen_random_text`.
    fn arbitrary_from<R: Rng>(rng: &mut R, column_type: &ColumnType) -> Self {
        Self(match column_type {
            ColumnType::Integer => Value::Integer(rng.random_range(i64::MIN..i64::MAX)),
            ColumnType::Float => Value::Float(rng.random_range(-1e10..1e10)),
            ColumnType::Text => Value::build_text(gen_random_text(rng)),
            ColumnType::Blob => Value::Blob(gen_random_text(rng).as_bytes().to_vec()),
        })
    }
}
/// Wrapper around a [`SimValue`] that is generated to be smaller than the
/// value(s) it is derived from.
pub(crate) struct LTValue(pub(crate) SimValue);
impl ArbitraryFrom<&Vec<&SimValue>> for LTValue {
    /// Generates a value below the minimum of `values`, or `Null` when
    /// `values` is empty.
    fn arbitrary_from<R: Rng>(rng: &mut R, values: &Vec<&SimValue>) -> Self {
        if values.is_empty() {
            Self(SimValue(Value::Null))
        } else {
            // Going below the minimum is enough to go below every value.
            let smallest = SimValue(Value::exec_min(values.iter().map(|value| &value.0)));
            Self::arbitrary_from(rng, &smallest)
        }
    }
}
impl ArbitraryFrom<&SimValue> for LTValue {
    /// Generates a value that is smaller than `value`.
    ///
    /// * Integers: uniform over `i64::MIN..i`, i.e. any strictly smaller integer.
    /// * Floats: `f` minus a random offset from `0.0..1e10`.
    /// * Text / blobs: with probability 0.01 the last element is dropped;
    ///   otherwise one element is decremented and everything after it is
    ///   replaced with random content.
    ///
    /// # Panics
    /// Panics for `i64::MIN` (no smaller integer exists), when an empty text or
    /// blob reaches the mutation branch (the index range is empty), and for any
    /// other value variant.
    fn arbitrary_from<R: Rng>(rng: &mut R, value: &SimValue) -> Self {
        let new_value = match &value.0 {
            // The exclusive upper bound already rules out `i` itself. The former
            // bound `*i - 1` overflowed for `i64::MIN` and produced an empty range
            // for `i64::MIN + 1`, although `i64::MIN` is a valid answer there.
            Value::Integer(i) => Value::Integer(rng.random_range(i64::MIN..*i)),
            Value::Float(f) => Value::Float(f - rng.random_range(0.0..1e10)),
            value @ Value::Text(..) => {
                // Either shorten the string, or make at least one character smaller and mutate the rest
                let mut t = value.to_string();
                if rng.random_bool(0.01) {
                    t.pop();
                    Value::build_text(t)
                } else {
                    let mut t = t.chars().map(|c| c as u32).collect::<Vec<_>>();
                    let index = rng.random_range(0..t.len());
                    t[index] -= 1;
                    // Mutate the rest of the string
                    for val in t.iter_mut().skip(index + 1) {
                        *val = rng.random_range('a' as u32..='z' as u32);
                    }
                    // Code points that are not valid chars fall back to 'z'.
                    let t = t
                        .into_iter()
                        .map(|c| char::from_u32(c).unwrap_or('z'))
                        .collect::<String>();
                    Value::build_text(t)
                }
            }
            Value::Blob(b) => {
                // Either shorten the blob, or make at least one byte smaller and mutate the rest
                let mut b = b.clone();
                if rng.random_bool(0.01) {
                    b.pop();
                    Value::Blob(b)
                } else {
                    let index = rng.random_range(0..b.len());
                    b[index] -= 1;
                    // Mutate the rest of the blob
                    for val in b.iter_mut().skip(index + 1) {
                        *val = rng.random_range(0..=255);
                    }
                    Value::Blob(b)
                }
            }
            _ => unreachable!(),
        };
        Self(SimValue(new_value))
    }
}
/// Wrapper around a [`SimValue`] that is generated to be greater than the
/// value(s) it is derived from.
pub(crate) struct GTValue(pub(crate) SimValue);
impl ArbitraryFrom<&Vec<&SimValue>> for GTValue {
    /// Generates a value above the maximum of `values`, or `Null` when
    /// `values` is empty.
    fn arbitrary_from<R: Rng>(rng: &mut R, values: &Vec<&SimValue>) -> Self {
        if values.is_empty() {
            Self(SimValue(Value::Null))
        } else {
            // Going above the maximum is enough to go above every value.
            let largest = SimValue(Value::exec_max(values.iter().map(|value| &value.0)));
            Self::arbitrary_from(rng, &largest)
        }
    }
}
impl ArbitraryFrom<&SimValue> for GTValue {
    /// Generates a value that is greater than `value`.
    ///
    /// * Integers: uniform over `i + 1..=i64::MAX`, i.e. any strictly greater integer.
    /// * Floats: drawn from `f..1e10`.
    /// * Text / blobs: with probability 0.01 (or always, when the input is
    ///   empty) one random element is appended; otherwise one element is
    ///   incremented and everything after it is replaced with random content.
    ///
    /// # Panics
    /// Panics for `i64::MAX` (no greater integer exists) and for any value
    /// variant other than integer, float, text or blob.
    fn arbitrary_from<R: Rng>(rng: &mut R, value: &SimValue) -> Self {
        let new_value = match &value.0 {
            Value::Integer(i) => {
                // Start one above `i`: the former range `*i..i64::MAX` could hand
                // back `i` itself, which is not greater than the input.
                let lower = i
                    .checked_add(1)
                    .expect("no integer greater than i64::MAX exists");
                Value::Integer(rng.random_range(lower..=i64::MAX))
            }
            Value::Float(f) => Value::Float(rng.random_range(*f..1e10)),
            value @ Value::Text(..) => {
                // Either lengthen the string, or make at least one character larger and mutate the rest
                let mut t = value.to_string();
                // An empty string has no character to increment, so it is always lengthened.
                if t.is_empty() || rng.random_bool(0.01) {
                    t.push(rng.random_range(0..=255) as u8 as char);
                    Value::build_text(t)
                } else {
                    let mut t = t.chars().map(|c| c as u32).collect::<Vec<_>>();
                    let index = rng.random_range(0..t.len());
                    t[index] += 1;
                    // Mutate the rest of the string
                    for val in t.iter_mut().skip(index + 1) {
                        *val = rng.random_range('a' as u32..='z' as u32);
                    }
                    // Code points that are not valid chars fall back to 'a'.
                    let t = t
                        .into_iter()
                        .map(|c| char::from_u32(c).unwrap_or('a'))
                        .collect::<String>();
                    Value::build_text(t)
                }
            }
            Value::Blob(b) => {
                // Either lengthen the blob, or make at least one byte larger and mutate the rest
                let mut b = b.clone();
                // An empty blob has no byte to increment, so it is always lengthened.
                if b.is_empty() || rng.random_bool(0.01) {
                    b.push(rng.random_range(0..=255));
                    Value::Blob(b)
                } else {
                    let index = rng.random_range(0..b.len());
                    b[index] += 1;
                    // Mutate the rest of the blob
                    for val in b.iter_mut().skip(index + 1) {
                        *val = rng.random_range(0..=255);
                    }
                    Value::Blob(b)
                }
            }
            _ => unreachable!(),
        };
        Self(SimValue(new_value))
    }
}
/// Wrapper around a text [`SimValue`] holding a `LIKE` pattern derived from
/// another text value.
pub(crate) struct LikeValue(pub(crate) SimValue);
impl ArbitraryFromMaybe<&SimValue> for LikeValue {
    /// Derives a `LIKE` pattern from `value` by randomly swapping characters
    /// for wildcards.
    ///
    /// Returns `None` unless `value` is text. For text, each character may be
    /// replaced by `_`, or by `%` together with up to three following
    /// characters; finally one extra `%` is inserted at a random position.
    /// Empty text yields the pattern `%`.
    fn arbitrary_from_maybe<R: Rng>(rng: &mut R, value: &SimValue) -> Option<Self> {
        match &value.0 {
            value @ Value::Text(..) => {
                let t = value.to_string();
                let mut t = t.chars().collect::<Vec<_>>();
                // Remove a number of characters, either insert `_` for each character removed, or
                // insert one `%` for the whole substring
                let mut i = 0;
                while i < t.len() {
                    if rng.random_bool(0.1) {
                        t[i] = '_';
                    } else if rng.random_bool(0.05) {
                        t[i] = '%';
                        // skip a list of characters
                        for _ in 0..rng.random_range(0..=3.min(t.len() - i - 1)) {
                            t.remove(i + 1);
                        }
                    }
                    i += 1;
                }
                // Sampling from `0..0` would panic, so an empty pattern gets its
                // `%` at position 0 without consulting the rng.
                let index = if t.is_empty() {
                    0
                } else {
                    rng.random_range(0..t.len())
                };
                t.insert(index, '%');
                Some(Self(SimValue(Value::build_text(
                    t.into_iter().collect::<String>(),
                ))))
            }
            _ => None,
        }
    }
}