- added Arbitrary and ArbitraryOf<T> traits for more centralized generation

- implemented random generation for tables and structured queries
This commit is contained in:
alpaylan
2024-12-13 07:51:47 -05:00
parent 4f395623ab
commit c51de732c8

View File

@@ -2,6 +2,7 @@ use limbo_core::{Connection, Database, File, OpenFlags, PlatformIO, Result, RowR
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use std::cell::RefCell;
use std::fmt::Display;
use std::rc::Rc;
use std::sync::Arc;
use tempfile::TempDir;
@@ -36,12 +37,32 @@ struct SimulatorOpts {
page_size: usize,
}
/// Types that can be generated at random from nothing but a random number generator.
trait Arbitrary {
/// Builds a random value of the implementing type, drawing all randomness from `rng`.
fn arbitrary<R: Rng>(rng: &mut R) -> Self;
}
/// Types that can be generated at random with the help of a context value of type `T`.
trait ArbitraryOf<T> {
/// Builds a random value of the implementing type from `rng`, guided by the context `t`.
fn arbitrary_of<R: Rng>(rng: &mut R, t: &T) -> Self;
}
/// In-memory description of a table used by the query generators.
struct Table {
// Row data; the predicate generator indexes each row by column position.
rows: Vec<Vec<Value>>,
// Table name, inserted verbatim into generated SQL text.
name: String,
// Column definitions of the table.
columns: Vec<Column>,
}
impl Arbitrary for Table {
    /// Generates a table with a random name, random columns and no rows.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        // Field initializers are evaluated in the order written here, so the
        // name is drawn from `rng` before the columns.
        Self {
            name: gen_random_name(rng),
            columns: gen_columns(rng),
            rows: vec![],
        }
    }
}
#[derive(Clone)]
struct Column {
name: String,
@@ -50,6 +71,19 @@ struct Column {
unique: bool,
}
impl Arbitrary for Column {
    /// Generates a column with a random name and type; it is neither a primary key nor unique.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        // The name is drawn before the type (initializers run in written order).
        Self {
            name: gen_random_name(rng),
            column_type: ColumnType::arbitrary(rng),
            primary: false,
            unique: false,
        }
    }
}
#[derive(Clone)]
enum ColumnType {
Integer,
@@ -58,7 +92,19 @@ enum ColumnType {
Blob,
}
#[derive(Debug, PartialEq)]
impl Arbitrary for ColumnType {
    /// Picks one of the four column types with equal probability.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        let roll = rng.gen_range(0..4);
        if roll == 0 {
            Self::Integer
        } else if roll == 1 {
            Self::Float
        } else if roll == 2 {
            Self::Text
        } else if roll == 3 {
            Self::Blob
        } else {
            // `gen_range(0..4)` only yields 0, 1, 2 or 3.
            unreachable!()
        }
    }
}
#[derive(Clone, Debug, PartialEq)]
enum Value {
Null,
Integer(i64),
@@ -67,6 +113,312 @@ enum Value {
Blob(Vec<u8>),
}
impl ArbitraryOf<Vec<&Value>> for Value {
    /// Returns a clone of a uniformly chosen element of `t`, or `Value::Null` when `t` is empty.
    fn arbitrary_of<R: Rng>(rng: &mut R, t: &Vec<&Value>) -> Self {
        match t.len() {
            // Nothing to sample from; the RNG is left untouched.
            0 => Value::Null,
            len => t[rng.gen_range(0..len)].clone(),
        }
    }
}
impl ArbitraryOf<ColumnType> for Value {
    /// Generates a random value whose variant matches the column type `t`.
    fn arbitrary_of<R: Rng>(rng: &mut R, t: &ColumnType) -> Self {
        match t {
            ColumnType::Blob => {
                // Blobs are the raw bytes of a randomly generated text.
                let text = gen_random_text(rng);
                Self::Blob(text.into_bytes())
            }
            ColumnType::Text => Self::Text(gen_random_text(rng)),
            ColumnType::Float => {
                let number = rng.gen_range(-1e10..1e10);
                Self::Float(number)
            }
            ColumnType::Integer => {
                let number = rng.gen_range(i64::MIN..i64::MAX);
                Self::Integer(number)
            }
        }
    }
}
/// Newtype around the `Value` produced by the "less than" generators (`ArbitraryOf` impls for `LTValue`).
struct LTValue(Value);
impl ArbitraryOf<Vec<&Value>> for LTValue {
    /// Picks a random element of `t` and generates a "less than" value for it.
    /// Yields `LTValue(Value::Null)` when `t` is empty.
    fn arbitrary_of<R: Rng>(rng: &mut R, t: &Vec<&Value>) -> Self {
        if !t.is_empty() {
            let reference = t[rng.gen_range(0..t.len())];
            // Delegate to the single-value generator.
            return Self::arbitrary_of(rng, reference);
        }
        LTValue(Value::Null)
    }
}
impl LTValue {
    /// ASCII bytes that may be spliced into generated text, in ascending byte order.
    ///
    /// Restricting mutations to this alphabet keeps the result valid UTF-8 and free of
    /// quote characters (text values are rendered between single quotes without escaping).
    const TEXT_ALPHABET: &'static [u8] =
        b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";

    /// Returns a string that sorts strictly before `text` under byte-wise comparison,
    /// or `None` when `text` is empty (nothing sorts before the empty string).
    ///
    /// NOTE(review): assumes the database compares text byte-wise (SQLite's default
    /// BINARY collation) — confirm if other collations are ever used.
    fn smaller_text<R: Rng>(rng: &mut R, text: &str) -> Option<String> {
        if text.is_empty() {
            return None;
        }
        // Same odds as before: 1% shorten the string, otherwise lower one character
        // and randomize everything after it.
        if !rng.gen_bool(0.01) {
            // Number of alphabet bytes strictly below the lead byte of the char at `at`.
            let options_at = |at: usize| {
                let lead = text.as_bytes()[at];
                Self::TEXT_ALPHABET.partition_point(|&b| b < lead)
            };
            // Pick a random byte offset and snap it back to the start of its character,
            // so a multi-byte character is never split.
            let mut start = rng.gen_range(0..text.len());
            while !text.is_char_boundary(start) {
                start -= 1;
            }
            // If that character cannot be lowered, use the first character that can.
            let pick = if options_at(start) > 0 {
                Some(start)
            } else {
                text.char_indices()
                    .map(|(at, _)| at)
                    .find(|&at| options_at(at) > 0)
            };
            if let Some(at) = pick {
                let width = text[at..].chars().next().map_or(1, char::len_utf8);
                let tail_len = text.len() - at - width;
                let mut out = String::with_capacity(text.len());
                out.push_str(&text[..at]);
                // The replacement's byte is below the original lead byte, which alone
                // decides the comparison; the tail can therefore be arbitrary.
                let lower = Self::TEXT_ALPHABET[rng.gen_range(0..options_at(at))];
                out.push(char::from(lower));
                for _ in 0..tail_len {
                    let filler = Self::TEXT_ALPHABET[rng.gen_range(0..Self::TEXT_ALPHABET.len())];
                    out.push(char::from(filler));
                }
                return Some(out);
            }
        }
        // A proper prefix always sorts before the full string.
        let mut shorter = text.to_owned();
        shorter.pop();
        Some(shorter)
    }

    /// Returns a byte string that sorts strictly before `blob` under byte-wise comparison,
    /// or `None` when `blob` is empty.
    fn smaller_blob<R: Rng>(rng: &mut R, blob: &[u8]) -> Option<Vec<u8>> {
        if blob.is_empty() {
            return None;
        }
        let mut out = blob.to_vec();
        if !rng.gen_bool(0.01) {
            let start = rng.gen_range(0..out.len());
            // A zero byte cannot be lowered; fall back to the first non-zero byte.
            let pick = if out[start] > 0 {
                Some(start)
            } else {
                out.iter().position(|&b| b > 0)
            };
            if let Some(at) = pick {
                let ceiling = out[at];
                out[at] = rng.gen_range(0..ceiling);
                for byte in &mut out[at + 1..] {
                    *byte = rng.gen_range(0..=u8::MAX);
                }
                return Some(out);
            }
        }
        // A proper prefix always sorts before the full byte string.
        out.pop();
        Some(out)
    }
}

impl ArbitraryOf<Value> for LTValue {
    /// Generates a value strictly smaller than `t`, of the same variant.
    ///
    /// Falls back to `Value::Null` when no strictly smaller value exists or can be
    /// sampled: `t` is `Null`, `i64::MIN`, a non-finite float, an empty text or an
    /// empty blob. This mirrors the `Null` fallback used when there is nothing to
    /// sample from.
    fn arbitrary_of<R: Rng>(rng: &mut R, t: &Value) -> Self {
        let smaller = match t {
            Value::Null => None,
            // `gen_range` panics on an empty range, so `i64::MIN` is handled separately.
            Value::Integer(i) => {
                (*i > i64::MIN).then(|| Value::Integer(rng.gen_range(i64::MIN..*i)))
            }
            Value::Float(f) => {
                // Keep the historical lower bound of -1e10, widening it only when the
                // reference value already sits at or below that bound.
                let floor: f64 = if *f > -1e10 { -1e10 } else { *f * 2.0 - 1.0 };
                (f.is_finite() && floor.is_finite() && floor < *f)
                    .then(|| Value::Float(rng.gen_range(floor..*f)))
            }
            Value::Text(text) => Self::smaller_text(rng, text).map(Value::Text),
            Value::Blob(bytes) => Self::smaller_blob(rng, bytes).map(Value::Blob),
        };
        LTValue(smaller.unwrap_or(Value::Null))
    }
}
/// Newtype around the `Value` produced by the "greater than" generators (`ArbitraryOf` impls for `GTValue`).
struct GTValue(Value);
impl ArbitraryOf<Vec<&Value>> for GTValue {
    /// Picks a random element of `t` and generates a "greater than" value for it.
    /// Yields `GTValue(Value::Null)` when `t` is empty.
    fn arbitrary_of<R: Rng>(rng: &mut R, t: &Vec<&Value>) -> Self {
        if !t.is_empty() {
            let reference = t[rng.gen_range(0..t.len())];
            // Delegate to the single-value generator.
            return Self::arbitrary_of(rng, reference);
        }
        GTValue(Value::Null)
    }
}
impl GTValue {
    /// ASCII bytes that may be spliced into generated text, in ascending byte order.
    ///
    /// Restricting mutations to this alphabet keeps the result valid UTF-8 and free of
    /// quote characters (text values are rendered between single quotes without escaping).
    const TEXT_ALPHABET: &'static [u8] =
        b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";

    /// Draws one random character from [`Self::TEXT_ALPHABET`].
    fn random_text_char<R: Rng>(rng: &mut R) -> char {
        char::from(Self::TEXT_ALPHABET[rng.gen_range(0..Self::TEXT_ALPHABET.len())])
    }

    /// Returns a string that sorts strictly after `text` under byte-wise comparison.
    ///
    /// NOTE(review): assumes the database compares text byte-wise (SQLite's default
    /// BINARY collation) — confirm if other collations are ever used.
    fn larger_text<R: Rng>(rng: &mut R, text: &str) -> String {
        // Same odds as before: 1% lengthen the string, otherwise raise one character
        // and randomize everything after it. An empty string can only be lengthened.
        if !text.is_empty() && !rng.gen_bool(0.01) {
            // Number of alphabet bytes strictly above the lead byte of the char at `at`.
            // Non-ASCII lead bytes are above the whole alphabet, so they yield 0.
            let options_at = |at: usize| {
                let lead = text.as_bytes()[at];
                Self::TEXT_ALPHABET.len() - Self::TEXT_ALPHABET.partition_point(|&b| b <= lead)
            };
            // Pick a random byte offset and snap it back to the start of its character,
            // so a multi-byte character is never split.
            let mut start = rng.gen_range(0..text.len());
            while !text.is_char_boundary(start) {
                start -= 1;
            }
            // If that character cannot be raised, use the first character that can.
            let pick = if options_at(start) > 0 {
                Some(start)
            } else {
                text.char_indices()
                    .map(|(at, _)| at)
                    .find(|&at| options_at(at) > 0)
            };
            if let Some(at) = pick {
                let choices = options_at(at);
                let first_choice = Self::TEXT_ALPHABET.len() - choices;
                let width = text[at..].chars().next().map_or(1, char::len_utf8);
                let tail_len = text.len() - at - width;
                let mut out = String::with_capacity(text.len());
                out.push_str(&text[..at]);
                // The replacement's byte is above the original lead byte, which alone
                // decides the comparison; the tail can therefore be arbitrary.
                let higher = Self::TEXT_ALPHABET[first_choice + rng.gen_range(0..choices)];
                out.push(char::from(higher));
                for _ in 0..tail_len {
                    out.push(Self::random_text_char(rng));
                }
                return out;
            }
        }
        // A proper extension always sorts after the original string.
        let mut longer = text.to_owned();
        longer.push(Self::random_text_char(rng));
        longer
    }

    /// Returns a byte string that sorts strictly after `blob` under byte-wise comparison.
    fn larger_blob<R: Rng>(rng: &mut R, blob: &[u8]) -> Vec<u8> {
        let mut out = blob.to_vec();
        if !out.is_empty() && !rng.gen_bool(0.01) {
            let start = rng.gen_range(0..out.len());
            // A 0xFF byte cannot be raised; fall back to the first byte that can.
            let pick = if out[start] < u8::MAX {
                Some(start)
            } else {
                out.iter().position(|&b| b < u8::MAX)
            };
            if let Some(at) = pick {
                let floor = out[at];
                out[at] = rng.gen_range(floor + 1..=u8::MAX);
                for byte in &mut out[at + 1..] {
                    *byte = rng.gen_range(0..=u8::MAX);
                }
                return out;
            }
        }
        // A proper extension always sorts after the original byte string.
        out.push(rng.gen_range(0..=u8::MAX));
        out
    }
}

impl ArbitraryOf<Value> for GTValue {
    /// Generates a value strictly greater than `t`, of the same variant.
    ///
    /// Falls back to `Value::Null` when no strictly greater value exists or can be
    /// sampled: `t` is `Null`, `i64::MAX` or a non-finite float. This mirrors the
    /// `Null` fallback used when there is nothing to sample from.
    fn arbitrary_of<R: Rng>(rng: &mut R, t: &Value) -> Self {
        let larger = match t {
            Value::Null => None,
            // Start at `i + 1` so the result is strictly greater; `gen_range` panics on
            // an empty range, so `i64::MAX` is handled separately.
            Value::Integer(i) => {
                (*i < i64::MAX).then(|| Value::Integer(rng.gen_range(*i + 1..=i64::MAX)))
            }
            Value::Float(f) => {
                // Keep the historical upper bound of 1e10, widening it only when the
                // reference value already sits at or above that bound.
                let ceiling: f64 = if *f < 1e10 { 1e10 } else { *f * 2.0 + 1.0 };
                (f.is_finite() && ceiling.is_finite() && *f < ceiling).then(|| {
                    // Sample the mirrored half-open range [-ceiling, -f) and negate it,
                    // which gives (f, ceiling] and therefore excludes `f` itself.
                    let mirrored: f64 = rng.gen_range(-ceiling..-*f);
                    Value::Float(-mirrored)
                })
            }
            Value::Text(text) => Some(Value::Text(Self::larger_text(rng, text))),
            Value::Blob(bytes) => Some(Value::Blob(Self::larger_blob(rng, bytes))),
        };
        GTValue(larger.unwrap_or(Value::Null))
    }
}
/// Boolean filter over a table's columns; its `Display` impl renders it as a SQL condition.
enum Predicate {
// Conjunction of sub-predicates (rendered joined by " AND " inside parentheses).
And(Vec<Predicate>),
// Disjunction of sub-predicates (rendered joined by " OR " inside parentheses).
Or(Vec<Predicate>),
// `<column name> = <value>`
Eq(String, Value),
// `<column name> > <value>`
Gt(String, Value),
// `<column name> < <value>`
Lt(String, Value),
}
/// A structured SQL statement that can be generated at random and rendered to text via `Display`.
enum Query {
// Rendered using the table's own `to_create_str()` text.
Create { table: Table },
// `SELECT * FROM <table> WHERE <guard>`
Select { table: String, guard: Predicate },
// `INSERT INTO <table> VALUES (<values>)`
Insert { table: String, values: Vec<Value> },
// `DELETE FROM <table> WHERE <guard>`
Delete { table: String, guard: Predicate },
}
impl ArbitraryOf<Table> for Query {
    /// Generates a random query against table `t`.
    ///
    /// A roll of 0 creates a brand-new random table, 1..=100 selects from `t`, and
    /// 101..=200 inserts a row with one random value per column of `t`.
    fn arbitrary_of<R: Rng>(rng: &mut R, t: &Table) -> Self {
        let roll = rng.gen_range(0..=200);
        if roll == 0 {
            return Query::Create {
                table: Table::arbitrary(rng),
            };
        }
        if (1..=100).contains(&roll) {
            let guard = Predicate::arbitrary_of(rng, t);
            return Query::Select {
                table: t.name.clone(),
                guard,
            };
        }
        if (101..=200).contains(&roll) {
            let mut values = Vec::with_capacity(t.columns.len());
            for column in &t.columns {
                values.push(Value::arbitrary_of(rng, &column.column_type));
            }
            return Query::Insert {
                table: t.name.clone(),
                values,
            };
        }
        // NOTE(review): the roll is drawn from 0..=200, so this DELETE branch can never
        // be taken — presumably deliberate until DELETE is supported; confirm.
        if (201..=300).contains(&roll) {
            let guard = Predicate::arbitrary_of(rng, t);
            return Query::Delete {
                table: t.name.clone(),
                guard,
            };
        }
        unreachable!()
    }
}
/// Newtype for a generated `Predicate::And` / `Predicate::Or` built from simple predicates.
struct CompoundPredicate(Predicate);
/// Newtype for a generated single-comparison predicate (`Eq`, `Gt` or `Lt`) on one column.
struct SimplePredicate(Predicate);
impl ArbitraryOf<(&Table, bool)> for SimplePredicate {
    /// Generates a single comparison on a random column of `t`.
    ///
    /// `b` is the truth value the comparison should have for a row sampled from the
    /// table:
    /// * `true`: the comparison holds for the sampled row (equal to an existing value,
    ///   greater than something smaller, or less than something larger).
    /// * `false`: the ordering comparisons fail for the sampled row; equality uses a
    ///   freshly generated value of the column's type, which is unlikely to match.
    ///
    /// Previously the `Gt`/`Lt` operands were swapped, e.g. `col > GTValue(x)` was
    /// emitted for `b == true` although it is false for the sampled row `x`.
    fn arbitrary_of<R: Rng>(rng: &mut R, (t, b): &(&Table, bool)) -> Self {
        // Pick a random column and gather the values stored in it.
        let column_index = rng.gen_range(0..t.columns.len());
        let column = &t.columns[column_index];
        let column_values = t.rows.iter().map(|r| &r[column_index]).collect::<Vec<_>>();
        let name = column.name.clone();
        // Pick an operator, then an operand that yields the requested truth value.
        let predicate = match (rng.gen_range(0..3), *b) {
            (0, true) => Predicate::Eq(name, Value::arbitrary_of(rng, &column_values)),
            (0, false) => Predicate::Eq(name, Value::arbitrary_of(rng, &column.column_type)),
            // x > v holds when v < x, and fails when v > x.
            (1, true) => Predicate::Gt(name, LTValue::arbitrary_of(rng, &column_values).0),
            (1, false) => Predicate::Gt(name, GTValue::arbitrary_of(rng, &column_values).0),
            // x < v holds when v > x, and fails when v < x.
            (2, true) => Predicate::Lt(name, GTValue::arbitrary_of(rng, &column_values).0),
            (2, false) => Predicate::Lt(name, LTValue::arbitrary_of(rng, &column_values).0),
            _ => unreachable!(),
        };
        SimplePredicate(predicate)
    }
}
impl ArbitraryOf<(&Table, bool)> for CompoundPredicate {
fn arbitrary_of<R: Rng>(rng: &mut R, (t, b): &(&Table, bool)) -> Self {
// Decide if you want to create an AND or an OR
CompoundPredicate(if rng.gen_bool(0.7) {
// An AND for true requires each of its children to be true
// An AND for false requires at least one of its children to be false
if *b {
Predicate::And(
(0..rng.gen_range(1..=3))
.map(|_| SimplePredicate::arbitrary_of(rng, &(*t, true)).0)
.collect(),
)
} else {
// Create a vector of random booleans
let mut booleans = (0..rng.gen_range(1..=3))
.map(|_| rng.gen_bool(0.5))
.collect::<Vec<_>>();
let len = booleans.len();
// Make sure at least one of them is false
if booleans.iter().all(|b| *b) {
booleans[rng.gen_range(0..len)] = false;
}
Predicate::And(
booleans
.iter()
.map(|b| SimplePredicate::arbitrary_of(rng, &(*t, *b)).0)
.collect(),
)
}
} else {
// An OR for true requires at least one of its children to be true
// An OR for false requires each of its children to be false
if *b {
// Create a vector of random booleans
let mut booleans = (0..rng.gen_range(1..=3))
.map(|_| rng.gen_bool(0.5))
.collect::<Vec<_>>();
let len = booleans.len();
// Make sure at least one of them is true
if booleans.iter().all(|b| !*b) {
booleans[rng.gen_range(0..len)] = true;
}
Predicate::Or(
booleans
.iter()
.map(|b| SimplePredicate::arbitrary_of(rng, &(*t, *b)).0)
.collect(),
)
} else {
Predicate::Or(
(0..rng.gen_range(1..=3))
.map(|_| SimplePredicate::arbitrary_of(rng, &(*t, false)).0)
.collect(),
)
}
})
}
}
impl ArbitraryOf<Table> for Predicate {
fn arbitrary_of<R: Rng>(rng: &mut R, t: &Table) -> Self {
let b= rng.gen_bool(0.5);
CompoundPredicate::arbitrary_of(rng, &(t, b)).0
}
}
impl Display for Predicate {
    /// Renders the predicate as a SQL condition.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        /// Writes `parts` inside parentheses, separated by `separator`.
        fn joined(
            f: &mut std::fmt::Formatter<'_>,
            parts: &[Predicate],
            separator: &str,
        ) -> std::fmt::Result {
            write!(f, "(")?;
            let mut remaining = parts.iter();
            if let Some(first) = remaining.next() {
                write!(f, "{}", first)?;
            }
            for part in remaining {
                write!(f, "{}", separator)?;
                write!(f, "{}", part)?;
            }
            write!(f, ")")
        }

        match self {
            Predicate::And(predicates) => joined(f, predicates, " AND "),
            Predicate::Or(predicates) => joined(f, predicates, " OR "),
            Predicate::Eq(name, value) => write!(f, "{} = {}", name, value),
            Predicate::Gt(name, value) => write!(f, "{} > {}", name, value),
            Predicate::Lt(name, value) => write!(f, "{} < {}", name, value),
        }
    }
}
impl Display for Query {
    /// Renders the query as SQL text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Query::Delete { table, guard } => write!(f, "DELETE FROM {} WHERE {}", table, guard),
            Query::Select { table, guard } => write!(f, "SELECT * FROM {} WHERE {}", table, guard),
            Query::Create { table } => write!(f, "{}", table.to_create_str()),
            Query::Insert { table, values } => {
                write!(f, "INSERT INTO {} VALUES (", table)?;
                let mut remaining = values.iter();
                // The first value has no leading separator; every later one does.
                if let Some(first) = remaining.next() {
                    write!(f, "{}", first)?;
                }
                for value in remaining {
                    write!(f, ", ")?;
                    write!(f, "{}", value)?;
                }
                write!(f, ")")
            }
        }
    }
}
#[allow(clippy::arc_with_non_send_sync)]
fn main() {
let _ = env_logger::try_init();
@@ -160,17 +512,21 @@ fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc<Connection>) -> Resu
} else if env.tables.is_empty() {
maybe_add_table(env, conn)?;
} else {
let roll = env.rng.gen_range(0..100);
if roll < env.opts.read_percent {
// read
do_select(env, conn)?;
} else if roll < env.opts.read_percent + env.opts.write_percent {
// write
do_write(env, conn)?;
} else {
// delete
// TODO
}
let query = Query::arbitrary_of(&mut env.rng, &env.tables[0]);
log::info!("running query '{}'", query);
let rows = get_all_rows(env, conn, query.to_string().as_str())?;
log::debug!("{:?}", rows);
// let roll = env.rng.gen_range(0..100);
// if roll < env.opts.read_percent {
// // read
// do_select(env, conn)?;
// } else if roll < env.opts.read_percent + env.opts.write_percent {
// // write
// do_write(env, conn)?;
// } else {
// // delete
// // TODO
// }
}
Ok(())
}
@@ -201,12 +557,7 @@ fn do_write(env: &mut SimulatorEnv, conn: &mut Rc<Connection>) -> Result<()> {
// gen insert query
for column in &columns {
let value = match column.column_type {
ColumnType::Integer => Value::Integer(env.rng.gen_range(i64::MIN..i64::MAX)),
ColumnType::Float => Value::Float(env.rng.gen_range(-1e10..1e10)),
ColumnType::Text => Value::Text(gen_random_text(env)),
ColumnType::Blob => Value::Blob(gen_random_text(env).as_bytes().to_vec()),
};
let value = Value::arbitrary_of(&mut env.rng, &column.column_type);
query.push_str(value.to_string().as_str());
query.push(',');
@@ -237,8 +588,8 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc<Connection>) -> Result<
if env.tables.len() < env.opts.max_tables {
let table = Table {
rows: Vec::new(),
name: gen_random_name(env),
columns: gen_columns(env),
name: gen_random_name(&mut env.rng),
columns: gen_columns(&mut env.rng),
};
let rows = get_all_rows(env, conn, table.to_create_str().as_str())?;
log::debug!("{:?}", rows);
@@ -266,32 +617,32 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc<Connection>) -> Result<
Ok(())
}
fn gen_random_name(env: &mut SimulatorEnv) -> String {
let name = readable_name_custom("_", &mut env.rng);
/// Generates a readable random name with every `-` replaced by `_`.
fn gen_random_name<T: Rng>(rng: &mut T) -> String {
    readable_name_custom("_", rng).replace('-', "_")
}
fn gen_random_text(env: &mut SimulatorEnv) -> String {
let big_text = env.rng.gen_ratio(1, 1000);
fn gen_random_text<T: Rng>(rng: &mut T) -> String {
let big_text = rng.gen_ratio(1, 1000);
if big_text {
let max_size: u64 = 2 * 1024 * 1024 * 1024;
let size = env.rng.gen_range(1024..max_size);
let size = rng.gen_range(1024..max_size);
let mut name = String::new();
for i in 0..size {
name.push(((i % 26) as u8 + b'A') as char);
}
name
} else {
let name = readable_name_custom("_", &mut env.rng);
let name = readable_name_custom("_", rng);
name.replace("-", "_")
}
}
fn gen_columns(env: &mut SimulatorEnv) -> Vec<Column> {
let mut column_range = env.rng.gen_range(1..128);
fn gen_columns<T: Rng>(rng: &mut T) -> Vec<Column> {
let mut column_range = rng.gen_range(1..128);
let mut columns = Vec::new();
while column_range > 0 {
let column_type = match env.rng.gen_range(0..4) {
let column_type = match rng.gen_range(0..4) {
0 => ColumnType::Integer,
1 => ColumnType::Float,
2 => ColumnType::Text,
@@ -299,7 +650,7 @@ fn gen_columns(env: &mut SimulatorEnv) -> Vec<Column> {
_ => unreachable!(),
};
let column = Column {
name: gen_random_name(env),
name: gen_random_name(rng),
column_type,
primary: false,
unique: false,
@@ -565,14 +916,14 @@ impl Table {
}
}
impl Value {
pub fn to_string(&self) -> String {
impl Display for Value {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Value::Null => "NULL".to_string(),
Value::Integer(i) => i.to_string(),
Value::Float(f) => f.to_string(),
Value::Text(t) => format!("'{}'", t.clone()),
Value::Blob(vec) => to_sqlite_blob(vec),
Value::Null => write!(f, "NULL"),
Value::Integer(i) => write!(f, "{}", i),
Value::Float(fl) => write!(f, "{}", fl),
Value::Text(t) => write!(f, "'{}'", t),
Value::Blob(b) => write!(f, "{}", to_sqlite_blob(b)),
}
}
}