create ArbitraryContext and ArbitraryContextFrom traits to pass generation context + start implementing them in Table + FromClause

This commit is contained in:
pedrocarlo
2025-08-27 01:48:06 -03:00
parent e0552629e3
commit 1a8b78afd8
4 changed files with 167 additions and 26 deletions

View File

@@ -3,8 +3,8 @@ use std::{iter::Sum, ops::SubAssign};
use anarchist_readable_name_generator_lib::readable_name_custom;
use rand::{distr::uniform::SampleUniform, Rng};
pub mod opts;
pub mod expr;
pub mod opts;
pub mod predicate;
pub mod query;
pub mod table;
@@ -41,6 +41,18 @@ pub trait ArbitraryFrom<T> {
fn arbitrary_from<R: Rng>(rng: &mut R, t: T) -> Self;
}
/// ArbitraryContext trait for generating random values that are shaped by a
/// generation context rather than by hard-coded defaults.
pub trait ArbitraryContext {
/// Generates a value of `Self`, drawing randomness from `rng` and reading
/// generation settings from `context`.
fn arbitrary_with_context<R: Rng, C: GenerationContext>(rng: &mut R, context: &C) -> Self;
}
/// ArbitraryContextFrom trait for generating random values from a given value
/// while also consulting a generation context. It combines the input-driven
/// shape of `ArbitraryFrom<T>` with a `GenerationContext` argument.
pub trait ArbitraryContextFrom<T> {
/// Generates a value of `Self` from `t`, drawing randomness from `rng` and
/// reading generation settings from `context`.
fn arbitrary_with_context_from<R: Rng, C: GenerationContext>(
rng: &mut R,
context: &C,
t: T,
) -> Self;
}
/// ArbitrarySizedFrom trait for generating random values from a given value
/// ArbitrarySizedFrom allows for constructing relations, where the generated
/// value is dependent on the given value and a size constraint. These relations

View File

@@ -1,5 +1,7 @@
use std::ops::Range;
use rand::distr::weighted::WeightedIndex;
use crate::model::table::Table;
#[derive(Debug, Clone)]
@@ -7,6 +9,7 @@ pub struct Opts {
/// Indexes enabled
pub indexes: bool,
pub table: TableOpts,
pub query: QueryOpts,
}
impl Default for Opts {
@@ -14,6 +17,7 @@ impl Default for Opts {
Self {
indexes: true,
table: Default::default(),
query: Default::default(),
}
}
}
@@ -45,7 +49,7 @@ impl Default for TableOpts {
#[derive(Debug, Clone)]
pub struct LargeTableOpts {
pub enable: bool,
pub large_table_prob: f32,
pub large_table_prob: f64,
/// Range of numbers of columns to generate
pub column_range: Range<u32>,
}
@@ -60,3 +64,46 @@ impl Default for LargeTableOpts {
}
}
}
/// Options that steer query generation.
#[derive(Debug, Default, Clone)]
pub struct QueryOpts {
/// Options for generating the FROM clause of a query
pub from_clause: FromClauseOpts,
}
/// Options for generating a FROM clause.
#[derive(Debug, Clone)]
pub struct FromClauseOpts {
/// Candidate join counts, each paired with the relative weight used when
/// sampling how many joins a generated FROM clause should have
pub joins: Vec<JoinWeight>,
}
impl Default for FromClauseOpts {
    /// Default join distribution: 0 joins with weight 90, 1 join with weight 7
    /// and 2 joins with weight 3.
    fn default() -> Self {
        // Each entry is a `(num_joins, weight)` pair.
        const DEFAULT_JOINS: [(u32, u32); 3] = [(0, 90), (1, 7), (2, 3)];

        let joins = DEFAULT_JOINS
            .iter()
            .map(|&(num_joins, weight)| JoinWeight { num_joins, weight })
            .collect();

        Self { joins }
    }
}
impl FromClauseOpts {
    /// Builds a weighted sampling distribution over `self.joins`.
    ///
    /// Index `i` of the returned distribution corresponds to `self.joins[i]`,
    /// so a sampled index can be used directly to look up the chosen
    /// [`JoinWeight`].
    ///
    /// # Panics
    ///
    /// Panics if the weights cannot form a valid distribution, i.e. when
    /// `WeightedIndex::new` returns an error (for example when `joins` is
    /// empty or every weight is zero).
    pub fn as_weighted_index(&self) -> WeightedIndex<u32> {
        WeightedIndex::new(self.joins.iter().map(|join| join.weight)).expect(
            "FromClauseOpts::joins must be non-empty and have a non-zero total weight that fits in u32",
        )
    }
}
/// One entry of the join-count distribution: a number of joins together with
/// the relative weight with which that number is chosen.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct JoinWeight {
    /// Number of joins this entry stands for
    pub num_joins: u32,
    /// Relative weight of this entry when sampling among all entries
    pub weight: u32,
}

View File

@@ -1,6 +1,6 @@
use crate::generation::{
gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryFrom, ArbitrarySizedFrom,
GenerationContext,
gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryContext, ArbitraryContextFrom,
ArbitraryFrom, ArbitrarySizedFrom, GenerationContext,
};
use crate::model::query::predicate::Predicate;
use crate::model::query::select::{
@@ -24,6 +24,77 @@ impl Arbitrary for Create {
}
}
impl ArbitraryContext for Create {
    /// Builds a `Create` whose table is generated with the given context.
    fn arbitrary_with_context<R: Rng, C: GenerationContext>(rng: &mut R, context: &C) -> Self {
        let table = Table::arbitrary_with_context(rng, context);
        Self { table }
    }
}
impl ArbitraryContextFrom<&Vec<Table>> for FromClause {
    /// Generates a FROM clause over `tables`.
    ///
    /// The number of joins is sampled from the weighted distribution in
    /// `context.opts().query.from_clause`. One table is picked as the base
    /// table; each join then picks a different, not yet used table and pairs
    /// it with a predicate generated from the base table. Fewer joins than
    /// sampled are produced when the candidate tables run out.
    fn arbitrary_with_context_from<R: Rng, C: GenerationContext>(
        rng: &mut R,
        context: &C,
        tables: &Vec<Table>,
    ) -> Self {
        let from_opts = &context.opts().query.from_clause;
        let distribution = from_opts.as_weighted_index();
        // The sampled index selects the matching entry of `joins`.
        let num_joins = from_opts.joins[rng.sample(distribution)].num_joins;

        // Work on a private copy so already used tables can be removed.
        let mut candidates = tables.clone();
        let base = pick(&candidates, rng).clone();
        candidates.retain(|t| t.name != base.name);
        let base_name = base.name.clone();

        // NOTE(review): this join context is filled in below but never read in
        // this function. Its `rows` start empty, so every cartesian product
        // computed from them is empty as well - confirm whether the base
        // table's rows were meant to seed it.
        let mut join_context = JoinTable {
            tables: Vec::new(),
            rows: Vec::new(),
        };

        let mut joins = Vec::new();
        for _ in 0..num_joins {
            // Candidates only ever shrink, so once empty no further join is possible.
            if candidates.is_empty() {
                break;
            }

            let joined = pick(&candidates, rng).clone();
            let joined_name = joined.name.clone();
            candidates.retain(|t| t.name != joined.name);

            join_context.rows = join_context
                .rows
                .iter()
                .cartesian_product(joined.rows.iter())
                .map(|(left, right)| {
                    let mut combined = left.clone();
                    combined.extend(right.clone());
                    combined
                })
                .collect();
            // TODO: inefficient. Use a VecDeque to push_front?
            join_context.tables.insert(0, joined);

            for row in &base.rows {
                assert_eq!(
                    row.len(),
                    base.columns.len(),
                    "Row length does not match column length after join"
                );
            }

            let on = Predicate::arbitrary_from(rng, &base);
            joins.push(JoinedTable {
                table: joined_name,
                join_type: JoinType::Inner,
                on,
            });
        }

        FromClause {
            table: base_name,
            joins,
        }
    }
}
impl ArbitraryFrom<&Vec<Table>> for FromClause {
fn arbitrary_from<R: Rng>(rng: &mut R, tables: &Vec<Table>) -> Self {
let num_joins = match rng.random_range(0..=100) {

View File

@@ -3,7 +3,10 @@ use std::collections::HashSet;
use rand::Rng;
use turso_core::Value;
use crate::generation::{gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom};
use crate::generation::{
gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryContext, ArbitraryFrom,
GenerationContext, Opts,
};
use crate::model::table::{Column, ColumnType, Name, SimValue, Table};
use super::ArbitraryFromMaybe;
@@ -15,38 +18,46 @@ impl Arbitrary for Name {
}
}
impl Arbitrary for Table {
fn arbitrary<R: Rng>(rng: &mut R) -> Self {
impl Table {
fn gen_table<R: Rng>(rng: &mut R, opts: &Opts) -> Self {
let opts = opts.table.clone();
let name = Name::arbitrary(rng).0;
let columns = loop {
let large_table = rng.random_bool(0.1);
let column_size = if large_table {
rng.random_range(64..125) // todo: make this higher (128+)
} else {
rng.random_range(1..=10)
};
let columns = (1..=column_size)
.map(|_| Column::arbitrary(rng))
.collect::<Vec<_>>();
// TODO: see if there is a better way to detect duplicates here
let mut set = HashSet::with_capacity(columns.len());
set.extend(columns.iter());
// Has repeated column name inside so generate again
if set.len() != columns.len() {
continue;
let large_table =
opts.large_table.enable && rng.random_bool(opts.large_table.large_table_prob);
let column_size = if large_table {
rng.random_range(opts.large_table.column_range)
} else {
rng.random_range(opts.column_range)
} as usize;
let mut column_set = HashSet::with_capacity(column_size);
for col in std::iter::repeat_with(|| Column::arbitrary(rng)) {
column_set.insert(col);
if column_set.len() == column_size {
break;
}
break columns;
};
}
Table {
rows: Vec::new(),
name,
columns,
columns: Vec::from_iter(column_set.into_iter()),
indexes: vec![],
}
}
}
impl Arbitrary for Table {
    /// Generates a random table using the default generation options.
    fn arbitrary<R: Rng>(rng: &mut R) -> Self {
        let defaults = Opts::default();
        Self::gen_table(rng, &defaults)
    }
}
impl ArbitraryContext for Table {
    /// Generates a random table using the options carried by `context`.
    fn arbitrary_with_context<R: Rng, C: GenerationContext>(rng: &mut R, context: &C) -> Self {
        let opts = context.opts();
        Self::gen_table(rng, opts)
    }
}
impl Arbitrary for Column {
fn arbitrary<R: Rng>(rng: &mut R) -> Self {
let name = Name::arbitrary(rng).0;