diff --git a/sql_generation/generation/mod.rs b/sql_generation/generation/mod.rs index 6d475590a..331dc65f4 100644 --- a/sql_generation/generation/mod.rs +++ b/sql_generation/generation/mod.rs @@ -3,8 +3,8 @@ use std::{iter::Sum, ops::SubAssign}; use anarchist_readable_name_generator_lib::readable_name_custom; use rand::{distr::uniform::SampleUniform, Rng}; -pub mod opts; pub mod expr; +pub mod opts; pub mod predicate; pub mod query; pub mod table; @@ -41,6 +41,18 @@ pub trait ArbitraryFrom { fn arbitrary_from(rng: &mut R, t: T) -> Self; } +pub trait ArbitraryContext { + fn arbitrary_with_context(rng: &mut R, context: &C) -> Self; +} + +pub trait ArbitraryContextFrom { + fn arbitrary_with_context_from( + rng: &mut R, + context: &C, + t: T, + ) -> Self; +} + /// ArbitrarySizedFrom trait for generating random values from a given value /// ArbitrarySizedFrom allows for constructing relations, where the generated /// value is dependent on the given value and a size constraint. These relations diff --git a/sql_generation/generation/opts.rs b/sql_generation/generation/opts.rs index d9caf81d7..b61da6cdc 100644 --- a/sql_generation/generation/opts.rs +++ b/sql_generation/generation/opts.rs @@ -1,5 +1,7 @@ use std::ops::Range; +use rand::distr::weighted::WeightedIndex; + use crate::model::table::Table; #[derive(Debug, Clone)] @@ -7,6 +9,7 @@ pub struct Opts { /// Indexes enabled pub indexes: bool, pub table: TableOpts, + pub query: QueryOpts, } impl Default for Opts { @@ -14,6 +17,7 @@ impl Default for Opts { Self { indexes: true, table: Default::default(), + query: Default::default(), } } } @@ -45,7 +49,7 @@ impl Default for TableOpts { #[derive(Debug, Clone)] pub struct LargeTableOpts { pub enable: bool, - pub large_table_prob: f32, + pub large_table_prob: f64, /// Range of numbers of columns to generate pub column_range: Range, } @@ -60,3 +64,46 @@ impl Default for LargeTableOpts { } } } + +#[derive(Debug, Default, Clone)] +pub struct QueryOpts { + pub from_clause: 
FromClauseOpts, +} + +#[derive(Debug, Clone)] +pub struct FromClauseOpts { + pub joins: Vec, +} + +impl Default for FromClauseOpts { + fn default() -> Self { + Self { + joins: vec![ + JoinWeight { + num_joins: 0, + weight: 90, + }, + JoinWeight { + num_joins: 1, + weight: 7, + }, + JoinWeight { + num_joins: 2, + weight: 3, + }, + ], + } + } +} + +impl FromClauseOpts { + pub fn as_weighted_index(&self) -> WeightedIndex { + WeightedIndex::new(self.joins.iter().map(|weight| weight.weight)).unwrap() + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd)] +pub struct JoinWeight { + pub num_joins: u32, + pub weight: u32, +} diff --git a/sql_generation/generation/query.rs b/sql_generation/generation/query.rs index d7840a001..f3729bf9e 100644 --- a/sql_generation/generation/query.rs +++ b/sql_generation/generation/query.rs @@ -1,6 +1,6 @@ use crate::generation::{ - gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryFrom, ArbitrarySizedFrom, - GenerationContext, + gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryContext, ArbitraryContextFrom, + ArbitraryFrom, ArbitrarySizedFrom, GenerationContext, }; use crate::model::query::predicate::Predicate; use crate::model::query::select::{ @@ -24,6 +24,77 @@ impl Arbitrary for Create { } } +impl ArbitraryContext for Create { + fn arbitrary_with_context(rng: &mut R, context: &C) -> Self { + Create { + table: Table::arbitrary_with_context(rng, context), + } + } +} + +impl ArbitraryContextFrom<&Vec> for FromClause { + fn arbitrary_with_context_from( + rng: &mut R, + context: &C, + tables: &Vec
, + ) -> Self { + let opts = &context.opts().query.from_clause; + let weights = opts.as_weighted_index(); + let num_joins = opts.joins[rng.sample(weights)].num_joins; + + let mut tables = tables.clone(); + let mut table = pick(&tables, rng).clone(); + + tables.retain(|t| t.name != table.name); + + let name = table.name.clone(); + + let mut table_context = JoinTable { + tables: Vec::new(), + rows: Vec::new(), + }; + + let joins: Vec<_> = (0..num_joins) + .filter_map(|_| { + if tables.is_empty() { + return None; + } + let join_table = pick(&tables, rng).clone(); + let joined_table_name = join_table.name.clone(); + + tables.retain(|t| t.name != join_table.name); + table_context.rows = table_context + .rows + .iter() + .cartesian_product(join_table.rows.iter()) + .map(|(t_row, j_row)| { + let mut row = t_row.clone(); + row.extend(j_row.clone()); + row + }) + .collect(); + // TODO: inefficient. use a VecDeque to push_front? + table_context.tables.insert(0, join_table); + for row in &mut table.rows { + assert_eq!( + row.len(), + table.columns.len(), + "Row length does not match column length after join" + ); + } + + let predicate = Predicate::arbitrary_from(rng, &table); + Some(JoinedTable { + table: joined_table_name, + join_type: JoinType::Inner, + on: predicate, + }) + }) + .collect(); + FromClause { table: name, joins } + } +} + impl ArbitraryFrom<&Vec
> for FromClause { fn arbitrary_from(rng: &mut R, tables: &Vec
) -> Self { let num_joins = match rng.random_range(0..=100) { diff --git a/sql_generation/generation/table.rs b/sql_generation/generation/table.rs index d21397cbe..32bcb8117 100644 --- a/sql_generation/generation/table.rs +++ b/sql_generation/generation/table.rs @@ -3,7 +3,10 @@ use std::collections::HashSet; use rand::Rng; use turso_core::Value; -use crate::generation::{gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom}; +use crate::generation::{ + gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryContext, ArbitraryFrom, + GenerationContext, Opts, +}; use crate::model::table::{Column, ColumnType, Name, SimValue, Table}; use super::ArbitraryFromMaybe; @@ -15,38 +18,46 @@ impl Arbitrary for Name { } } -impl Arbitrary for Table { - fn arbitrary(rng: &mut R) -> Self { +impl Table { + fn gen_table(rng: &mut R, opts: &Opts) -> Self { + let opts = opts.table.clone(); let name = Name::arbitrary(rng).0; - let columns = loop { - let large_table = rng.random_bool(0.1); - let column_size = if large_table { - rng.random_range(64..125) // todo: make this higher (128+) - } else { - rng.random_range(1..=10) - }; - let columns = (1..=column_size) - .map(|_| Column::arbitrary(rng)) - .collect::>(); - // TODO: see if there is a better way to detect duplicates here - let mut set = HashSet::with_capacity(columns.len()); - set.extend(columns.iter()); - // Has repeated column name inside so generate again - if set.len() != columns.len() { - continue; + let large_table = + opts.large_table.enable && rng.random_bool(opts.large_table.large_table_prob); + let column_size = if large_table { + rng.random_range(opts.large_table.column_range) + } else { + rng.random_range(opts.column_range) + } as usize; + let mut column_set = HashSet::with_capacity(column_size); + for col in std::iter::repeat_with(|| Column::arbitrary(rng)) { + column_set.insert(col); + if column_set.len() == column_size { + break; } - break columns; - }; + } Table { rows: Vec::new(), name, - 
columns, + columns: Vec::from_iter(column_set.into_iter()), indexes: vec![], } } } +impl Arbitrary for Table { + fn arbitrary(rng: &mut R) -> Self { + Table::gen_table(rng, &Opts::default()) + } +} + +impl ArbitraryContext for Table { + fn arbitrary_with_context(rng: &mut R, context: &C) -> Self { + Table::gen_table(rng, context.opts()) + } +} + impl Arbitrary for Column { fn arbitrary(rng: &mut R) -> Self { let name = Name::arbitrary(rng).0;