add Select and Insert generation options

This commit is contained in:
pedrocarlo
2025-08-27 15:20:32 -03:00
parent 06b923d0c1
commit bc6976fd33
2 changed files with 84 additions and 27 deletions

View File

@@ -1,4 +1,7 @@
use std::ops::Range;
use std::{
num::{NonZero, NonZeroU32},
ops::Range,
};
use rand::distr::weighted::WeightedIndex;
@@ -67,7 +70,49 @@ impl Default for LargeTableOpts {
#[derive(Debug, Default, Clone)]
pub struct QueryOpts {
pub select: SelectOpts,
pub from_clause: FromClauseOpts,
pub insert: InsertOpts,
}
#[derive(Debug, Clone)]
pub struct SelectOpts {
pub order_by_prob: f64,
pub compound_selects: Vec<CompoundSelectWeight>,
}
impl Default for SelectOpts {
fn default() -> Self {
Self {
order_by_prob: 0.3,
compound_selects: vec![
CompoundSelectWeight {
num_compound_selects: 0,
weight: 95,
},
CompoundSelectWeight {
num_compound_selects: 1,
weight: 4,
},
CompoundSelectWeight {
num_compound_selects: 2,
weight: 1,
},
],
}
}
}
impl SelectOpts {
pub fn compound_select_weighted_index(&self) -> WeightedIndex<u32> {
WeightedIndex::new(self.compound_selects.iter().map(|weight| weight.weight)).unwrap()
}
}
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub struct CompoundSelectWeight {
pub num_compound_selects: u32,
pub weight: u32,
}
#[derive(Debug, Clone)]
@@ -107,3 +152,18 @@ pub struct JoinWeight {
pub num_joins: u32,
pub weight: u32,
}
#[derive(Debug, Clone)]
pub struct InsertOpts {
pub min_rows: NonZeroU32,
pub max_rows: NonZeroU32,
}
impl Default for InsertOpts {
fn default() -> Self {
Self {
min_rows: NonZero::new(1).unwrap(),
max_rows: NonZero::new(10).unwrap(),
}
}
}

View File

@@ -90,24 +90,25 @@ impl Arbitrary for SelectInner {
let join_table = from.into_join_table(&tables);
let cuml_col_count = join_table.columns().count();
let order_by = 'order_by: {
if rng.random_bool(0.3) {
let order_by = rng
.random_bool(env.opts().query.select.order_by_prob)
.then(|| {
let order_by_table_candidates = from
.joins
.iter()
.map(|j| j.table.clone())
.chain(std::iter::once(from.table.clone()))
.map(|j| &j.table)
.chain(std::iter::once(&from.table))
.collect::<Vec<_>>();
let order_by_col_count =
(rng.random::<f64>() * rng.random::<f64>() * (cuml_col_count as f64)) as usize; // skew towards 0
if order_by_col_count == 0 {
break 'order_by None;
return None;
}
let mut col_names = std::collections::HashSet::new();
let mut order_by_cols = Vec::new();
while order_by_cols.len() < order_by_col_count {
let table = pick(&order_by_table_candidates, rng);
let table = tables.iter().find(|t| t.name == *table).unwrap();
let table = tables.iter().find(|t| t.name == table.as_str()).unwrap();
let col = pick(&table.columns, rng);
let col_name = format!("{}.{}", table.name, col.name);
if col_names.insert(col_name.clone()) {
@@ -124,10 +125,8 @@ impl Arbitrary for SelectInner {
Some(OrderBy {
columns: order_by_cols,
})
} else {
None
}
};
})
.flatten();
SelectInner {
distinctness: if env.opts().indexes {
@@ -154,12 +153,10 @@ impl ArbitrarySized for SelectInner {
let table_names = select_from
.joins
.iter()
.map(|j| j.table.clone())
.chain(std::iter::once(select_from.table.clone()))
.collect::<Vec<_>>();
.map(|j| &j.table)
.chain(std::iter::once(&select_from.table));
let flat_columns_names = table_names
.iter()
.flat_map(|t| {
env.tables()
.iter()
@@ -167,14 +164,15 @@ impl ArbitrarySized for SelectInner {
.unwrap()
.columns
.iter()
.map(|c| format!("{}.{}", t.clone(), c.name))
.map(move |c| format!("{}.{}", t, c.name))
})
.collect::<Vec<_>>();
let selected_columns = pick_unique(&flat_columns_names, num_result_columns, rng);
let mut columns = Vec::new();
for column_name in selected_columns {
columns.push(ResultColumn::Column(column_name.clone()));
}
let columns = selected_columns
.into_iter()
.map(|col_name| ResultColumn::Column(col_name))
.collect();
select_inner.columns = columns;
select_inner
}
@@ -188,6 +186,7 @@ impl Arbitrary for Distinctness {
}
}
}
impl Arbitrary for CompoundOperator {
fn arbitrary<R: Rng, C: GenerationContext>(rng: &mut R, _context: &C) -> Self {
match rng.random_range(0..=1) {
@@ -216,13 +215,10 @@ impl Arbitrary for Select {
// Generate a number of selects based on the query size
// If experimental indexes are enabled, we can have selects with compounds
// Otherwise, we just have a single select with no compounds
let opts = &env.opts().query.select;
let num_compound_selects = if env.opts().indexes {
match rng.random_range(0..=100) {
0..=95 => 0,
96..=99 => 1,
100 => 2,
_ => unreachable!(),
}
opts.compound_selects[rng.sample(opts.compound_select_weighted_index())]
.num_compound_selects
} else {
0
};
@@ -264,9 +260,10 @@ impl Arbitrary for Select {
impl Arbitrary for Insert {
fn arbitrary<R: Rng, C: GenerationContext>(rng: &mut R, env: &C) -> Self {
let opts = &env.opts().query.insert;
let gen_values = |rng: &mut R| {
let table = pick(env.tables(), rng);
let num_rows = rng.random_range(1..10);
let num_rows = rng.random_range(opts.min_rows.get()..opts.max_rows.get());
let values: Vec<Vec<SimValue>> = (0..num_rows)
.map(|_| {
table