From fb6c5ffcff295133963a90b27c6f86abb35ad6bf Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 9 Oct 2025 16:48:57 -0300 Subject: [PATCH] move SimValue generation to separate files to facilitate generation of new types of values in the future --- sql_generation/generation/mod.rs | 1 + sql_generation/generation/predicate/binary.rs | 2 +- sql_generation/generation/table.rs | 232 +----------------- sql_generation/generation/value/cmp.rs | 146 +++++++++++ sql_generation/generation/value/mod.rs | 58 +++++ sql_generation/generation/value/pattern.rs | 44 ++++ 6 files changed, 252 insertions(+), 231 deletions(-) create mode 100644 sql_generation/generation/value/cmp.rs create mode 100644 sql_generation/generation/value/mod.rs create mode 100644 sql_generation/generation/value/pattern.rs diff --git a/sql_generation/generation/mod.rs b/sql_generation/generation/mod.rs index 1292b3448..e67dc482b 100644 --- a/sql_generation/generation/mod.rs +++ b/sql_generation/generation/mod.rs @@ -8,6 +8,7 @@ pub mod opts; pub mod predicate; pub mod query; pub mod table; +pub mod value; pub use opts::*; diff --git a/sql_generation/generation/predicate/binary.rs b/sql_generation/generation/predicate/binary.rs index 37b2e4e93..e3b52d5ec 100644 --- a/sql_generation/generation/predicate/binary.rs +++ b/sql_generation/generation/predicate/binary.rs @@ -6,7 +6,7 @@ use crate::{ generation::{ backtrack, one_of, pick, predicate::{CompoundPredicate, SimplePredicate}, - table::{GTValue, LTValue, LikeValue}, + value::{GTValue, LTValue, LikeValue}, ArbitraryFrom, ArbitraryFromMaybe as _, GenerationContext, }, model::{ diff --git a/sql_generation/generation/table.rs b/sql_generation/generation/table.rs index 6e55942c3..2ced09fee 100644 --- a/sql_generation/generation/table.rs +++ b/sql_generation/generation/table.rs @@ -2,14 +2,9 @@ use std::sync::atomic::{AtomicU64, Ordering}; use indexmap::IndexSet; use rand::Rng; -use turso_core::Value; -use crate::generation::{ - gen_random_text, pick, 
readable_name_custom, Arbitrary, ArbitraryFrom, GenerationContext, -}; -use crate::model::table::{Column, ColumnType, Name, SimValue, Table}; - -use super::ArbitraryFromMaybe; +use crate::generation::{pick, readable_name_custom, Arbitrary, GenerationContext}; +use crate::model::table::{Column, ColumnType, Name, Table}; static COUNTER: AtomicU64 = AtomicU64::new(0); @@ -67,226 +62,3 @@ impl Arbitrary for ColumnType { pick(&[Self::Integer, Self::Float, Self::Text, Self::Blob], rng).to_owned() } } - -impl ArbitraryFrom<&Table> for Vec { - fn arbitrary_from( - rng: &mut R, - context: &C, - table: &Table, - ) -> Self { - let mut row = Vec::new(); - for column in table.columns.iter() { - let value = SimValue::arbitrary_from(rng, context, &column.column_type); - row.push(value); - } - row - } -} - -impl ArbitraryFrom<&Vec<&SimValue>> for SimValue { - fn arbitrary_from( - rng: &mut R, - _context: &C, - values: &Vec<&Self>, - ) -> Self { - if values.is_empty() { - return Self(Value::Null); - } - - pick(values, rng).to_owned().clone() - } -} - -impl ArbitraryFrom<&ColumnType> for SimValue { - fn arbitrary_from( - rng: &mut R, - _context: &C, - column_type: &ColumnType, - ) -> Self { - let value = match column_type { - ColumnType::Integer => Value::Integer(rng.random_range(i64::MIN..i64::MAX)), - ColumnType::Float => Value::Float(rng.random_range(-1e10..1e10)), - ColumnType::Text => Value::build_text(gen_random_text(rng)), - ColumnType::Blob => Value::Blob(gen_random_text(rng).as_bytes().to_vec()), - }; - SimValue(value) - } -} - -pub struct LTValue(pub SimValue); - -impl ArbitraryFrom<&Vec<&SimValue>> for LTValue { - fn arbitrary_from( - rng: &mut R, - context: &C, - values: &Vec<&SimValue>, - ) -> Self { - if values.is_empty() { - return Self(SimValue(Value::Null)); - } - - // Get value less than all values - let value = Value::exec_min(values.iter().map(|value| &value.0)); - Self::arbitrary_from(rng, context, &SimValue(value)) - } -} - -impl ArbitraryFrom<&SimValue> for 
LTValue { - fn arbitrary_from( - rng: &mut R, - _context: &C, - value: &SimValue, - ) -> Self { - let new_value = match &value.0 { - Value::Integer(i) => Value::Integer(rng.random_range(i64::MIN..*i - 1)), - Value::Float(f) => Value::Float(f - rng.random_range(0.0..1e10)), - value @ Value::Text(..) => { - // Either shorten the string, or make at least one character smaller and mutate the rest - let mut t = value.to_string(); - if rng.random_bool(0.01) { - t.pop(); - Value::build_text(t) - } else { - let mut t = t.chars().map(|c| c as u32).collect::>(); - let index = rng.random_range(0..t.len()); - t[index] -= 1; - // Mutate the rest of the string - for val in t.iter_mut().skip(index + 1) { - *val = rng.random_range('a' as u32..='z' as u32); - } - let t = t - .into_iter() - .map(|c| char::from_u32(c).unwrap_or('z')) - .collect::(); - Value::build_text(t) - } - } - Value::Blob(b) => { - // Either shorten the blob, or make at least one byte smaller and mutate the rest - let mut b = b.clone(); - if rng.random_bool(0.01) { - b.pop(); - Value::Blob(b) - } else { - let index = rng.random_range(0..b.len()); - b[index] -= 1; - // Mutate the rest of the blob - for val in b.iter_mut().skip(index + 1) { - *val = rng.random_range(0..=255); - } - Value::Blob(b) - } - } - _ => unreachable!(), - }; - Self(SimValue(new_value)) - } -} - -pub struct GTValue(pub SimValue); - -impl ArbitraryFrom<&Vec<&SimValue>> for GTValue { - fn arbitrary_from( - rng: &mut R, - context: &C, - values: &Vec<&SimValue>, - ) -> Self { - if values.is_empty() { - return Self(SimValue(Value::Null)); - } - // Get value greater than all values - let value = Value::exec_max(values.iter().map(|value| &value.0)); - - Self::arbitrary_from(rng, context, &SimValue(value)) - } -} - -impl ArbitraryFrom<&SimValue> for GTValue { - fn arbitrary_from( - rng: &mut R, - _context: &C, - value: &SimValue, - ) -> Self { - let new_value = match &value.0 { - Value::Integer(i) => Value::Integer(rng.random_range(*i..i64::MAX)), - 
Value::Float(f) => Value::Float(rng.random_range(*f..1e10)), - value @ Value::Text(..) => { - // Either lengthen the string, or make at least one character smaller and mutate the rest - let mut t = value.to_string(); - if rng.random_bool(0.01) { - t.push(rng.random_range(0..=255) as u8 as char); - Value::build_text(t) - } else { - let mut t = t.chars().map(|c| c as u32).collect::>(); - let index = rng.random_range(0..t.len()); - t[index] += 1; - // Mutate the rest of the string - for val in t.iter_mut().skip(index + 1) { - *val = rng.random_range('a' as u32..='z' as u32); - } - let t = t - .into_iter() - .map(|c| char::from_u32(c).unwrap_or('a')) - .collect::(); - Value::build_text(t) - } - } - Value::Blob(b) => { - // Either lengthen the blob, or make at least one byte smaller and mutate the rest - let mut b = b.clone(); - if rng.random_bool(0.01) { - b.push(rng.random_range(0..=255)); - Value::Blob(b) - } else { - let index = rng.random_range(0..b.len()); - b[index] += 1; - // Mutate the rest of the blob - for val in b.iter_mut().skip(index + 1) { - *val = rng.random_range(0..=255); - } - Value::Blob(b) - } - } - _ => unreachable!(), - }; - Self(SimValue(new_value)) - } -} - -pub struct LikeValue(pub SimValue); - -impl ArbitraryFromMaybe<&SimValue> for LikeValue { - fn arbitrary_from_maybe( - rng: &mut R, - _context: &C, - value: &SimValue, - ) -> Option { - match &value.0 { - value @ Value::Text(..) 
=> { - let t = value.to_string(); - let mut t = t.chars().collect::>(); - // Remove a number of characters, either insert `_` for each character removed, or - // insert one `%` for the whole substring - let mut i = 0; - while i < t.len() { - if rng.random_bool(0.1) { - t[i] = '_'; - } else if rng.random_bool(0.05) { - t[i] = '%'; - // skip a list of characters - for _ in 0..rng.random_range(0..=3.min(t.len() - i - 1)) { - t.remove(i + 1); - } - } - i += 1; - } - let index = rng.random_range(0..t.len()); - t.insert(index, '%'); - Some(Self(SimValue(Value::build_text( - t.into_iter().collect::(), - )))) - } - _ => None, - } - } -} diff --git a/sql_generation/generation/value/cmp.rs b/sql_generation/generation/value/cmp.rs new file mode 100644 index 000000000..567a59a5e --- /dev/null +++ b/sql_generation/generation/value/cmp.rs @@ -0,0 +1,146 @@ +use turso_core::Value; + +use crate::{ + generation::{ArbitraryFrom, GenerationContext}, + model::table::SimValue, +}; + +pub struct LTValue(pub SimValue); + +impl ArbitraryFrom<&Vec<&SimValue>> for LTValue { + fn arbitrary_from( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Self { + if values.is_empty() { + return Self(SimValue(Value::Null)); + } + + // Get value less than all values + let value = Value::exec_min(values.iter().map(|value| &value.0)); + Self::arbitrary_from(rng, context, &SimValue(value)) + } +} + +impl ArbitraryFrom<&SimValue> for LTValue { + fn arbitrary_from( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Self { + let new_value = match &value.0 { + Value::Integer(i) => Value::Integer(rng.random_range(i64::MIN..*i - 1)), + Value::Float(f) => Value::Float(f - rng.random_range(0.0..1e10)), + value @ Value::Text(..) 
=> { + // Either shorten the string, or make at least one character smaller and mutate the rest + let mut t = value.to_string(); + if rng.random_bool(0.01) { + t.pop(); + Value::build_text(t) + } else { + let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.random_range(0..t.len()); + t[index] -= 1; + // Mutate the rest of the string + for val in t.iter_mut().skip(index + 1) { + *val = rng.random_range('a' as u32..='z' as u32); + } + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('z')) + .collect::(); + Value::build_text(t) + } + } + Value::Blob(b) => { + // Either shorten the blob, or make at least one byte smaller and mutate the rest + let mut b = b.clone(); + if rng.random_bool(0.01) { + b.pop(); + Value::Blob(b) + } else { + let index = rng.random_range(0..b.len()); + b[index] -= 1; + // Mutate the rest of the blob + for val in b.iter_mut().skip(index + 1) { + *val = rng.random_range(0..=255); + } + Value::Blob(b) + } + } + _ => unreachable!(), + }; + Self(SimValue(new_value)) + } +} + +pub struct GTValue(pub SimValue); + +impl ArbitraryFrom<&Vec<&SimValue>> for GTValue { + fn arbitrary_from( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Self { + if values.is_empty() { + return Self(SimValue(Value::Null)); + } + // Get value greater than all values + let value = Value::exec_max(values.iter().map(|value| &value.0)); + + Self::arbitrary_from(rng, context, &SimValue(value)) + } +} + +impl ArbitraryFrom<&SimValue> for GTValue { + fn arbitrary_from( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Self { + let new_value = match &value.0 { + Value::Integer(i) => Value::Integer(rng.random_range(*i..i64::MAX)), + Value::Float(f) => Value::Float(rng.random_range(*f..1e10)), + value @ Value::Text(..) 
=> { + // Either lengthen the string, or make at least one character larger and mutate the rest + let mut t = value.to_string(); + if rng.random_bool(0.01) { + t.push(rng.random_range(0..=255) as u8 as char); + Value::build_text(t) + } else { + let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.random_range(0..t.len()); + t[index] += 1; + // Mutate the rest of the string + for val in t.iter_mut().skip(index + 1) { + *val = rng.random_range('a' as u32..='z' as u32); + } + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('a')) + .collect::(); + Value::build_text(t) + } + } + Value::Blob(b) => { + // Either lengthen the blob, or make at least one byte larger and mutate the rest + let mut b = b.clone(); + if rng.random_bool(0.01) { + b.push(rng.random_range(0..=255)); + Value::Blob(b) + } else { + let index = rng.random_range(0..b.len()); + b[index] += 1; + // Mutate the rest of the blob + for val in b.iter_mut().skip(index + 1) { + *val = rng.random_range(0..=255); + } + Value::Blob(b) + } + } + _ => unreachable!(), + }; + Self(SimValue(new_value)) + } +} diff --git a/sql_generation/generation/value/mod.rs b/sql_generation/generation/value/mod.rs new file mode 100644 index 000000000..e0c98ad84 --- /dev/null +++ b/sql_generation/generation/value/mod.rs @@ -0,0 +1,58 @@ +use rand::Rng; +use turso_core::Value; + +use crate::{ + generation::{gen_random_text, pick, ArbitraryFrom, GenerationContext}, + model::table::{ColumnType, SimValue, Table}, +}; + +mod cmp; +mod pattern; + +pub use cmp::{GTValue, LTValue}; +pub use pattern::LikeValue; + +impl ArbitraryFrom<&Table> for Vec { + fn arbitrary_from( + rng: &mut R, + context: &C, + table: &Table, + ) -> Self { + let mut row = Vec::new(); + for column in table.columns.iter() { + let value = SimValue::arbitrary_from(rng, context, &column.column_type); + row.push(value); + } + row + } +} + +impl ArbitraryFrom<&Vec<&SimValue>> for SimValue { + fn arbitrary_from( + rng: &mut R, + _context: 
&C, + values: &Vec<&Self>, + ) -> Self { + if values.is_empty() { + return Self(Value::Null); + } + + pick(values, rng).to_owned().clone() + } +} + +impl ArbitraryFrom<&ColumnType> for SimValue { + fn arbitrary_from( + rng: &mut R, + _context: &C, + column_type: &ColumnType, + ) -> Self { + let value = match column_type { + ColumnType::Integer => Value::Integer(rng.random_range(i64::MIN..i64::MAX)), + ColumnType::Float => Value::Float(rng.random_range(-1e10..1e10)), + ColumnType::Text => Value::build_text(gen_random_text(rng)), + ColumnType::Blob => Value::Blob(gen_random_text(rng).as_bytes().to_vec()), + }; + SimValue(value) + } +} diff --git a/sql_generation/generation/value/pattern.rs b/sql_generation/generation/value/pattern.rs new file mode 100644 index 000000000..3bf0d7a9f --- /dev/null +++ b/sql_generation/generation/value/pattern.rs @@ -0,0 +1,44 @@ +use turso_core::Value; + +use crate::{ + generation::{ArbitraryFromMaybe, GenerationContext}, + model::table::SimValue, +}; + +pub struct LikeValue(pub SimValue); + +impl ArbitraryFromMaybe<&SimValue> for LikeValue { + fn arbitrary_from_maybe( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Option { + match &value.0 { + value @ Value::Text(..) => { + let t = value.to_string(); + let mut t = t.chars().collect::>(); + // Remove a number of characters, either insert `_` for each character removed, or + // insert one `%` for the whole substring + let mut i = 0; + while i < t.len() { + if rng.random_bool(0.1) { + t[i] = '_'; + } else if rng.random_bool(0.05) { + t[i] = '%'; + // skip a list of characters + for _ in 0..rng.random_range(0..=3.min(t.len() - i - 1)) { + t.remove(i + 1); + } + } + i += 1; + } + let index = rng.random_range(0..t.len()); + t.insert(index, '%'); + Some(Self(SimValue(Value::build_text( + t.into_iter().collect::(), + )))) + } + _ => None, + } + } +}