From f1146e716c51687d9051c82a7adf6fa7becbebdb Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Mon, 2 Jun 2025 14:39:32 -0300 Subject: [PATCH] inital implementation for ast generation --- Cargo.lock | 2 + simulator/Cargo.toml | 3 +- simulator/generation/expr.rs | 281 +++++++++++++++++++++++++++++++++++ simulator/generation/mod.rs | 1 + 4 files changed, 286 insertions(+), 1 deletion(-) create mode 100644 simulator/generation/expr.rs diff --git a/Cargo.lock b/Cargo.lock index f6fbb4826..95b643eb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1960,7 +1960,9 @@ dependencies = [ "clap", "dirs 6.0.0", "env_logger 0.10.2", + "hex", "limbo_core", + "limbo_sqlite3_parser", "log", "notify", "rand 0.8.5", diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index 852a39cb7..0c7a76e45 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -35,4 +35,5 @@ chrono = { version = "0.4.40", features = ["serde"] } tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } anyhow.workspace = true - +limbo_sqlite3_parser = { workspace = true } +hex = "0.4.3" diff --git a/simulator/generation/expr.rs b/simulator/generation/expr.rs new file mode 100644 index 000000000..86351786a --- /dev/null +++ b/simulator/generation/expr.rs @@ -0,0 +1,281 @@ +use limbo_sqlite3_parser::ast::{ + self, Expr, LikeOperator, Name, Operator, QualifiedName, Type, UnaryOperator, +}; + +use crate::{ + generation::{gen_random_text, pick, pick_index, Arbitrary, ArbitraryFrom}, + SimulatorEnv, +}; + +impl Arbitrary for Box +where + T: Arbitrary, +{ + fn arbitrary(rng: &mut R) -> Self { + Box::from(T::arbitrary(rng)) + } +} + +impl ArbitraryFrom for Box +where + T: ArbitraryFrom, +{ + fn arbitrary_from(rng: &mut R, t: A) -> Self { + Box::from(T::arbitrary_from(rng, t)) + } +} + +impl Arbitrary for Option +where + T: Arbitrary, +{ + fn arbitrary(rng: &mut R) -> Self { + rng.gen_bool(0.5).then_some(T::arbitrary(rng)) + } +} + +impl ArbitraryFrom for Option +where + T: 
ArbitraryFrom, +{ + fn arbitrary_from(rng: &mut R, t: A) -> Self { + rng.gen_bool(0.5).then_some(T::arbitrary_from(rng, t)) + } +} + +impl ArbitraryFrom for Vec +where + T: ArbitraryFrom, +{ + fn arbitrary_from(rng: &mut R, t: A) -> Self { + let size = rng.gen_range(0..5); + (0..size) + .into_iter() + .map(|_| T::arbitrary_from(rng, t)) + .collect() + } +} + +impl ArbitraryFrom<&SimulatorEnv> for Expr { + fn arbitrary_from(rng: &mut R, t: &SimulatorEnv) -> Self { + // Loop until we get an implemented expression + loop { + let choice = rng.gen_range(0..25); + let expr = match choice { + 0 => Expr::Between { + lhs: Box::arbitrary_from(rng, t), + not: rng.gen_bool(0.5), + start: Box::arbitrary_from(rng, t), + end: Box::arbitrary_from(rng, t), + }, + 1 => Expr::Binary( + Box::arbitrary_from(rng, t), + Operator::arbitrary(rng), + Box::arbitrary_from(rng, t), + ), + 2 => Expr::Case { + base: Option::arbitrary_from(rng, t), + when_then_pairs: { + let size = rng.gen_range(0..5); + (0..size) + .into_iter() + .map(|_| (Self::arbitrary_from(rng, t), Self::arbitrary_from(rng, t))) + .collect() + }, + else_expr: Option::arbitrary_from(rng, t), + }, + 3 => Expr::Cast { + expr: Box::arbitrary_from(rng, t), + type_name: Option::arbitrary(rng), + }, + 4 => Expr::Collate(Box::arbitrary_from(rng, t), CollateName::arbitrary(rng).0), + // TODO: Skip Column as this is not generated by Parser Normally + 5 => continue, + // TODO: Skip DoublyQualified for now + 6 => continue, + // TODO: skip Exists for now + 7 => continue, + // TODO: skip Function Call for now + 8 => continue, + // TODO: skip Function Call Star for now + 9 => continue, + // TODO: skip ID for now + 10 => continue, + 11 => Expr::InList { + lhs: Box::arbitrary_from(rng, t), + not: rng.gen_bool(0.5), + rhs: Option::arbitrary_from(rng, t), + }, + // TODO: skip InSelect as still need to implement ArbitraryFrom for Select + 12 => continue, + // TODO: skip InTable + 13 => continue, + 14 => Expr::IsNull(Box::arbitrary_from(rng, 
t)), + 15 => { + let op = LikeOperator::arbitrary_from(rng, t); + let escape = if matches!(op, LikeOperator::Like) { + Option::arbitrary_from(rng, t) + } else { + None + }; + Expr::Like { + lhs: Box::arbitrary_from(rng, t), + not: rng.gen_bool(0.5), + op, + rhs: Box::arbitrary_from(rng, t), + escape, + } + } + 16 => Expr::Literal(ast::Literal::arbitrary_from(rng, t)), + // TODO: skip Name + 17 => continue, + 18 => Expr::NotNull(Box::arbitrary_from(rng, t)), + // TODO: only support one parenthesized expression + 19 => Expr::Parenthesized(vec![Expr::arbitrary_from(rng, t)]), + 20 => { + let table_idx = pick_index(t.tables.len(), rng); + let table = &t.tables[table_idx]; + let col_idx = pick_index(table.columns.len(), rng); + let col = &table.columns[col_idx]; + Expr::Qualified(Name(table.name.clone()), Name(col.name.clone())) + } + // TODO: skip Raise + 21 => continue, + // TODO: skip RowId not emitted by parser + 22 => continue, + // TODO: skip subquery + 23 => continue, + 24 => Expr::Unary( + UnaryOperator::arbitrary_from(rng, t), + Box::arbitrary_from(rng, t), + ), + // TODO: skip Variable as it does not make much sense for the simulator + 25 => continue, + _ => unreachable!(), + }; + break expr; + } + } +} + +impl Arbitrary for Operator { + fn arbitrary(rng: &mut R) -> Self { + let choice = rng.gen_range(0..23); + match choice { + 0 => Operator::Add, + 1 => Operator::And, + 2 => Operator::ArrowRight, + 3 => Operator::ArrowRightShift, + 4 => Operator::BitwiseAnd, + 5 => Operator::BitwiseNot, + 6 => Operator::BitwiseOr, + 7 => Operator::Concat, + 8 => Operator::Divide, + 9 => Operator::Equals, + 10 => Operator::Greater, + 11 => Operator::GreaterEquals, + 12 => Operator::Is, + 13 => Operator::IsNot, + 14 => Operator::LeftShift, + 15 => Operator::Less, + 16 => Operator::LessEquals, + 17 => Operator::Modulus, + 18 => Operator::Multiply, + 19 => Operator::NotEquals, + 20 => Operator::Or, + 21 => Operator::RightShift, + 22 => Operator::Subtract, + _ => unreachable!(), + 
} + } +} + +impl Arbitrary for Type { + fn arbitrary(rng: &mut R) -> Self { + let name = pick(&["INT", "INTEGER", "REAL", "TEXT", "BLOB", "ANY"], rng).to_string(); + Self { + name, + size: None, // TODO: come back later here + } + } +} + +struct CollateName(String); + +impl Arbitrary for CollateName { + fn arbitrary(rng: &mut R) -> Self { + let choice = rng.gen_range(0..3); + CollateName( + match choice { + 0 => "BINARY", + 1 => "RTRIM", + 2 => "NOCASE", + _ => unreachable!(), + } + .to_string(), + ) + } +} + +impl ArbitraryFrom<&SimulatorEnv> for QualifiedName { + fn arbitrary_from(rng: &mut R, t: &SimulatorEnv) -> Self { + // TODO: for now just generate table name + let table_idx = pick_index(t.tables.len(), rng); + let table = &t.tables[table_idx]; + // TODO: for now forego alias + Self::single(Name(table.name.clone())) + } +} + +impl ArbitraryFrom<&SimulatorEnv> for LikeOperator { + fn arbitrary_from(rng: &mut R, _t: &SimulatorEnv) -> Self { + let choice = rng.gen_range(0..4); + match choice { + 0 => LikeOperator::Glob, + 1 => LikeOperator::Like, + 2 => LikeOperator::Match, + 3 => LikeOperator::Regexp, + _ => unreachable!(), + } + } +} + +// Current implementation does not take into account the column's affinity nor whether the table is Strict +impl ArbitraryFrom<&SimulatorEnv> for ast::Literal { + fn arbitrary_from(rng: &mut R, _t: &SimulatorEnv) -> Self { + loop { + let choice = rng.gen_range(0..8); + let lit = match choice { + 0 => ast::Literal::Numeric({ + let integer = rng.gen_bool(0.5); + if integer { + rng.gen_range(i64::MIN..i64::MAX).to_string() + } else { + rng.gen_range(-1e10..1e10).to_string() + } + }), + 1 => ast::Literal::String(gen_random_text(rng)), + 2 => ast::Literal::Blob(hex::encode(gen_random_text(rng).as_bytes().to_vec())), + // TODO: skip Keyword + 3 => continue, + 4 => ast::Literal::Null, + // TODO: Ignore CurrentDate stuff for now + _ => continue, + }; + break lit; + } + } +} + +impl ArbitraryFrom<&SimulatorEnv> for UnaryOperator { + fn 
arbitrary_from(rng: &mut R, _t: &SimulatorEnv) -> Self { + let choice = rng.gen_range(0..4); + match choice { + 0 => Self::BitwiseNot, + 1 => Self::Negative, + 2 => Self::Not, + 3 => Self::Positive, + _ => unreachable!(), + } + } +} diff --git a/simulator/generation/mod.rs b/simulator/generation/mod.rs index ac1f97f60..0fea1b704 100644 --- a/simulator/generation/mod.rs +++ b/simulator/generation/mod.rs @@ -3,6 +3,7 @@ use std::{iter::Sum, ops::SubAssign}; use anarchist_readable_name_generator_lib::readable_name_custom; use rand::{distributions::uniform::SampleUniform, Rng}; +mod expr; pub mod plan; pub mod property; pub mod query;