From ea660b947d4a0a26bf91bb621eb989b669ffd2e0 Mon Sep 17 00:00:00 2001 From: meteorgan Date: Sun, 27 Jul 2025 19:13:23 +0800 Subject: [PATCH 1/3] support VALUES clauses for compound select --- core/translate/values.rs | 92 ++++++++++++++++++++++++++--------- testing/insert.test | 9 ++++ testing/select.test | 23 +++++++++ tests/integration/fuzz/mod.rs | 33 +++++++++---- 4 files changed, 126 insertions(+), 31 deletions(-) diff --git a/core/translate/values.rs b/core/translate/values.rs index 28c037850..efefdf443 100644 --- a/core/translate/values.rs +++ b/core/translate/values.rs @@ -2,7 +2,7 @@ use crate::translate::emitter::Resolver; use crate::translate::expr::{translate_expr_no_constant_opt, NoConstantOptReason}; use crate::translate::plan::{QueryDestination, SelectPlan}; use crate::vdbe::builder::ProgramBuilder; -use crate::vdbe::insn::Insn; +use crate::vdbe::insn::{IdxInsertFlags, Insn}; use crate::vdbe::BranchOffset; use crate::Result; @@ -21,7 +21,7 @@ pub fn emit_values( QueryDestination::CoroutineYield { yield_reg, .. } => { emit_values_in_subquery(program, plan, resolver, yield_reg)? } - QueryDestination::EphemeralIndex { .. } => unreachable!(), + QueryDestination::EphemeralIndex { .. } => emit_toplevel_values(program, plan, resolver)?, QueryDestination::EphemeralTable { .. } => unreachable!(), }; Ok(reg_result_cols_start) @@ -45,22 +45,7 @@ fn emit_values_when_single_row( NoConstantOptReason::RegisterReuse, )?; } - match plan.query_destination { - QueryDestination::ResultRows => { - program.emit_insn(Insn::ResultRow { - start_reg, - count: row_len, - }); - } - QueryDestination::CoroutineYield { yield_reg, .. } => { - program.emit_insn(Insn::Yield { - yield_reg, - end_offset: BranchOffset::Offset(0), - }); - } - QueryDestination::EphemeralIndex { .. } => unreachable!(), - QueryDestination::EphemeralTable { .. 
} => unreachable!(), - } + emit_values_to_destination(program, plan, start_reg, row_len); Ok(start_reg) } @@ -106,10 +91,8 @@ fn emit_toplevel_values( }); } - program.emit_insn(Insn::ResultRow { - start_reg: copy_start_reg, - count: row_len, - }); + emit_values_to_destination(program, plan, copy_start_reg, row_len); + program.emit_insn(Insn::Goto { target_pc: goto_label, }); @@ -145,3 +128,68 @@ fn emit_values_in_subquery( Ok(start_reg) } + +fn emit_values_to_destination( + program: &mut ProgramBuilder, + plan: &SelectPlan, + start_reg: usize, + row_len: usize, +) { + match &plan.query_destination { + QueryDestination::ResultRows => { + program.emit_insn(Insn::ResultRow { + start_reg, + count: row_len, + }); + } + QueryDestination::CoroutineYield { yield_reg, .. } => { + program.emit_insn(Insn::Yield { + yield_reg: *yield_reg, + end_offset: BranchOffset::Offset(0), + }); + } + QueryDestination::EphemeralIndex { .. } => { + emit_values_to_index(program, plan, start_reg, row_len); + } + QueryDestination::EphemeralTable { .. 
} => unreachable!(), + } +} + +fn emit_values_to_index( + program: &mut ProgramBuilder, + plan: &SelectPlan, + start_reg: usize, + row_len: usize, +) { + let (cursor_id, index, is_delete) = match &plan.query_destination { + QueryDestination::EphemeralIndex { + cursor_id, + index, + is_delete, + } => (cursor_id, index, is_delete), + _ => unreachable!(), + }; + if *is_delete { + program.emit_insn(Insn::IdxDelete { + start_reg, + num_regs: row_len, + cursor_id: *cursor_id, + raise_error_if_no_matching_entry: false, + }); + } else { + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: row_len, + dest_reg: record_reg, + index_name: Some(index.name.clone()), + }); + program.emit_insn(Insn::IdxInsert { + cursor_id: *cursor_id, + record_reg, + unpacked_start: None, + unpacked_count: None, + flags: IdxInsertFlags::new().no_op_duplicate(), + }); + } +} diff --git a/testing/insert.test b/testing/insert.test index e2d1b327b..8bc7e5aa0 100755 --- a/testing/insert.test +++ b/testing/insert.test @@ -344,6 +344,15 @@ if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-s } {1|100 2|200} + do_execsql_test_on_specific_db {:memory:} insert_from_select_union-2 { + CREATE TABLE t (a, b); + CREATE TABLE t2 (b, c); + + INSERT INTO t SELECT * FROM t UNION values(1, 100), (2, 200); + SELECT * FROM t; + } {1|100 + 2|200} + do_execsql_test_on_specific_db {:memory:} insert_from_select_intersect { CREATE TABLE t (a, b); CREATE TABLE t1 (a, b); diff --git a/testing/select.test b/testing/select.test index 15b92f97f..988b95f95 100755 --- a/testing/select.test +++ b/testing/select.test @@ -583,6 +583,29 @@ if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-s select * from t INTERSECT select * from u EXCEPT select * from v; } {} + + do_execsql_test_on_specific_db {:memory:} select-values-union { + CREATE TABLE t (x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + + values('x', 'x') 
UNION select * from t; + } {x|x + y|y} + + do_execsql_test_on_specific_db {:memory:} select-values-union-2 { + CREATE TABLE t (x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + + values('x', 'x'), ('y', 'y') UNION select * from t; + } {x|x + y|y} + + do_execsql_test_on_specific_db {:memory:} select-values-except { + CREATE TABLE t (x TEXT, y TEXT); + INSERT INTO t VALUES('x','x'),('y','y'); + + select * from t EXCEPT values('x','x'),('z','y'); + } {y|y} } do_execsql_test_on_specific_db {:memory:} select-no-match-in-leaf-page { diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index f2620177f..1eab15322 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -569,14 +569,28 @@ mod tests { .map(|c| c.to_string()) .collect::>(); - for _ in 0..num_selects_in_union { - // Randomly pick a table - let table_to_select_from = &table_names[rng.random_range(0..table_names.len())]; - select_statements.push(format!( - "SELECT {} FROM {}", - cols_to_select.join(", "), - table_to_select_from - )); + let mut has_right_most_values = false; + for i in 0..num_selects_in_union { + let p = 1.0 / table_names.len() as f64; + // Randomly decide whether to use a VALUES clause or a SELECT clause + if rng.random_bool(p) { + let values = (0..cols_to_select.len()) + .map(|_| rng.random_range(-3..3)) + .map(|val| val.to_string()) + .collect::>(); + select_statements.push(format!("VALUES({})", values.join(", "))); + if i == (num_selects_in_union - 1) { + has_right_most_values = true; + } + } else { + // Randomly pick a table + let table_to_select_from = &table_names[rng.random_range(0..table_names.len())]; + select_statements.push(format!( + "SELECT {} FROM {}", + cols_to_select.join(", "), + table_to_select_from + )); + } } const COMPOUND_OPERATORS: [&str; 4] = @@ -590,7 +604,8 @@ mod tests { query.push_str(select_statement); } - if rng.random_bool(0.8) { + // if the right most SELECT is a VALUES claude, no limit is not allowed + if 
rng.random_bool(0.8) && !has_right_most_values { let limit_val = rng.random_range(0..=MAX_LIMIT_VALUE); // LIMIT 0 is valid query = format!("{query} LIMIT {limit_val}"); } From aa69b279c39298b72eb49f10cc0a01dab3e8e37a Mon Sep 17 00:00:00 2001 From: meteorgan Date: Mon, 28 Jul 2025 00:58:20 +0800 Subject: [PATCH 2/3] support limit --- core/translate/compound_select.rs | 2 +- core/translate/emitter.rs | 2 +- core/translate/values.rs | 27 +++++++++++++++++++++------ testing/select.test | 18 ++++++++++++++++++ 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/core/translate/compound_select.rs b/core/translate/compound_select.rs index 426d396ab..d17a28b79 100644 --- a/core/translate/compound_select.rs +++ b/core/translate/compound_select.rs @@ -311,7 +311,7 @@ fn create_dedupe_index( schema: &Schema, ) -> crate::Result<(usize, Arc)> { if !schema.indexes_enabled { - crate::bail_parse_error!("UNION OR INTERSECT is not supported without indexes"); + crate::bail_parse_error!("UNION OR INTERSECT or EXCEPT is not supported without indexes"); } let dedupe_index = Arc::new(Index { diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 3ab881813..32d4035a7 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -265,7 +265,7 @@ pub fn emit_query<'a>( t_ctx: &mut TranslateCtx<'a>, ) -> Result { if !plan.values.is_empty() { - let reg_result_cols_start = emit_values(program, plan, &t_ctx.resolver)?; + let reg_result_cols_start = emit_values(program, plan, &t_ctx.resolver, t_ctx.limit_ctx)?; return Ok(reg_result_cols_start); } diff --git a/core/translate/values.rs b/core/translate/values.rs index efefdf443..73a33d5eb 100644 --- a/core/translate/values.rs +++ b/core/translate/values.rs @@ -1,4 +1,4 @@ -use crate::translate::emitter::Resolver; +use crate::translate::emitter::{LimitCtx, Resolver}; use crate::translate::expr::{translate_expr_no_constant_opt, NoConstantOptReason}; use crate::translate::plan::{QueryDestination, SelectPlan}; 
use crate::vdbe::builder::ProgramBuilder; @@ -10,18 +10,21 @@ pub fn emit_values( program: &mut ProgramBuilder, plan: &SelectPlan, resolver: &Resolver, + limit_ctx: Option, ) -> Result { if plan.values.len() == 1 { - let start_reg = emit_values_when_single_row(program, plan, resolver)?; + let start_reg = emit_values_when_single_row(program, plan, resolver, limit_ctx)?; return Ok(start_reg); } let reg_result_cols_start = match plan.query_destination { - QueryDestination::ResultRows => emit_toplevel_values(program, plan, resolver)?, + QueryDestination::ResultRows => emit_toplevel_values(program, plan, resolver, limit_ctx)?, QueryDestination::CoroutineYield { yield_reg, .. } => { emit_values_in_subquery(program, plan, resolver, yield_reg)? } - QueryDestination::EphemeralIndex { .. } => emit_toplevel_values(program, plan, resolver)?, + QueryDestination::EphemeralIndex { .. } => { + emit_toplevel_values(program, plan, resolver, limit_ctx)? + } QueryDestination::EphemeralTable { .. } => unreachable!(), }; Ok(reg_result_cols_start) @@ -31,6 +34,7 @@ fn emit_values_when_single_row( program: &mut ProgramBuilder, plan: &SelectPlan, resolver: &Resolver, + limit_ctx: Option, ) -> Result { let first_row = &plan.values[0]; let row_len = first_row.len(); @@ -45,7 +49,9 @@ fn emit_values_when_single_row( NoConstantOptReason::RegisterReuse, )?; } - emit_values_to_destination(program, plan, start_reg, row_len); + let end_label = program.allocate_label(); + emit_values_to_destination(program, plan, start_reg, row_len, limit_ctx, end_label); + program.preassign_label_to_next_insn(end_label); Ok(start_reg) } @@ -53,6 +59,7 @@ fn emit_toplevel_values( program: &mut ProgramBuilder, plan: &SelectPlan, resolver: &Resolver, + limit_ctx: Option, ) -> Result { let yield_reg = program.alloc_register(); let definition_label = program.allocate_label(); @@ -91,7 +98,7 @@ fn emit_toplevel_values( }); } - emit_values_to_destination(program, plan, copy_start_reg, row_len); + 
emit_values_to_destination(program, plan, copy_start_reg, row_len, limit_ctx, end_label); program.emit_insn(Insn::Goto { target_pc: goto_label, @@ -134,6 +141,8 @@ fn emit_values_to_destination( plan: &SelectPlan, start_reg: usize, row_len: usize, + limit_ctx: Option, + end_label: BranchOffset, ) { match &plan.query_destination { QueryDestination::ResultRows => { @@ -141,6 +150,12 @@ fn emit_values_to_destination( start_reg, count: row_len, }); + if let Some(limit_ctx) = limit_ctx { + program.emit_insn(Insn::DecrJumpZero { + reg: limit_ctx.reg_limit, + target_pc: end_label, + }); + } } QueryDestination::CoroutineYield { yield_reg, .. } => { program.emit_insn(Insn::Yield { diff --git a/testing/select.test b/testing/select.test index 988b95f95..ec434b538 100755 --- a/testing/select.test +++ b/testing/select.test @@ -606,6 +606,24 @@ if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-s select * from t EXCEPT values('x','x'),('z','y'); } {y|y} + + do_execsql_test_on_specific_db {:memory:} select-values-union-all-limit { + CREATE TABLE t (x TEXT); + INSERT INTO t VALUES('x'), ('y'), ('z'); + + values('x') UNION ALL select * from t limit 3; + } {x + x + y} + + do_execsql_test_on_specific_db {:memory:} select-values-union-all-limit-2 { + CREATE TABLE t (x TEXT); + INSERT INTO t VALUES('x'), ('y'), ('z'); + + values('a'), ('b') UNION ALL select * from t limit 3; + } {a + b + x} } do_execsql_test_on_specific_db {:memory:} select-no-match-in-leaf-page { From f0c2c377c4e2dc6d8929401c0037e419bc0089e9 Mon Sep 17 00:00:00 2001 From: meteorgan Date: Mon, 28 Jul 2025 01:01:03 +0800 Subject: [PATCH 3/3] fix typo --- tests/integration/fuzz/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 1eab15322..5a5216c1a 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -604,7 +604,7 @@ mod tests { query.push_str(select_statement); } - // if the 
right most SELECT is a VALUES claude, no limit is not allowed + // if the right most SELECT is a VALUES clause, LIMIT is not allowed if rng.random_bool(0.8) && !has_right_most_values { let limit_val = rng.random_range(0..=MAX_LIMIT_VALUE); // LIMIT 0 is valid query = format!("{query} LIMIT {limit_val}");