Merge 'Fix: Evaluating expression in LIMIT and OFFSET clauses.' from

Closes #3687 .
Previously, the `try_fold_expr_to_i64` function casted `NULL` as `0`
when evaluating expressions in `LIMIT` or `OFFSET` clauses. I removed
this function since evaluating the expression directly and relying on
the MustBeInt operation for casting seems to handle everything.

Closes #3695
This commit is contained in:
Jussi Saurio
2025-10-15 10:36:36 +03:00
committed by GitHub
6 changed files with 234 additions and 125 deletions

View File

@@ -3,14 +3,13 @@ use crate::translate::collate::get_collseq_from_expr;
use crate::translate::emitter::{emit_query, LimitCtx, Resolver, TranslateCtx};
use crate::translate::expr::translate_expr;
use crate::translate::plan::{Plan, QueryDestination, SelectPlan};
use crate::translate::result_row::try_fold_expr_to_i64;
use crate::vdbe::builder::{CursorType, ProgramBuilder};
use crate::vdbe::insn::Insn;
use crate::vdbe::BranchOffset;
use crate::{emit_explain, QueryMode, SymbolTable};
use crate::{emit_explain, LimboError, QueryMode, SymbolTable};
use std::sync::Arc;
use tracing::instrument;
use turso_parser::ast::{CompoundOperator, SortOrder};
use turso_parser::ast::{CompoundOperator, Expr, Literal, SortOrder};
use tracing::Level;
@@ -41,44 +40,66 @@ pub fn emit_program_for_compound_select(
// Each subselect shares the same limit_ctx and offset, because the LIMIT, OFFSET applies to
// the entire compound select, not just a single subselect.
let limit_ctx = limit.as_ref().map(|limit| {
let reg = program.alloc_register();
if let Some(val) = try_fold_expr_to_i64(limit) {
program.emit_insn(Insn::Integer {
value: val,
dest: reg,
});
} else {
program.add_comment(program.offset(), "OFFSET expr");
_ = translate_expr(program, None, limit, reg, &right_most_ctx.resolver);
let limit_ctx = limit
.as_ref()
.map(|limit| {
let reg = program.alloc_register();
match limit.as_ref() {
Expr::Literal(Literal::Numeric(n)) => {
if let Ok(value) = n.parse::<i64>() {
program.add_comment(program.offset(), "LIMIT counter");
program.emit_insn(Insn::Integer { value, dest: reg });
} else {
let value = n
.parse::<f64>()
.map_err(|_| LimboError::ParseError("invalid limit".to_string()))?;
program.emit_insn(Insn::Real { value, dest: reg });
program.add_comment(program.offset(), "LIMIT counter");
program.emit_insn(Insn::MustBeInt { reg });
}
}
_ => {
_ = translate_expr(program, None, limit, reg, &right_most_ctx.resolver);
program.add_comment(program.offset(), "LIMIT counter");
program.emit_insn(Insn::MustBeInt { reg });
}
}
Ok::<_, LimboError>(LimitCtx::new_shared(reg))
})
.transpose()?;
let offset_reg = offset
.as_ref()
.map(|offset_expr| {
let reg = program.alloc_register();
match offset_expr.as_ref() {
Expr::Literal(Literal::Numeric(n)) => {
// Compile-time constant offset
if let Ok(value) = n.parse::<i64>() {
program.emit_insn(Insn::Integer { value, dest: reg });
} else {
let value = n
.parse::<f64>()
.map_err(|_| LimboError::ParseError("invalid offset".to_string()))?;
program.emit_insn(Insn::Real { value, dest: reg });
}
}
_ => {
_ = translate_expr(program, None, offset_expr, reg, &right_most_ctx.resolver);
}
}
program.add_comment(program.offset(), "OFFSET counter");
program.emit_insn(Insn::MustBeInt { reg });
}
LimitCtx::new_shared(reg)
});
let offset_reg = offset.as_ref().map(|offset_expr| {
let reg = program.alloc_register();
if let Some(val) = try_fold_expr_to_i64(offset_expr) {
// Compile-time constant offset
program.emit_insn(Insn::Integer {
value: val,
dest: reg,
let combined_reg = program.alloc_register();
program.add_comment(program.offset(), "OFFSET + LIMIT");
program.emit_insn(Insn::OffsetLimit {
offset_reg: reg,
combined_reg,
limit_reg: limit_ctx.as_ref().unwrap().reg_limit,
});
} else {
program.add_comment(program.offset(), "OFFSET expr");
_ = translate_expr(program, None, offset_expr, reg, &right_most_ctx.resolver);
program.emit_insn(Insn::MustBeInt { reg });
}
let combined_reg = program.alloc_register();
program.emit_insn(Insn::OffsetLimit {
offset_reg: reg,
combined_reg,
limit_reg: limit_ctx.as_ref().unwrap().reg_limit,
});
reg
});
Ok::<_, LimboError>(reg)
})
.transpose()?;
// When a compound SELECT is part of a query that yields results to a coroutine (e.g. within an INSERT clause),
// we must allocate registers for the result columns to be yielded. Each subselect will then yield to

View File

@@ -6,7 +6,7 @@ use std::num::NonZeroUsize;
use std::sync::Arc;
use tracing::{instrument, Level};
use turso_parser::ast::{self, Expr};
use turso_parser::ast::{self, Expr, Literal};
use super::aggregation::emit_ungrouped_aggregation;
use super::expr::translate_expr;
@@ -37,7 +37,6 @@ use crate::translate::fkeys::{
};
use crate::translate::plan::{DeletePlan, JoinedTable, Plan, QueryDestination, Search};
use crate::translate::planner::ROWID_STRS;
use crate::translate::result_row::try_fold_expr_to_i64;
use crate::translate::values::emit_values;
use crate::translate::window::{emit_window_results, init_window, WindowMetadata};
use crate::util::{exprs_are_equivalent, normalize_ident};
@@ -273,7 +272,7 @@ pub fn emit_query<'a>(
let after_main_loop_label = program.allocate_label();
t_ctx.label_main_loop_end = Some(after_main_loop_label);
init_limit(program, t_ctx, &plan.limit, &plan.offset);
init_limit(program, t_ctx, &plan.limit, &plan.offset)?;
if !plan.values.is_empty() {
let reg_result_cols_start = emit_values(program, plan, t_ctx)?;
@@ -445,7 +444,7 @@ fn emit_program_for_delete(
let after_main_loop_label = program.allocate_label();
t_ctx.label_main_loop_end = Some(after_main_loop_label);
init_limit(program, &mut t_ctx, &plan.limit, &None);
init_limit(program, &mut t_ctx, &plan.limit, &None)?;
// No rows will be read from source table loops if there is a constant false condition eg. WHERE 0
if plan.contains_constant_false_condition {
@@ -900,7 +899,7 @@ fn emit_program_for_update(
let after_main_loop_label = program.allocate_label();
t_ctx.label_main_loop_end = Some(after_main_loop_label);
init_limit(program, &mut t_ctx, &plan.limit, &plan.offset);
init_limit(program, &mut t_ctx, &plan.limit, &plan.offset)?;
// No rows will be read from source table loops if there is a constant false condition eg. WHERE 0
if plan.contains_constant_false_condition {
@@ -2037,62 +2036,82 @@ fn init_limit(
t_ctx: &mut TranslateCtx,
limit: &Option<Box<Expr>>,
offset: &Option<Box<Expr>>,
) {
) -> Result<()> {
if t_ctx.limit_ctx.is_none() && limit.is_some() {
t_ctx.limit_ctx = Some(LimitCtx::new(program));
}
let Some(limit_ctx) = &t_ctx.limit_ctx else {
return;
return Ok(());
};
if limit_ctx.initialize_counter {
if let Some(expr) = limit {
if let Some(value) = try_fold_expr_to_i64(expr) {
program.emit_insn(Insn::Integer {
value,
dest: limit_ctx.reg_limit,
});
} else {
let r = limit_ctx.reg_limit;
program.add_comment(program.offset(), "OFFSET expr");
_ = translate_expr(program, None, expr, r, &t_ctx.resolver);
program.emit_insn(Insn::MustBeInt { reg: r });
match expr.as_ref() {
Expr::Literal(Literal::Numeric(n)) => {
if let Ok(value) = n.parse::<i64>() {
program.add_comment(program.offset(), "LIMIT counter");
program.emit_insn(Insn::Integer {
value,
dest: limit_ctx.reg_limit,
});
} else {
program.emit_insn(Insn::Real {
value: n.parse::<f64>().unwrap(),
dest: limit_ctx.reg_limit,
});
program.add_comment(program.offset(), "LIMIT counter");
program.emit_insn(Insn::MustBeInt {
reg: limit_ctx.reg_limit,
});
}
}
_ => {
let r = limit_ctx.reg_limit;
_ = translate_expr(program, None, expr, r, &t_ctx.resolver);
program.emit_insn(Insn::MustBeInt { reg: r });
}
}
}
}
if t_ctx.reg_offset.is_none() {
if let Some(expr) = offset {
if let Some(value) = try_fold_expr_to_i64(expr) {
if value != 0 {
let reg = program.alloc_register();
t_ctx.reg_offset = Some(reg);
program.emit_insn(Insn::Integer { value, dest: reg });
let combined_reg = program.alloc_register();
t_ctx.reg_limit_offset_sum = Some(combined_reg);
program.emit_insn(Insn::OffsetLimit {
limit_reg: limit_ctx.reg_limit,
offset_reg: reg,
combined_reg,
});
let offset_reg = program.alloc_register();
t_ctx.reg_offset = Some(offset_reg);
match expr.as_ref() {
Expr::Literal(Literal::Numeric(n)) => {
if let Ok(value) = n.parse::<i64>() {
program.emit_insn(Insn::Integer {
value,
dest: offset_reg,
});
} else {
let value = n.parse::<f64>()?;
program.emit_insn(Insn::Real {
value,
dest: limit_ctx.reg_limit,
});
program.emit_insn(Insn::MustBeInt {
reg: limit_ctx.reg_limit,
});
}
}
_ => {
_ = translate_expr(program, None, expr, offset_reg, &t_ctx.resolver);
}
} else {
let reg = program.alloc_register();
t_ctx.reg_offset = Some(reg);
let r = reg;
program.add_comment(program.offset(), "OFFSET expr");
_ = translate_expr(program, None, expr, r, &t_ctx.resolver);
program.emit_insn(Insn::MustBeInt { reg: r });
let combined_reg = program.alloc_register();
t_ctx.reg_limit_offset_sum = Some(combined_reg);
program.emit_insn(Insn::OffsetLimit {
limit_reg: limit_ctx.reg_limit,
offset_reg: reg,
combined_reg,
});
}
program.add_comment(program.offset(), "OFFSET counter");
program.emit_insn(Insn::MustBeInt { reg: offset_reg });
let combined_reg = program.alloc_register();
t_ctx.reg_limit_offset_sum = Some(combined_reg);
program.add_comment(program.offset(), "OFFSET + LIMIT");
program.emit_insn(Insn::OffsetLimit {
limit_reg: limit_ctx.reg_limit,
offset_reg,
combined_reg,
});
}
}
@@ -2105,6 +2124,8 @@ fn init_limit(
target_pc: main_loop_end,
jump_if_null: false,
});
Ok(())
}
/// We have `Expr`s which have *not* had column references bound to them,

View File

@@ -1,5 +1,3 @@
use turso_parser::ast::{Expr, Literal, Operator, UnaryOperator};
use crate::{
vdbe::{
builder::ProgramBuilder,
@@ -174,36 +172,3 @@ pub fn emit_offset(program: &mut ProgramBuilder, jump_to: BranchOffset, reg_offs
decrement_by: 1,
});
}
#[allow(clippy::borrowed_box)]
pub fn try_fold_expr_to_i64(expr: &Box<Expr>) -> Option<i64> {
match expr.as_ref() {
Expr::Literal(Literal::Numeric(n)) => n.parse::<i64>().ok(),
Expr::Literal(Literal::Null) => Some(0),
Expr::Id(name) if !name.quoted() => {
let lowered = name.as_str();
if lowered == "true" {
Some(1)
} else if lowered == "false" {
Some(0)
} else {
None
}
}
Expr::Unary(UnaryOperator::Negative, inner) => try_fold_expr_to_i64(inner).map(|v| -v),
Expr::Unary(UnaryOperator::Positive, inner) => try_fold_expr_to_i64(inner),
Expr::Binary(left, op, right) => {
let l = try_fold_expr_to_i64(left)?;
let r = try_fold_expr_to_i64(right)?;
match op {
Operator::Add => Some(l.saturating_add(r)),
Operator::Subtract => Some(l.saturating_sub(r)),
Operator::Multiply => Some(l.saturating_mul(r)),
Operator::Divide if r != 0 => Some(l.saturating_div(r)),
_ => None,
}
}
_ => None,
}
}

View File

@@ -6559,21 +6559,17 @@ pub fn op_must_be_int(
Value::Integer(_) => {}
Value::Float(f) => match cast_real_to_integer(*f) {
Ok(i) => state.registers[*reg] = Register::Value(Value::Integer(i)),
Err(_) => crate::bail_parse_error!(
"MustBeInt: the value in register cannot be cast to integer"
),
Err(_) => crate::bail_parse_error!("datatype mismatch"),
},
Value::Text(text) => match checked_cast_text_to_numeric(text.as_str()) {
Ok(Value::Integer(i)) => state.registers[*reg] = Register::Value(Value::Integer(i)),
Ok(Value::Float(f)) => {
state.registers[*reg] = Register::Value(Value::Integer(f as i64))
}
_ => crate::bail_parse_error!(
"MustBeInt: the value in register cannot be cast to integer"
),
_ => crate::bail_parse_error!("datatype mismatch"),
},
_ => {
crate::bail_parse_error!("MustBeInt: the value in register cannot be cast to integer");
crate::bail_parse_error!("datatype mismatch");
}
};
state.pc += 1;

49
testing/offset.test Normal file → Executable file
View File

@@ -64,3 +64,52 @@ do_execsql_test_on_specific_db {:memory:} select-ungrouped-aggregate-with-offset
INSERT INTO t VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
SELECT COUNT(a) FROM t LIMIT 1 OFFSET 1;
} {}
do_execsql_test_on_specific_db {:memory:} offset-expr-can-be-cast-losslessly-1 {
SELECT 1 LIMIT 3 OFFSET 1.1 + 2.9;
} {}
do_execsql_test_on_specific_db {:memory:} offset-expr-can-be-cast-losslessly-2 {
CREATE TABLE T(a);
INSERT INTO T VALUES (1),(2),(3),(4);
SELECT * FROM T LIMIT 1+'2' OFFSET 1.6/2 + 3.6/3 + 4*0.25;
} {4}
# Strings are cast to float. Final result is integer losslessly
do_execsql_test_on_specific_db {:memory:} offset-expr-can-be-cast-losslessly-3 {
CREATE TABLE T(a);
INSERT INTO T VALUES (1),(2),(3),(4);
SELECT * FROM T LIMIT 3 OFFSET '0.8' + '1.2' + '4'*'0.25';
} {4}
# Strings are cast to 0. Expression still valid.
do_execsql_test_on_specific_db {:memory:} offset-expr-int-and-string {
SELECT 1 LIMIT 3 OFFSET 3/3 + 'test' + 4*'test are best';
} {}
do_execsql_test_in_memory_error_content offset-expr-cannot-be-cast-losslessly-1 {
SELECT 1 LIMIT 3 OFFSET 1.1;
} {"datatype mismatch"}
do_execsql_test_in_memory_error_content offset-expr-cannot-be-cast-losslessly-2 {
SELECT 1 LIMIT 3 OFFSET 1.1 + 2.2 + 1.9/8;
} {"datatype mismatch"}
# Return error as float in expression cannot be cast losslessly
do_execsql_test_in_memory_error_content offset-expr-cannot-be-cast-losslessly-3 {
SELECT 1 LIMIT 3 OFFSET 1.1 + 'a';
} {"datatype mismatch"}
do_execsql_test_in_memory_error_content offset-expr-invalid-data-type-1 {
SELECT 1 LIMIT 3 OFFSET 'a';
} {"datatype mismatch"}
do_execsql_test_in_memory_error_content offset-expr-invalid-data-type-2 {
SELECT 1 LIMIT 3 OFFSET NULL;
} {"datatype mismatch"}
# Expression below evaluates to NULL (string → 0)
do_execsql_test_in_memory_error_content offset-expr-invalid-data-type-3 {
SELECT 1 LIMIT 3 OFFSET 1/'iwillbezero ;-; ';
} {"datatype mismatch"}

View File

@@ -951,6 +951,63 @@ foreach {testname limit ans} {
"SELECT id FROM users ORDER BY id LIMIT $limit" $ans
}
do_execsql_test_on_specific_db {:memory:} limit-expr-can-be-cast-losslessly-1 {
SELECT 1 LIMIT 1.1 + 2.9;
} {1}
do_execsql_test_on_specific_db {:memory:} limit-expr-can-be-cast-losslessly-2 {
CREATE TABLE T(a);
INSERT INTO T VALUES (1),(1),(1),(1);
SELECT * FROM T LIMIT 1.6/2 + 3.6/3 + 4*0.25;
} {1
1
1}
# Numeric strings are cast to float. The final evaluation of the expression returns an int losslessly
do_execsql_test_on_specific_db {:memory:} limit-expr-can-be-cast-losslessly-3 {
CREATE TABLE T(a);
INSERT INTO T VALUES (1),(1),(1),(1);
SELECT * FROM T LIMIT '0.8' + '1.2' + 4*0.25;
} {1
1
1}
# Invalid strings are cast to 0. So expression is valid
do_execsql_test_on_specific_db {:memory:} limit-expr-int-and-string {
SELECT 1 LIMIT 3/3 + 'test' + 4*'test are best';
} {1}
do_execsql_test_in_memory_error_content limit-expr-cannot-be-cast-losslessly-1 {
SELECT 1 LIMIT 1.1;
} {"datatype mismatch"}
do_execsql_test_in_memory_error_content limit-expr-cannot-be-cast-losslessly-2 {
SELECT 1 LIMIT 1.1 + 2.2 + 1.9/8;
} {"datatype mismatch"}
# Return error as float in the expression cannot be cast losslessly
do_execsql_test_in_memory_error_content limit-expr-cannot-be-cast-losslessly-3 {
SELECT 1 LIMIT 1.1 +'a';
} {"datatype mismatch"}
do_execsql_test_in_memory_error_content limit-expr-invalid-data-type-1 {
SELECT 1 LIMIT 'a';
} {"datatype mismatch"}
do_execsql_test_in_memory_error_content limit-expr-invalid-data-type-2 {
SELECT 1 LIMIT NULL;
} {"datatype mismatch"}
# The expression below evaluates to NULL as string is cast to 0
do_execsql_test_in_memory_error_content limit-expr-invalid-data-type-3 {
SELECT 1 LIMIT 1/'iwillbezero ;-; ' ;
} {"datatype mismatch"}
# Expression is evaluated as NULL
do_execsql_test_in_memory_error_content limit-expr-invalid-data-type-4 {
SELECT 1 LIMIT 4+NULL;
} {"datatype mismatch"}
do_execsql_test_on_specific_db {:memory:} rowid-references {
CREATE TABLE test_table (id INTEGER);
INSERT INTO test_table VALUES (5),(5);