turso/core/translate/upsert.rs
Preston Thorpe 74bbb0d5a3 Merge 'Allow using indexes to iterate rows in UPDATE statements' from Jussi Saurio
Closes #2600
## Problem
Every btree has a key it is sorted by - this is the integer `rowid` for
tables and an arbitrary-sized, potentially multi-column key for indexes.
Executing an UPDATE in a loop is not safe if the update modifies any
part of the key of the btree that is used for iterating the rows in said
loop. For example:
- Using the table itself to iterate rows is not safe if the UPDATE
modifies the rowid (or rowid alias) of a row, because changing the key
alters the iteration order itself and may cause rows to be skipped:
```sql
CREATE TABLE t(x INTEGER PRIMARY KEY, y);
INSERT <something>
UPDATE t SET y = RANDOM() where x > 100; -- safe to iterate 't', 'y' is not being modified
UPDATE t SET x = RANDOM() where x > 100; -- not safe to iterate 't', 'x' is being modified
```
- Using an index to iterate rows is not safe if the UPDATE modifies any
of the columns in the index key:
```sql
CREATE TABLE t(x, y, z);
CREATE INDEX txy ON t (x,y);
INSERT <something>
UPDATE t SET z = RANDOM() where x = 100 and y > 0; -- safe to iterate txy, neither x nor y is being modified
UPDATE t SET x = RANDOM() where x = 100 and y > 0; -- not safe to iterate txy, 'x' is being modified
UPDATE t SET y = RANDOM() where x = 100 and y > 0; -- not safe to iterate txy, 'y' is being modified
```
## Current solution in tursodb
Our current `main` code recognizes this issue and adopts this pseudocode
algorithm from SQLite:
- open a table or index for reading the rows of the source table,
- for each row that matches the condition in the UPDATE statement, write
the row into a temporary table
- then use that temporary table for iteration in the UPDATE loop.
This guarantees that the iteration order will not be affected by the
UPDATEs because the ephemeral table is not under modification.
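A rough SQL-level sketch of that two-pass shape (illustrative only: the real plan
populates an ephemeral b-tree via bytecode, only rowids are snapshotted here for
brevity, and the `scratch` name is made up):
```sql
-- Pass 1: snapshot the rowids of the matching rows into a separate table.
CREATE TEMP TABLE scratch(id INTEGER);
INSERT INTO scratch SELECT rowid FROM t WHERE x > 100;
-- Pass 2: drive the UPDATE from the snapshot, so modifying 'x' (the key)
-- cannot disturb the iteration order.
UPDATE t SET x = RANDOM() WHERE rowid IN (SELECT id FROM scratch);
DROP TABLE scratch;
```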
## Problem with current solution
Our `main` code special-cases the ephemeral table solution to rowids /
rowid aliases only. Using indexes for UPDATE iteration was disabled in
an earlier PR (#2599) due to the safety issue mentioned above, which
means that many UPDATE statements become full table scans:
```sql
turso> create table t(x PRIMARY KEY);
turso> insert into t select value from generate_series(1,10000);
turso> explain update t set x = x + 100000 where x > 50 and x < 60;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     28    0                    0   Start at 28
1     OpenWrite          0     2     0                    0   root=2; iDb=0
2     OpenWrite          1     3     0                    0   root=3; iDb=0
-- scan entire 't' despite very narrow update range!
3     Rewind             0     27    0                    0   Rewind table t
...
```
## Solution
We move the ephemeral table logic to _after_ the optimizer has selected
the best access path for the table, and then, if the UPDATE modifies the
key of the chosen access path (table or index; whichever was selected by
the optimizer), we change the plan to include the ephemeral table
prepopulation. Hence, the same query from above becomes:
```sql
turso> explain update t set x = x + 100000 where x > 50 and x < 60;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     35    0                    0   Start at 35
1     OpenEphemeral      0     1     0                    0   cursor=0 is_table=true
2     OpenRead           1     3     0                    0   index=sqlite_autoindex_t_1, root=3, iDb=0
3     Integer            50    2     0                    0   r[2]=50
-- index seek on PRIMARY KEY index
4     SeekGT             1     10    2                    0   key=[2..2]
5       Integer          60    2     0                    0   r[2]=60
6       IdxGE            1     10    2                    0   key=[2..2]
7       IdxRowId         1     1     0                    0   r[1]=cursor 1 for index sqlite_autoindex_t_1.rowid
8       Insert           0     3     1     ephemeral_scratch  2   intkey=r[1] data=r[3]
9     Next               1     6     0                    0   
10    OpenWrite          2     2     0                    0   root=2; iDb=0
11    OpenWrite          3     3     0                    0   root=3; iDb=0
-- only scan rows that were inserted to ephemeral index
12    Rewind             0     34    0                    0   Rewind table ephemeral_scratch
13      RowId            0     5     0                    0   r[5]=ephemeral_scratch.rowid
```
Note that an ephemeral table does not have to be used if the chosen
index's key is not affected:
```sql
turso> create table t(x PRIMARY KEY, data);
turso> explain update t set data = 'some_data' where x > 50 and x < 60;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     15    0                    0   Start at 15
1     OpenWrite          0     2     0                    0   root=2; iDb=0
2     OpenWrite          1     3     0                    0   root=3; iDb=0
3     Integer            50    1     0                    0   r[1]=50
-- direct index seek
4     SeekGT             1     14    1                    0   key=[1..1]
```

Reviewed-by: Preston Thorpe <preston@turso.tech>

Closes #3728
2025-10-14 16:11:25 -04:00


use std::collections::HashSet;
use std::num::NonZeroUsize;
use std::{collections::HashMap, sync::Arc};
use turso_parser::ast::{self, Upsert};
use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY;
use crate::schema::ROWID_SENTINEL;
use crate::translate::emitter::UpdateRowSource;
use crate::translate::expr::{walk_expr, WalkControl};
use crate::translate::fkeys::{emit_fk_child_update_counters, emit_parent_pk_change_checks};
use crate::translate::insert::{format_unique_violation_desc, InsertEmitCtx};
use crate::translate::planner::ROWID_STRS;
use crate::vdbe::insn::CmpInsFlags;
use crate::Connection;
use crate::{
bail_parse_error,
error::SQLITE_CONSTRAINT_NOTNULL,
schema::{Index, IndexColumn, Schema, Table},
translate::{
emitter::{
emit_cdc_full_record, emit_cdc_insns, emit_cdc_patch_record, OperationMode, Resolver,
},
expr::{
emit_returning_results, translate_expr, translate_expr_no_constant_opt, walk_expr_mut,
NoConstantOptReason, ReturningValueRegisters,
},
insert::Insertion,
plan::ResultSetColumn,
},
util::normalize_ident,
vdbe::{
builder::ProgramBuilder,
insn::{IdxInsertFlags, InsertFlags, Insn},
},
};
// The following comment is copied directly from SQLite source and should be used as a guiding light
// whenever we encounter compatibility bugs related to conflict clause handling:
/* UNIQUE and PRIMARY KEY constraints should be handled in the following
** order:
**
** (1) OE_Update
** (2) OE_Abort, OE_Fail, OE_Rollback, OE_Ignore
** (3) OE_Replace
**
** OE_Fail and OE_Ignore must happen before any changes are made.
** OE_Update guarantees that only a single row will change, so it
** must happen before OE_Replace. Technically, OE_Abort and OE_Rollback
** could happen in any order, but they are grouped up front for
** convenience.
**
** 2018-08-14: Ticket https://www.sqlite.org/src/info/908f001483982c43
** The order of constraints used to have OE_Update as (2) and OE_Abort
** and so forth as (1). But apparently PostgreSQL checks the OE_Update
** constraint before any others, so it had to be moved.
**
** Constraint checking code is generated in this order:
** (A) The rowid constraint
** (B) Unique index constraints that do not have OE_Replace as their
** default conflict resolution strategy
** (C) Unique index that do use OE_Replace by default.
**
** The ordering of (2) and (3) is accomplished by making sure the linked
** list of indexes attached to a table puts all OE_Replace indexes last
** in the list. See sqlite3CreateIndex() for where that happens.
*/
/// A ConflictTarget is extracted from each ON CONFLICT target,
/// e.g. INSERT INTO x(a) ON CONFLICT *(a COLLATE nocase)*
#[derive(Debug, Clone)]
pub struct ConflictTarget {
/// The normalized column name in question
col_name: String,
/// Possible collation name, normalized to lowercase
collate: Option<String>,
}
// Extract `(column, optional_collate)` from an ON CONFLICT target Expr.
// Accepts: Id, Qualified, DoublyQualified, Parenthesized, Collate
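// e.g. `ON CONFLICT(a)`, `ON CONFLICT((a))` and `ON CONFLICT(a COLLATE nocase)` all
// yield col_name "a"; only the last also carries the collation ("nocase").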
fn extract_target_key(e: &ast::Expr) -> Option<ConflictTarget> {
match e {
ast::Expr::Collate(inner, c) => {
let mut tk = extract_target_key(inner.as_ref())?;
let cstr = c.as_str();
tk.collate = Some(cstr.to_ascii_lowercase());
Some(tk)
}
ast::Expr::Parenthesized(v) if v.len() == 1 => extract_target_key(&v[0]),
ast::Expr::Id(name) => Some(ConflictTarget {
col_name: normalize_ident(name.as_str()),
collate: None,
}),
// t.a or db.t.a: accept ident or quoted in the column position
ast::Expr::Qualified(_, col) | ast::Expr::DoublyQualified(_, _, col) => {
let cname = col.as_str();
Some(ConflictTarget {
col_name: normalize_ident(cname),
collate: None,
})
}
_ => None,
}
}
// Return the index key column's effective collation.
// If `idx_col.collation` is None, fall back to the column default or "BINARY".
fn effective_collation_for_index_col(idx_col: &IndexColumn, table: &Table) -> String {
if let Some(c) = idx_col.collation.as_ref() {
return c.to_string().to_ascii_lowercase();
}
// Otherwise use the table default, or default to BINARY
table
.get_column_by_name(&idx_col.name)
.map(|s| {
s.1.collation
.map(|c| c.to_string().to_ascii_lowercase())
.unwrap_or_else(|| "binary".to_string())
})
.unwrap_or_else(|| "binary".to_string())
}
/// Match ON CONFLICT target to the PRIMARY KEY/rowid alias.
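/// e.g. for `CREATE TABLE t(id INTEGER PRIMARY KEY, v)`, `ON CONFLICT(id)` matches
/// because `id` is the rowid alias; a non-rowid PRIMARY KEY (e.g. TEXT) does not match
/// here and is handled by the UNIQUE-index matching instead.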
pub fn upsert_matches_rowid_alias(upsert: &Upsert, table: &Table) -> bool {
let Some(t) = upsert.index.as_ref() else {
// omitted target matches everything, CatchAll handled elsewhere
return false;
};
if t.targets.len() != 1 {
return false;
}
// Only treat as PK if the PK is the rowid alias (INTEGER PRIMARY KEY)
let pk = table.columns().iter().find(|c| c.is_rowid_alias);
if let Some(pkcol) = pk {
extract_target_key(&t.targets[0].expr).is_some_and(|tk| {
tk.col_name
.eq_ignore_ascii_case(pkcol.name.as_ref().unwrap_or(&String::new()))
})
} else {
false
}
}
/// Returns the set of changed column indices and whether the rowid was changed.
fn collect_changed_cols(
table: &Table,
set_pairs: &[(usize, Box<ast::Expr>)],
) -> (HashSet<usize>, bool) {
let mut cols_changed = HashSet::with_capacity(table.columns().len());
let mut rowid_changed = false;
for (col_idx, _) in set_pairs {
if let Some(c) = table.columns().get(*col_idx) {
if c.is_rowid_alias {
rowid_changed = true;
} else {
cols_changed.insert(*col_idx);
}
}
}
(cols_changed, rowid_changed)
}
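/// Whether `idx` must be rebuilt for this DO UPDATE: true if the rowid changed,
/// or if any changed column is part of the index key or of its partial WHERE clause.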
#[inline]
fn upsert_index_is_affected(
table: &Table,
idx: &Index,
changed_cols: &HashSet<usize>,
rowid_changed: bool,
) -> bool {
if rowid_changed {
return true;
}
let km = index_keys(idx);
let pm = partial_index_cols(idx, table);
for c in km.iter().chain(pm.iter()) {
if changed_cols.contains(c) {
return true;
}
}
false
}
/// Columns used by index key
#[inline]
fn index_keys(idx: &Index) -> Vec<usize> {
idx.columns.iter().map(|ic| ic.pos_in_table).collect()
}
/// Columns referenced by the partial WHERE (empty if none).
fn partial_index_cols(idx: &Index, table: &Table) -> HashSet<usize> {
use ast::Expr;
let Some(expr) = &idx.where_clause else {
return HashSet::new();
};
let mut out = HashSet::new();
let _ = walk_expr(expr, &mut |e: &ast::Expr| -> crate::Result<WalkControl> {
match e {
Expr::Id(n) => {
if let Some((i, _)) = table.get_column_by_name(&normalize_ident(n.as_str())) {
out.insert(i);
}
}
Expr::Qualified(ns, c) | Expr::DoublyQualified(_, ns, c) => {
// Only count columns that belong to this table
let nsn = normalize_ident(ns.as_str());
let tname = normalize_ident(table.get_name());
if nsn.eq_ignore_ascii_case(&tname) {
if let Some((i, _)) = table.get_column_by_name(&normalize_ident(c.as_str())) {
out.insert(i);
}
}
}
_ => {}
}
Ok(WalkControl::Continue)
});
out
}
/// Match ON CONFLICT target to a UNIQUE index, *ignoring order* but requiring
/// exact coverage (same column multiset). If the target specifies a COLLATED
/// column, the collation must match the index column's effective collation.
/// If the target omits collation, any index collation is accepted.
/// Partial (WHERE) indexes never match.
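/// e.g. `CREATE UNIQUE INDEX u ON t(a, b)` is matched by `ON CONFLICT(a, b)` and
/// `ON CONFLICT(b, a)`, but not by `ON CONFLICT(a)` (arity differs) nor by
/// `ON CONFLICT(a COLLATE nocase, b)` unless the index column's effective collation is nocase.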
pub fn upsert_matches_index(upsert: &Upsert, index: &Index, table: &Table) -> bool {
let Some(target) = upsert.index.as_ref() else {
return true;
};
// must be a non-partial UNIQUE index with identical arity
if !index.unique || index.where_clause.is_some() || target.targets.len() != index.columns.len()
{
return false;
}
// Build a multiset of index columns: (normalized name, effective collation)
// effective collation = index collation if set, else table column default, else "binary"
let mut idx_cols: Vec<(String, String)> = index
.columns
.iter()
.map(|ic| {
(
normalize_ident(&ic.name),
effective_collation_for_index_col(ic, table),
)
})
.collect();
// For each target key, locate a matching index column (name equal ignoring case,
// and collation equal iff the target specifies one). Consume each match once.
for te in &target.targets {
let Some(tk) = extract_target_key(&te.expr) else {
return false;
};
let tname = tk.col_name;
let mut found = None;
for (i, (iname, icoll)) in idx_cols.iter().enumerate() {
if tname.eq_ignore_ascii_case(iname)
&& match tk.collate.as_ref() {
Some(c) => c.eq_ignore_ascii_case(icoll),
None => true, // unspecified collation -> accept any
}
{
found = Some(i);
break;
}
}
if let Some(i) = found {
// consume this index column once (multiset match)
idx_cols.swap_remove(i);
} else {
return false;
}
}
// All target columns matched exactly once
idx_cols.is_empty()
}
#[derive(Clone, Debug)]
pub enum ResolvedUpsertTarget {
// ON CONFLICT DO
CatchAll,
// ON CONFLICT(pk) DO
PrimaryKey,
// matched this non-partial UNIQUE index
Index(Arc<Index>),
}
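/// Resolve the ON CONFLICT target: an omitted target is `CatchAll`, a target naming
/// the rowid-alias PRIMARY KEY is `PrimaryKey`, otherwise the first matching
/// non-partial UNIQUE index; errors if nothing matches.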
pub fn resolve_upsert_target(
schema: &Schema,
table: &Table,
upsert: &Upsert,
) -> crate::Result<ResolvedUpsertTarget> {
// Omitted target, catch-all
if upsert.index.is_none() {
return Ok(ResolvedUpsertTarget::CatchAll);
}
// Targeted: must match PK, only if PK is a rowid alias
if upsert_matches_rowid_alias(upsert, table) {
return Ok(ResolvedUpsertTarget::PrimaryKey);
}
// Otherwise match a UNIQUE index, also covering non-rowid PRIMARY KEYs
for idx in schema.get_indices(table.get_name()) {
if idx.unique && upsert_matches_index(upsert, idx, table) {
return Ok(ResolvedUpsertTarget::Index(Arc::clone(idx)));
}
}
crate::bail_parse_error!(
"ON CONFLICT clause does not match any PRIMARY KEY or UNIQUE constraint"
);
}
#[allow(clippy::too_many_arguments)]
/// Emit the bytecode to implement the `DO UPDATE` arm of an UPSERT.
///
/// This routine is entered after the caller has determined that an INSERT
/// would violate a UNIQUE/PRIMARY KEY constraint and that the user requested
/// `ON CONFLICT ... DO UPDATE`.
///
/// High-level flow:
/// 1. Seek to the conflicting row by rowid and load the current row snapshot
/// into a contiguous set of registers.
/// 2. Optionally duplicate CURRENT into BEFORE* (for index rebuild and CDC).
/// 3. Copy CURRENT into NEW, then evaluate SET expressions into NEW,
/// with all references to the target table columns rewritten to read from
/// the CURRENT registers (per SQLite semantics).
/// 4. Enforce NOT NULL constraints and (if STRICT) type checks on NEW.
/// 5. Rebuild indexes (delete keys using BEFORE, insert keys using NEW).
/// 6. Rewrite the table row payload at the same rowid with NEW.
/// 7. Emit CDC rows and RETURNING output if requested.
/// 8. Jump to `row_done_label`.
///
/// Semantics reference: https://sqlite.org/lang_upsert.html
/// Column references in the DO UPDATE expressions refer to the original
/// (unchanged) row. To refer to would-be inserted values, use `excluded.x`.
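///
/// e.g. `INSERT INTO t(id, v) VALUES (1, 'a')
///      ON CONFLICT(id) DO UPDATE SET v = excluded.v WHERE t.v <> excluded.v`
/// reaches this routine once the INSERT path has found an existing row with id 1.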
pub fn emit_upsert(
program: &mut ProgramBuilder,
table: &Table,
ctx: &InsertEmitCtx,
insertion: &Insertion,
set_pairs: &mut [(usize, Box<ast::Expr>)],
where_clause: &mut Option<Box<ast::Expr>>,
resolver: &Resolver,
returning: &mut [ResultSetColumn],
connection: &Arc<Connection>,
) -> crate::Result<()> {
// Seek & snapshot CURRENT
program.emit_insn(Insn::SeekRowid {
cursor_id: ctx.cursor_id,
src_reg: ctx.conflict_rowid_reg,
target_pc: ctx.row_done_label,
});
let num_cols = ctx.table.columns.len();
let current_start = program.alloc_registers(num_cols);
for (i, col) in ctx.table.columns.iter().enumerate() {
if col.is_rowid_alias {
program.emit_insn(Insn::RowId {
cursor_id: ctx.cursor_id,
dest: current_start + i,
});
} else {
program.emit_insn(Insn::Column {
cursor_id: ctx.cursor_id,
column: i,
dest: current_start + i,
default: None,
});
}
}
// BEFORE for index maintenance / CDC
let before_start = if ctx.cdc_table.is_some() || !ctx.idx_cursors.is_empty() {
let s = program.alloc_registers(num_cols);
program.emit_insn(Insn::Copy {
src_reg: current_start,
dst_reg: s,
extra_amount: num_cols - 1,
});
Some(s)
} else {
None
};
// NEW = CURRENT, then apply SET
let new_start = program.alloc_registers(num_cols);
program.emit_insn(Insn::Copy {
src_reg: current_start,
dst_reg: new_start,
extra_amount: num_cols - 1,
});
// WHERE on target row
if let Some(pred) = where_clause.as_mut() {
rewrite_expr_to_registers(
pred,
table,
current_start,
ctx.conflict_rowid_reg,
Some(table.get_name()),
Some(insertion),
true,
)?;
let pr = program.alloc_register();
translate_expr(program, None, pred, pr, resolver)?;
program.emit_insn(Insn::IfNot {
reg: pr,
target_pc: ctx.row_done_label,
jump_if_null: true,
});
}
// Apply SET; capture rowid change if any
let mut new_rowid_reg: Option<usize> = None;
for (col_idx, expr) in set_pairs.iter_mut() {
rewrite_expr_to_registers(
expr,
table,
current_start,
ctx.conflict_rowid_reg,
Some(table.get_name()),
Some(insertion),
true,
)?;
translate_expr_no_constant_opt(
program,
None,
expr,
new_start + *col_idx,
resolver,
NoConstantOptReason::RegisterReuse,
)?;
let col = &table.columns()[*col_idx];
if col.notnull && !col.is_rowid_alias {
program.emit_insn(Insn::HaltIfNull {
target_reg: new_start + *col_idx,
err_code: SQLITE_CONSTRAINT_NOTNULL,
description: String::from(table.get_name()) + col.name.as_ref().unwrap(),
});
}
if col.is_rowid_alias {
// Must be integer; remember the NEW rowid value
let r = program.alloc_register();
program.emit_insn(Insn::Copy {
src_reg: new_start + *col_idx,
dst_reg: r,
extra_amount: 0,
});
program.emit_insn(Insn::MustBeInt { reg: r });
new_rowid_reg = Some(r);
}
}
if let Some(bt) = table.btree() {
if bt.is_strict {
program.emit_insn(Insn::TypeCheck {
start_reg: new_start,
count: num_cols,
check_generated: true,
table_reference: Arc::clone(&bt),
});
}
}
let (changed_cols, rowid_changed) = collect_changed_cols(table, set_pairs);
let rowid_alias_idx = table.columns().iter().position(|c| c.is_rowid_alias);
let has_direct_rowid_update = set_pairs
.iter()
.any(|(idx, _)| *idx == rowid_alias_idx.unwrap_or(ROWID_SENTINEL));
let has_user_provided_rowid = if let Some(i) = rowid_alias_idx {
set_pairs.iter().any(|(idx, _)| *idx == i) || has_direct_rowid_update
} else {
has_direct_rowid_update
};
let rowid_set_clause_reg = if has_user_provided_rowid {
Some(new_rowid_reg.unwrap_or(ctx.conflict_rowid_reg))
} else {
None
};
if let Some(bt) = table.btree() {
if connection.foreign_keys_enabled() {
let rowid_new_reg = new_rowid_reg.unwrap_or(ctx.conflict_rowid_reg);
// Child-side checks
if resolver.schema.has_child_fks(bt.name.as_str()) {
emit_fk_child_update_counters(
program,
resolver,
&bt,
table.get_name(),
ctx.cursor_id,
new_start,
rowid_new_reg,
&changed_cols,
)?;
}
emit_parent_pk_change_checks(
program,
resolver,
&bt,
ctx.cursor_id,
ctx.conflict_rowid_reg,
new_start,
new_rowid_reg.unwrap_or(ctx.conflict_rowid_reg),
rowid_set_clause_reg,
set_pairs,
)?;
}
}
// Index rebuild (DELETE old, INSERT new), honoring partial-index WHEREs
if let Some(before) = before_start {
for (idx_name, _root, idx_cid) in &ctx.idx_cursors {
let idx_meta = resolver
.schema
.get_index(table.get_name(), idx_name)
.expect("index exists");
if !upsert_index_is_affected(table, idx_meta, &changed_cols, rowid_changed) {
continue; // skip untouched index completely
}
let k = idx_meta.columns.len();
let before_pred_reg = eval_partial_pred_for_row_image(
program,
table,
idx_meta,
before,
ctx.conflict_rowid_reg,
resolver,
);
let new_rowid = new_rowid_reg.unwrap_or(ctx.conflict_rowid_reg);
let new_pred_reg = eval_partial_pred_for_row_image(
program, table, idx_meta, new_start, new_rowid, resolver,
);
// Skip delete if BEFORE predicate false/NULL
let maybe_skip_del = before_pred_reg.map(|r| {
let lbl = program.allocate_label();
program.emit_insn(Insn::IfNot {
reg: r,
target_pc: lbl,
jump_if_null: true,
});
lbl
});
// DELETE old key
let del = program.alloc_registers(k + 1);
for (i, ic) in idx_meta.columns.iter().enumerate() {
let (ci, _) = table.get_column_by_name(&ic.name).unwrap();
program.emit_insn(Insn::Copy {
src_reg: before + ci,
dst_reg: del + i,
extra_amount: 0,
});
}
program.emit_insn(Insn::Copy {
src_reg: ctx.conflict_rowid_reg,
dst_reg: del + k,
extra_amount: 0,
});
program.emit_insn(Insn::IdxDelete {
start_reg: del,
num_regs: k + 1,
cursor_id: *idx_cid,
raise_error_if_no_matching_entry: false,
});
if let Some(label) = maybe_skip_del {
program.resolve_label(label, program.offset());
}
// Skip insert if NEW predicate false/NULL
let maybe_skip_ins = new_pred_reg.map(|r| {
let lbl = program.allocate_label();
program.emit_insn(Insn::IfNot {
reg: r,
target_pc: lbl,
jump_if_null: true,
});
lbl
});
// INSERT new key (use NEW rowid if present)
let ins = program.alloc_registers(k + 1);
for (i, ic) in idx_meta.columns.iter().enumerate() {
let (ci, _) = table.get_column_by_name(&ic.name).unwrap();
program.emit_insn(Insn::Copy {
src_reg: new_start + ci,
dst_reg: ins + i,
extra_amount: 0,
});
}
program.emit_insn(Insn::Copy {
src_reg: new_rowid,
dst_reg: ins + k,
extra_amount: 0,
});
let rec = program.alloc_register();
program.emit_insn(Insn::MakeRecord {
start_reg: ins,
count: k + 1,
dest_reg: rec,
index_name: Some((*idx_name).clone()),
affinity_str: None,
});
if idx_meta.unique {
// Affinity on the key columns for the NoConflict probe
let ok = program.allocate_label();
let aff: String = idx_meta
.columns
.iter()
.map(|c| {
table
.get_column_by_name(&c.name)
.map(|(_, col)| col.affinity().aff_mask())
.unwrap_or('B')
})
.collect();
program.emit_insn(Insn::Affinity {
start_reg: ins,
count: NonZeroUsize::new(k).unwrap(),
affinities: aff,
});
program.emit_insn(Insn::NoConflict {
cursor_id: *idx_cid,
target_pc: ok,
record_reg: ins,
num_regs: k,
});
let hit = program.alloc_register();
program.emit_insn(Insn::IdxRowId {
cursor_id: *idx_cid,
dest: hit,
});
program.emit_insn(Insn::Eq {
lhs: new_rowid,
rhs: hit,
target_pc: ok,
flags: CmpInsFlags::default(),
collation: program.curr_collation(),
});
let description = format_unique_violation_desc(table.get_name(), idx_meta);
program.emit_insn(Insn::Halt {
err_code: SQLITE_CONSTRAINT_PRIMARYKEY,
description,
});
program.preassign_label_to_next_insn(ok);
}
program.emit_insn(Insn::IdxInsert {
cursor_id: *idx_cid,
record_reg: rec,
unpacked_start: Some(ins),
unpacked_count: Some((k + 1) as u16),
flags: IdxInsertFlags::new().nchange(true),
});
if let Some(lbl) = maybe_skip_ins {
program.resolve_label(lbl, program.offset());
}
}
}
// Build NEW table payload
let rec = program.alloc_register();
let affinity_str = table
.columns()
.iter()
.map(|c| c.affinity().aff_mask())
.collect::<String>();
program.emit_insn(Insn::MakeRecord {
start_reg: new_start,
count: num_cols,
dest_reg: rec,
index_name: None,
affinity_str: Some(affinity_str),
});
// If rowid changed, first ensure no other row owns it, then delete+insert
if let Some(rnew) = new_rowid_reg {
let ok = program.allocate_label();
// If equal to old rowid, skip uniqueness probe
program.emit_insn(Insn::Eq {
lhs: rnew,
rhs: ctx.conflict_rowid_reg,
target_pc: ok,
flags: CmpInsFlags::default(),
collation: program.curr_collation(),
});
// If another row already has rnew -> constraint
program.emit_insn(Insn::NotExists {
cursor: ctx.cursor_id,
rowid_reg: rnew,
target_pc: ok,
});
program.emit_insn(Insn::Halt {
err_code: SQLITE_CONSTRAINT_PRIMARYKEY,
description: format!(
"{}.{}",
table.get_name(),
table
.columns()
.iter()
.find(|c| c.is_rowid_alias)
.and_then(|c| c.name.as_ref())
.unwrap_or(&"rowid".to_string())
),
});
program.preassign_label_to_next_insn(ok);
// Now replace the row
program.emit_insn(Insn::Delete {
cursor_id: ctx.cursor_id,
table_name: table.get_name().to_string(),
is_part_of_update: true,
});
program.emit_insn(Insn::Insert {
cursor: ctx.cursor_id,
key_reg: rnew,
record_reg: rec,
flag: InsertFlags::new().require_seek().update_rowid_change(),
table_name: table.get_name().to_string(),
});
} else {
program.emit_insn(Insn::Insert {
cursor: ctx.cursor_id,
key_reg: ctx.conflict_rowid_reg,
record_reg: rec,
flag: InsertFlags::new(),
table_name: table.get_name().to_string(),
});
}
// emit CDC instructions
if let Some((cdc_id, _)) = ctx.cdc_table {
let new_rowid = new_rowid_reg.unwrap_or(ctx.conflict_rowid_reg);
if new_rowid_reg.is_some() {
// DELETE (before)
let before_rec = if program.capture_data_changes_mode().has_before() {
Some(emit_cdc_full_record(
program,
table.columns(),
ctx.cursor_id,
ctx.conflict_rowid_reg,
))
} else {
None
};
emit_cdc_insns(
program,
resolver,
OperationMode::DELETE,
cdc_id,
ctx.conflict_rowid_reg,
before_rec,
None,
None,
table.get_name(),
)?;
// INSERT (after)
let after_rec = if program.capture_data_changes_mode().has_after() {
Some(emit_cdc_patch_record(
program, table, new_start, rec, new_rowid,
))
} else {
None
};
emit_cdc_insns(
program,
resolver,
OperationMode::INSERT,
cdc_id,
new_rowid,
None,
after_rec,
None,
table.get_name(),
)?;
} else {
let after_rec = if program.capture_data_changes_mode().has_after() {
Some(emit_cdc_patch_record(
program,
table,
new_start,
rec,
ctx.conflict_rowid_reg,
))
} else {
None
};
let before_rec = if program.capture_data_changes_mode().has_before() {
Some(emit_cdc_full_record(
program,
table.columns(),
ctx.cursor_id,
ctx.conflict_rowid_reg,
))
} else {
None
};
emit_cdc_insns(
program,
resolver,
OperationMode::UPDATE(UpdateRowSource::Normal),
cdc_id,
ctx.conflict_rowid_reg,
before_rec,
after_rec,
None,
table.get_name(),
)?;
}
}
// RETURNING from NEW image + final rowid
if !returning.is_empty() {
let regs = ReturningValueRegisters {
rowid_register: new_rowid_reg.unwrap_or(ctx.conflict_rowid_reg),
columns_start_register: new_start,
num_columns: num_cols,
};
emit_returning_results(program, returning, &regs)?;
}
program.emit_insn(Insn::Goto {
target_pc: ctx.row_done_label,
});
Ok(())
}
/// Normalize the `SET` clause into `(column_index, Expr)` pairs using table layout.
///
/// Supports multi-target row-value SETs: `SET (a, b) = (expr1, expr2)`.
/// Enforces same number of column names and RHS values.
/// If the same column is assigned multiple times, the last assignment wins.
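/// e.g. `SET (a, b) = (1, 2), a = 3` yields `[(a_idx, 3), (b_idx, 2)]`,
/// where `a_idx`/`b_idx` are the positions of `a` and `b` in the table.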
pub fn collect_set_clauses_for_upsert(
table: &Table,
set_items: &mut [ast::Set],
) -> crate::Result<Vec<(usize, Box<ast::Expr>)>> {
let lookup: HashMap<String, usize> = table
.columns()
.iter()
.enumerate()
.filter_map(|(i, c)| c.name.as_ref().map(|n| (n.to_lowercase(), i)))
.collect();
let mut out: Vec<(usize, Box<ast::Expr>)> = vec![];
for set in set_items {
let values: Vec<Box<ast::Expr>> = match set.expr.as_ref() {
ast::Expr::Parenthesized(v) => v.clone(),
e => vec![e.clone().into()],
};
if set.col_names.len() != values.len() {
bail_parse_error!(
"{} columns assigned {} values",
set.col_names.len(),
values.len()
);
}
for (cn, e) in set.col_names.iter().zip(values.into_iter()) {
let Some(idx) = lookup.get(&normalize_ident(cn.as_str())) else {
bail_parse_error!("no such column: {}", cn);
};
if let Some(existing) = out.iter_mut().find(|(i, _)| *i == *idx) {
existing.1 = e;
} else {
out.push((*idx, e));
}
}
}
Ok(out)
}
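/// Evaluate a partial index's WHERE clause against a row image (CURRENT or NEW),
/// with column references rewritten to the image's registers. Returns the register
/// holding the predicate result, or `None` if the index has no WHERE clause.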
fn eval_partial_pred_for_row_image(
prg: &mut ProgramBuilder,
table: &Table,
idx: &Index,
row_start: usize, // base of CURRENT or NEW image
rowid_reg: usize, // rowid for that image
resolver: &Resolver,
) -> Option<usize> {
let Some(where_expr) = &idx.where_clause else {
return None;
};
let mut e = where_expr.as_ref().clone();
rewrite_expr_to_registers(
&mut e, table, row_start, rowid_reg, None, // table_name
None, // insertion
false, // don't allow EXCLUDED
)
.ok()?;
let r = prg.alloc_register();
translate_expr_no_constant_opt(
prg,
None,
&e,
r,
resolver,
NoConstantOptReason::RegisterReuse,
)
.ok()?;
Some(r)
}
/// Generic rewriter that maps column references to registers for a given row image.
///
/// - Id/Qualified refs to the *target table* (when `table_name` is provided) resolve
/// to the CURRENT/NEW row image starting at `base_start`, with `rowid` (or the
/// rowid-alias) mapped to `rowid_reg`.
/// - If `allow_excluded` and `insertion` are provided, `EXCLUDED.x` resolves to the
/// insertion registers (and `EXCLUDED.rowid` resolves to `insertion.key_register()`).
/// - If `table_name` is `None`, qualified refs never match.
/// - Leaves names from other tables/namespaces untouched.
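///
/// e.g. with `table_name = Some("t")` and `allow_excluded = true`,
/// `t.v > excluded.v` is rewritten to `Register(base_start + v_idx) > Register(<v's insertion reg>)`,
/// where `v_idx` stands for `v`'s position in the table; `rowid` refs map to `rowid_reg`.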
fn rewrite_expr_to_registers(
e: &mut ast::Expr,
table: &Table,
base_start: usize,
rowid_reg: usize,
table_name: Option<&str>,
insertion: Option<&Insertion>,
allow_excluded: bool,
) -> crate::Result<WalkControl> {
use ast::Expr;
let table_name_norm = table_name.map(normalize_ident);
// Map a column name to a register within the row image at `base_start`.
let col_reg_from_row_image = |name: &str| -> Option<usize> {
if ROWID_STRS.iter().any(|s| s.eq_ignore_ascii_case(name)) {
return Some(rowid_reg);
}
let (idx, c) = table.get_column_by_name(name)?;
if c.is_rowid_alias {
Some(rowid_reg)
} else {
Some(base_start + idx)
}
};
walk_expr_mut(
e,
&mut |expr: &mut ast::Expr| -> crate::Result<WalkControl> {
match expr {
Expr::Qualified(ns, c) | Expr::DoublyQualified(_, ns, c) => {
let ns = normalize_ident(ns.as_str());
let c = normalize_ident(c.as_str());
// Handle EXCLUDED.* if enabled
if allow_excluded && ns.eq_ignore_ascii_case("excluded") {
if let Some(ins) = insertion {
if ROWID_STRS.iter().any(|s| s.eq_ignore_ascii_case(&c)) {
*expr = Expr::Register(ins.key_register());
} else if let Some(cm) = ins.get_col_mapping_by_name(&c) {
*expr = Expr::Register(cm.register);
} else {
bail_parse_error!("no such column in EXCLUDED: {}", c);
}
}
// If insertion is None, leave EXCLUDED.* untouched.
return Ok(WalkControl::Continue);
}
// Match the target table namespace if provided
if let Some(ref tn) = table_name_norm {
if ns.eq_ignore_ascii_case(tn) {
if let Some(r) = col_reg_from_row_image(&c) {
*expr = Expr::Register(r);
}
}
}
}
// Unqualified id -> row image (CURRENT/NEW depending on caller)
Expr::Id(name) => {
if let Some(r) = col_reg_from_row_image(&normalize_ident(name.as_str())) {
*expr = Expr::Register(r);
}
}
_ => {}
}
Ok(WalkControl::Continue)
},
)
}