mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-26 20:44:23 +01:00
## Problem:
- We have cases where we are evaluating expressions in a hot loop that
only need to be evaluated once. For example: `CAST('2025-01-01' as
DATETIME)` -- the value of this never changes, so we should only run it
once.
- We have no robust way of doing this right now for entire _expressions_
-- the only existing facility we have is
`program.mark_last_insn_constant()`, which has no concept of how many
instructions translating a given _expression_ spends, and breaks very
easily for this reason.
## Main ideas of this PR:
- Add `expr.is_constant()` determining whether the expression is
compile-time constant. Tries to be conservative and not deem something
compile-time constant if there is no certainty.
- Whenever we think a compile-time constant expression is about to be
translated into bytecode in `translate_expr()`, start a so-called
`constant span`, which means a range of instructions that are part of a
compile-time constant expression.
- At the end of translating the program, all `constant spans` are
hoisted outside of any table loops so they only get evaluated once.
- The target offsets of any jump instructions (e.g. `Goto`) are moved to
the correct place, taking into account all instructions whose offsets
were shifted due to moving the compile-time constant expressions around.
- An escape hatch wrapper `translate_expr_no_constant_opt()` is added
for cases where we should not hoist constants even if we otherwise
could. Right now the only examples of this are cases where we are reusing
the same register(s) in multiple iterations of some kind of loop, e.g.
`VALUES(...)` or in the `coalesce()` function implementation.
## Performance effects
Here is an example of a modified/simplified TPC-H query where the
`CAST()` calls were previously run millions of times in a hot loop, but
now they are optimized out of the loop.
**BYTECODE PLAN BEFORE:**
```sql
limbo> explain select
l_orderkey,
3 as revenue,
o_orderdate,
o_shippriority
from
lineitem,
orders,
customer
where
c_mktsegment = 'FURNITURE'
and c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate < cast('1995-03-29' as datetime)
and l_shipdate > cast('1995-03-29' as datetime);
addr opcode p1 p2 p3 p4 p5 comment
---- ----------------- ---- ---- ---- ------------- -- -------
0 Init 0 26 0 0 Start at 26
1 OpenRead 0 10 0 0 table=lineitem, root=10
2 OpenRead 1 9 0 0 table=orders, root=9
3 OpenRead 2 8 0 0 table=customer, root=8
4 Rewind 0 25 0 0 Rewind lineitem
5 Column 0 10 5 0 r[5]=lineitem.l_shipdate
6 String8 0 7 0 1995-03-29 0 r[7]='1995-03-29'
7 Function 0 7 6 cast 0 r[6]=func(r[7..8]) <-- CAST() executed millions of times
8 Le 5 6 24 0 if r[5]<=r[6] goto 24
9 Column 0 0 9 0 r[9]=lineitem.l_orderkey
10 SeekRowid 1 9 24 0 if (r[9]!=orders.rowid) goto 24
11 Column 1 4 10 0 r[10]=orders.o_orderdate
12 String8 0 12 0 1995-03-29 0 r[12]='1995-03-29'
13 Function 0 12 11 cast 0 r[11]=func(r[12..13])
14 Ge 10 11 24 0 if r[10]>=r[11] goto 24
15 Column 1 1 14 0 r[14]=orders.o_custkey
16 SeekRowid 2 14 24 0 if (r[14]!=customer.rowid) goto 24
17 Column 2 6 15 0 r[15]=customer.c_mktsegment
18 Ne 15 16 24 0 if r[15]!=r[16] goto 24
19 Column 0 0 1 0 r[1]=lineitem.l_orderkey
20 Integer 3 2 0 0 r[2]=3
21 Column 1 4 3 0 r[3]=orders.o_orderdate
22 Column 1 7 4 0 r[4]=orders.o_shippriority
23 ResultRow 1 4 0 0 output=r[1..4]
24 Next 0 5 0 0
25 Halt 0 0 0 0
26 Transaction 0 0 0 0 write=false
27 String8 0 8 0 DATETIME 0 r[8]='DATETIME'
28 String8 0 13 0 DATETIME 0 r[13]='DATETIME'
29 String8 0 16 0 FURNITURE 0 r[16]='FURNITURE'
30 Goto 0 1 0
```
**BYTECODE PLAN AFTER**:
```sql
limbo> explain select
l_orderkey,
3 as revenue,
o_orderdate,
o_shippriority
from
lineitem,
orders,
customer
where
c_mktsegment = 'FURNITURE'
and c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate < cast('1995-03-29' as datetime)
and l_shipdate > cast('1995-03-29' as datetime);
addr opcode p1 p2 p3 p4 p5 comment
---- ----------------- ---- ---- ---- ------------- -- -------
0 Init 0 21 0 0 Start at 21
1 OpenRead 0 10 0 0 table=lineitem, root=10
2 OpenRead 1 9 0 0 table=orders, root=9
3 OpenRead 2 8 0 0 table=customer, root=8
4 Rewind 0 20 0 0 Rewind lineitem
5 Column 0 10 5 0 r[5]=lineitem.l_shipdate
6 Le 5 6 19 0 if r[5]<=r[6] goto 19
7 Column 0 0 9 0 r[9]=lineitem.l_orderkey
8 SeekRowid 1 9 19 0 if (r[9]!=orders.rowid) goto 19
9 Column 1 4 10 0 r[10]=orders.o_orderdate
10 Ge 10 11 19 0 if r[10]>=r[11] goto 19
11 Column 1 1 14 0 r[14]=orders.o_custkey
12 SeekRowid 2 14 19 0 if (r[14]!=customer.rowid) goto 19
13 Column 2 6 15 0 r[15]=customer.c_mktsegment
14 Ne 15 16 19 0 if r[15]!=r[16] goto 19
15 Column 0 0 1 0 r[1]=lineitem.l_orderkey
16 Column 1 4 3 0 r[3]=orders.o_orderdate
17 Column 1 7 4 0 r[4]=orders.o_shippriority
18 ResultRow 1 4 0 0 output=r[1..4]
19 Next 0 5 0 0
20 Halt 0 0 0 0
21 Transaction 0 0 0 0 write=false
22 String8 0 7 0 1995-03-29 0 r[7]='1995-03-29'
23 String8 0 8 0 DATETIME 0 r[8]='DATETIME'
24 Function 1 7 6 cast 0 r[6]=func(r[7..8]) <-- CAST() executed twice
25 String8 0 12 0 1995-03-29 0 r[12]='1995-03-29'
26 String8 0 13 0 DATETIME 0 r[13]='DATETIME'
27 Function 1 12 11 cast 0 r[11]=func(r[12..13])
28 String8 0 16 0 FURNITURE 0 r[16]='FURNITURE'
29 Integer 3 2 0 0 r[2]=3
30 Goto 0 1 0 0
```
**EXECUTION RUNTIME BEFORE:**
```sql
limbo> select
l_orderkey,
3 as revenue,
o_orderdate,
o_shippriority
from
lineitem,
orders,
customer
where
c_mktsegment = 'FURNITURE'
and c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate < cast('1995-03-29' as datetime)
and l_shipdate > cast('1995-03-29' as datetime);
┌────────────┬─────────┬─────────────┬────────────────┐
│ l_orderkey │ revenue │ o_orderdate │ o_shippriority │
├────────────┼─────────┼─────────────┼────────────────┤
└────────────┴─────────┴─────────────┴────────────────┘
Command stats:
----------------------------
total: 3.633396667 s (this includes parsing/coloring of cli app)
```
**EXECUTION RUNTIME AFTER:**
```sql
limbo> select
l_orderkey,
3 as revenue,
o_orderdate,
o_shippriority
from
lineitem,
orders,
customer
where
c_mktsegment = 'FURNITURE'
and c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate < cast('1995-03-29' as datetime)
and l_shipdate > cast('1995-03-29' as datetime);
┌────────────┬─────────┬─────────────┬────────────────┐
│ l_orderkey │ revenue │ o_orderdate │ o_shippriority │
├────────────┼─────────┼─────────────┼────────────────┤
└────────────┴─────────┴─────────────┴────────────────┘
Command stats:
----------------------------
total: 2.0923475 s (this includes parsing/coloring of cli app)
```
Reviewed-by: Pere Diaz Bou <pere-altea@homail.com>
Closes #1359
751 lines
26 KiB
Rust
751 lines
26 KiB
Rust
use std::ops::Deref;
|
||
use std::rc::Rc;
|
||
|
||
use limbo_sqlite3_parser::ast::{
|
||
DistinctNames, Expr, InsertBody, OneSelect, QualifiedName, ResolveType, ResultColumn, With,
|
||
};
|
||
|
||
use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY;
|
||
use crate::schema::{IndexColumn, Table};
|
||
use crate::util::normalize_ident;
|
||
use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode};
|
||
use crate::vdbe::insn::{IdxInsertFlags, RegisterOrLiteral};
|
||
use crate::vdbe::BranchOffset;
|
||
use crate::{
|
||
schema::{Column, Schema},
|
||
vdbe::{
|
||
builder::{CursorType, ProgramBuilder},
|
||
insn::Insn,
|
||
},
|
||
};
|
||
use crate::{Result, SymbolTable, VirtualTable};
|
||
|
||
use super::emitter::Resolver;
|
||
use super::expr::{translate_expr_no_constant_opt, NoConstantOptReason};
|
||
|
||
#[allow(clippy::too_many_arguments)]
|
||
pub fn translate_insert(
|
||
query_mode: QueryMode,
|
||
schema: &Schema,
|
||
with: &Option<With>,
|
||
on_conflict: &Option<ResolveType>,
|
||
tbl_name: &QualifiedName,
|
||
columns: &Option<DistinctNames>,
|
||
body: &InsertBody,
|
||
_returning: &Option<Vec<ResultColumn>>,
|
||
syms: &SymbolTable,
|
||
) -> Result<ProgramBuilder> {
|
||
let mut program = ProgramBuilder::new(ProgramBuilderOpts {
|
||
query_mode,
|
||
num_cursors: 1,
|
||
approx_num_insns: 30,
|
||
approx_num_labels: 5,
|
||
});
|
||
if with.is_some() {
|
||
crate::bail_parse_error!("WITH clause is not supported");
|
||
}
|
||
if on_conflict.is_some() {
|
||
crate::bail_parse_error!("ON CONFLICT clause is not supported");
|
||
}
|
||
|
||
let table_name = &tbl_name.name;
|
||
let table = match schema.get_table(table_name.0.as_str()) {
|
||
Some(table) => table,
|
||
None => crate::bail_corrupt_error!("Parse error: no such table: {}", table_name),
|
||
};
|
||
let resolver = Resolver::new(syms);
|
||
if let Some(virtual_table) = &table.virtual_table() {
|
||
translate_virtual_table_insert(
|
||
&mut program,
|
||
virtual_table.clone(),
|
||
columns,
|
||
body,
|
||
on_conflict,
|
||
&resolver,
|
||
)?;
|
||
return Ok(program);
|
||
}
|
||
let init_label = program.allocate_label();
|
||
program.emit_insn(Insn::Init {
|
||
target_pc: init_label,
|
||
});
|
||
let start_offset = program.offset();
|
||
|
||
let Some(btree_table) = table.btree() else {
|
||
crate::bail_corrupt_error!("Parse error: no such table: {}", table_name);
|
||
};
|
||
if !btree_table.has_rowid {
|
||
crate::bail_parse_error!("INSERT into WITHOUT ROWID table is not supported");
|
||
}
|
||
|
||
let cursor_id = program.alloc_cursor_id(
|
||
Some(table_name.0.clone()),
|
||
CursorType::BTreeTable(btree_table.clone()),
|
||
);
|
||
// allocate cursor id's for each btree index cursor we'll need to populate the indexes
|
||
// (idx name, root_page, idx cursor id)
|
||
let idx_cursors = schema
|
||
.get_indices(&table_name.0)
|
||
.iter()
|
||
.map(|idx| {
|
||
(
|
||
&idx.name,
|
||
idx.root_page,
|
||
program.alloc_cursor_id(
|
||
Some(table_name.0.clone()),
|
||
CursorType::BTreeIndex(idx.clone()),
|
||
),
|
||
)
|
||
})
|
||
.collect::<Vec<(&String, usize, usize)>>();
|
||
let root_page = btree_table.root_page;
|
||
let values = match body {
|
||
InsertBody::Select(select, _) => match &select.body.select.deref() {
|
||
OneSelect::Values(values) => values,
|
||
_ => todo!(),
|
||
},
|
||
InsertBody::DefaultValues => &vec![vec![]],
|
||
};
|
||
|
||
let column_mappings = resolve_columns_for_insert(&table, columns, values)?;
|
||
let index_col_mappings = resolve_indicies_for_insert(schema, table.as_ref(), &column_mappings)?;
|
||
// Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias)
|
||
let rowid_alias_index = btree_table.columns.iter().position(|c| c.is_rowid_alias);
|
||
let has_user_provided_rowid = {
|
||
assert_eq!(column_mappings.len(), btree_table.columns.len());
|
||
if let Some(index) = rowid_alias_index {
|
||
column_mappings[index].value_index.is_some()
|
||
} else {
|
||
false
|
||
}
|
||
};
|
||
|
||
// allocate a register for each column in the table. if not provided by user, they will simply be set as null.
|
||
// allocate an extra register for rowid regardless of whether user provided a rowid alias column.
|
||
let num_cols = btree_table.columns.len();
|
||
let rowid_reg = program.alloc_registers(num_cols + 1);
|
||
let column_registers_start = rowid_reg + 1;
|
||
let rowid_alias_reg = {
|
||
if has_user_provided_rowid {
|
||
Some(column_registers_start + rowid_alias_index.unwrap())
|
||
} else {
|
||
None
|
||
}
|
||
};
|
||
|
||
let record_register = program.alloc_register();
|
||
let halt_label = program.allocate_label();
|
||
let mut loop_start_offset = BranchOffset::Offset(0);
|
||
|
||
let inserting_multiple_rows = values.len() > 1;
|
||
|
||
// Multiple rows - use coroutine for value population
|
||
if inserting_multiple_rows {
|
||
let yield_reg = program.alloc_register();
|
||
let jump_on_definition_label = program.allocate_label();
|
||
let start_offset_label = program.allocate_label();
|
||
program.emit_insn(Insn::InitCoroutine {
|
||
yield_reg,
|
||
jump_on_definition: jump_on_definition_label,
|
||
start_offset: start_offset_label,
|
||
});
|
||
|
||
program.resolve_label(start_offset_label, program.offset());
|
||
|
||
for value in values {
|
||
populate_column_registers(
|
||
&mut program,
|
||
value,
|
||
&column_mappings,
|
||
column_registers_start,
|
||
true,
|
||
rowid_reg,
|
||
&resolver,
|
||
)?;
|
||
program.emit_insn(Insn::Yield {
|
||
yield_reg,
|
||
end_offset: halt_label,
|
||
});
|
||
}
|
||
program.emit_insn(Insn::EndCoroutine { yield_reg });
|
||
program.preassign_label_to_next_insn(jump_on_definition_label);
|
||
|
||
program.emit_insn(Insn::OpenWrite {
|
||
cursor_id,
|
||
root_page: RegisterOrLiteral::Literal(root_page),
|
||
});
|
||
|
||
// Main loop
|
||
// FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails to unique constraint violation,
|
||
// the other row will still be inserted.
|
||
loop_start_offset = program.offset();
|
||
program.emit_insn(Insn::Yield {
|
||
yield_reg,
|
||
end_offset: halt_label,
|
||
});
|
||
} else {
|
||
// Single row - populate registers directly
|
||
program.emit_insn(Insn::OpenWrite {
|
||
cursor_id,
|
||
root_page: RegisterOrLiteral::Literal(root_page),
|
||
});
|
||
|
||
populate_column_registers(
|
||
&mut program,
|
||
&values[0],
|
||
&column_mappings,
|
||
column_registers_start,
|
||
false,
|
||
rowid_reg,
|
||
&resolver,
|
||
)?;
|
||
}
|
||
// Open all the index btrees for writing
|
||
for idx_cursor in idx_cursors.iter() {
|
||
program.emit_insn(Insn::OpenWrite {
|
||
cursor_id: idx_cursor.2,
|
||
root_page: idx_cursor.1.into(),
|
||
});
|
||
}
|
||
// Common record insertion logic for both single and multiple rows
|
||
let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label()));
|
||
if let Some(reg) = rowid_alias_reg {
|
||
// for the row record, the rowid alias column (INTEGER PRIMARY KEY) is always set to NULL
|
||
// and its value is copied to the rowid register. in the case where a single row is inserted,
|
||
// the value is written directly to the rowid register (see populate_column_registers()).
|
||
// again, not sure why this only happens in the single row case, but let's mimic sqlite.
|
||
// in the single row case we save a Copy instruction, but in the multiple rows case we do
|
||
// it here in the loop.
|
||
if inserting_multiple_rows {
|
||
program.emit_insn(Insn::Copy {
|
||
src_reg: reg,
|
||
dst_reg: rowid_reg,
|
||
amount: 0, // TODO: rename 'amount' to something else; amount==0 means 1
|
||
});
|
||
// for the row record, the rowid alias column is always set to NULL
|
||
program.emit_insn(Insn::SoftNull { reg });
|
||
}
|
||
// the user provided rowid value might itself be NULL. If it is, we create a new rowid on the next instruction.
|
||
program.emit_insn(Insn::NotNull {
|
||
reg: rowid_reg,
|
||
target_pc: check_rowid_is_integer_label.unwrap(),
|
||
});
|
||
}
|
||
|
||
// Create new rowid if a) not provided by user or b) provided by user but is NULL
|
||
program.emit_insn(Insn::NewRowid {
|
||
cursor: cursor_id,
|
||
rowid_reg,
|
||
prev_largest_reg: 0,
|
||
});
|
||
|
||
if let Some(must_be_int_label) = check_rowid_is_integer_label {
|
||
program.resolve_label(must_be_int_label, program.offset());
|
||
// If the user provided a rowid, it must be an integer.
|
||
program.emit_insn(Insn::MustBeInt { reg: rowid_reg });
|
||
}
|
||
|
||
// Check uniqueness constraint for rowid if it was provided by user.
|
||
// When the DB allocates it there are no need for separate uniqueness checks.
|
||
if has_user_provided_rowid {
|
||
let make_record_label = program.allocate_label();
|
||
program.emit_insn(Insn::NotExists {
|
||
cursor: cursor_id,
|
||
rowid_reg,
|
||
target_pc: make_record_label,
|
||
});
|
||
let rowid_column_name = if let Some(index) = rowid_alias_index {
|
||
btree_table
|
||
.columns
|
||
.get(index)
|
||
.unwrap()
|
||
.name
|
||
.as_ref()
|
||
.expect("column name is None")
|
||
} else {
|
||
"rowid"
|
||
};
|
||
|
||
program.emit_insn(Insn::Halt {
|
||
err_code: SQLITE_CONSTRAINT_PRIMARYKEY,
|
||
description: format!("{}.{}", table_name.0, rowid_column_name),
|
||
});
|
||
program.preassign_label_to_next_insn(make_record_label);
|
||
}
|
||
|
||
match table.btree() {
|
||
Some(t) if t.is_strict => {
|
||
program.emit_insn(Insn::TypeCheck {
|
||
start_reg: column_registers_start,
|
||
count: num_cols,
|
||
check_generated: true,
|
||
table_reference: Rc::clone(&t),
|
||
});
|
||
}
|
||
_ => (),
|
||
}
|
||
|
||
for index_col_mapping in index_col_mappings.iter() {
|
||
// find which cursor we opened earlier for this index
|
||
let idx_cursor_id = idx_cursors
|
||
.iter()
|
||
.find(|(name, _, _)| *name == &index_col_mapping.idx_name)
|
||
.map(|(_, _, c_id)| *c_id)
|
||
.expect("no cursor found for index");
|
||
|
||
let num_cols = index_col_mapping.columns.len();
|
||
// allocate scratch registers for the index columns plus rowid
|
||
let idx_start_reg = program.alloc_registers(num_cols + 1);
|
||
|
||
// copy each index column from the table's column registers into these scratch regs
|
||
for (i, col) in index_col_mapping.columns.iter().enumerate() {
|
||
// copy from the table's column register over to the index's scratch register
|
||
|
||
program.emit_insn(Insn::Copy {
|
||
src_reg: column_registers_start + col.0,
|
||
dst_reg: idx_start_reg + i,
|
||
amount: 0,
|
||
});
|
||
}
|
||
// last register is the rowid
|
||
program.emit_insn(Insn::Copy {
|
||
src_reg: rowid_reg,
|
||
dst_reg: idx_start_reg + num_cols,
|
||
amount: 0,
|
||
});
|
||
|
||
let record_reg = program.alloc_register();
|
||
program.emit_insn(Insn::MakeRecord {
|
||
start_reg: idx_start_reg,
|
||
count: num_cols + 1,
|
||
dest_reg: record_reg,
|
||
});
|
||
|
||
let index = schema
|
||
.get_index(&table_name.0, &index_col_mapping.idx_name)
|
||
.expect("index should be present");
|
||
|
||
if index.unique {
|
||
let label_idx_insert = program.allocate_label();
|
||
program.emit_insn(Insn::NoConflict {
|
||
cursor_id: idx_cursor_id,
|
||
target_pc: label_idx_insert,
|
||
record_reg: idx_start_reg,
|
||
num_regs: num_cols,
|
||
});
|
||
let column_names = index_col_mapping.columns.iter().enumerate().fold(
|
||
String::with_capacity(50),
|
||
|mut accum, (idx, (index, _))| {
|
||
if idx > 0 {
|
||
accum.push_str(", ");
|
||
}
|
||
|
||
accum.push_str(&btree_table.name);
|
||
accum.push('.');
|
||
|
||
let name = btree_table
|
||
.columns
|
||
.get(*index)
|
||
.unwrap()
|
||
.name
|
||
.as_ref()
|
||
.expect("column name is None");
|
||
accum.push_str(name);
|
||
|
||
accum
|
||
},
|
||
);
|
||
|
||
program.emit_insn(Insn::Halt {
|
||
err_code: SQLITE_CONSTRAINT_PRIMARYKEY,
|
||
description: column_names,
|
||
});
|
||
|
||
program.resolve_label(label_idx_insert, program.offset());
|
||
}
|
||
|
||
// now do the actual index insertion using the unpacked registers
|
||
program.emit_insn(Insn::IdxInsert {
|
||
cursor_id: idx_cursor_id,
|
||
record_reg,
|
||
unpacked_start: Some(idx_start_reg), // TODO: enable optimization
|
||
unpacked_count: Some((num_cols + 1) as u16),
|
||
// TODO: figure out how to determine whether or not we need to seek prior to insert.
|
||
flags: IdxInsertFlags::new(),
|
||
});
|
||
}
|
||
|
||
// Create and insert the record
|
||
program.emit_insn(Insn::MakeRecord {
|
||
start_reg: column_registers_start,
|
||
count: num_cols,
|
||
dest_reg: record_register,
|
||
});
|
||
|
||
program.emit_insn(Insn::Insert {
|
||
cursor: cursor_id,
|
||
key_reg: rowid_reg,
|
||
record_reg: record_register,
|
||
flag: 0,
|
||
});
|
||
|
||
if inserting_multiple_rows {
|
||
// For multiple rows, loop back
|
||
program.emit_insn(Insn::Goto {
|
||
target_pc: loop_start_offset,
|
||
});
|
||
}
|
||
|
||
program.resolve_label(halt_label, program.offset());
|
||
program.emit_insn(Insn::Halt {
|
||
err_code: 0,
|
||
description: String::new(),
|
||
});
|
||
program.preassign_label_to_next_insn(init_label);
|
||
|
||
program.emit_insn(Insn::Transaction { write: true });
|
||
program.emit_constant_insns();
|
||
program.emit_insn(Insn::Goto {
|
||
target_pc: start_offset,
|
||
});
|
||
|
||
Ok(program)
|
||
}
|
||
|
||
#[derive(Debug)]
|
||
/// Represents how a column should be populated during an INSERT.
|
||
/// Contains both the column definition and optionally the index into the VALUES tuple.
|
||
struct ColumnMapping<'a> {
|
||
/// Reference to the column definition from the table schema
|
||
column: &'a Column,
|
||
/// If Some(i), use the i-th value from the VALUES tuple
|
||
/// If None, use NULL (column was not specified in INSERT statement)
|
||
value_index: Option<usize>,
|
||
/// The default value for the column, if defined
|
||
default_value: Option<&'a Expr>,
|
||
}
|
||
|
||
/// Resolves how each column in a table should be populated during an INSERT.
|
||
/// Returns a Vec of ColumnMapping, one for each column in the table's schema.
|
||
///
|
||
/// For each column, specifies:
|
||
/// 1. The column definition (type, constraints, etc)
|
||
/// 2. Where to get the value from:
|
||
/// - Some(i) -> use i-th value from the VALUES tuple
|
||
/// - None -> use NULL (column wasn't specified in INSERT)
|
||
///
|
||
/// Two cases are handled:
|
||
/// 1. No column list specified (INSERT INTO t VALUES ...):
|
||
/// - Values are assigned to columns in table definition order
|
||
/// - If fewer values than columns, remaining columns map to None
|
||
/// 2. Column list specified (INSERT INTO t (col1, col3) VALUES ...):
|
||
/// - Named columns map to their corresponding value index
|
||
/// - Unspecified columns map to None
|
||
fn resolve_columns_for_insert<'a>(
|
||
table: &'a Table,
|
||
columns: &Option<DistinctNames>,
|
||
values: &[Vec<Expr>],
|
||
) -> Result<Vec<ColumnMapping<'a>>> {
|
||
if values.is_empty() {
|
||
crate::bail_parse_error!("no values to insert");
|
||
}
|
||
|
||
let table_columns = &table.columns();
|
||
|
||
// Case 1: No columns specified - map values to columns in order
|
||
if columns.is_none() {
|
||
let num_values = values[0].len();
|
||
if num_values > table_columns.len() {
|
||
crate::bail_parse_error!(
|
||
"table {} has {} columns but {} values were supplied",
|
||
&table.get_name(),
|
||
table_columns.len(),
|
||
num_values
|
||
);
|
||
}
|
||
|
||
// Verify all value tuples have same length
|
||
for value in values.iter().skip(1) {
|
||
if value.len() != num_values {
|
||
crate::bail_parse_error!("all VALUES must have the same number of terms");
|
||
}
|
||
}
|
||
|
||
// Map each column to either its corresponding value index or None
|
||
return Ok(table_columns
|
||
.iter()
|
||
.enumerate()
|
||
.map(|(i, col)| ColumnMapping {
|
||
column: col,
|
||
value_index: if i < num_values { Some(i) } else { None },
|
||
default_value: col.default.as_ref(),
|
||
})
|
||
.collect());
|
||
}
|
||
|
||
// Case 2: Columns specified - map named columns to their values
|
||
let mut mappings: Vec<_> = table_columns
|
||
.iter()
|
||
.map(|col| ColumnMapping {
|
||
column: col,
|
||
value_index: None,
|
||
default_value: col.default.as_ref(),
|
||
})
|
||
.collect();
|
||
|
||
// Map each named column to its value index
|
||
for (value_index, column_name) in columns.as_ref().unwrap().iter().enumerate() {
|
||
let column_name = normalize_ident(column_name.0.as_str());
|
||
let table_index = table_columns.iter().position(|c| {
|
||
c.name
|
||
.as_ref()
|
||
.map_or(false, |name| name.eq_ignore_ascii_case(&column_name))
|
||
});
|
||
|
||
if table_index.is_none() {
|
||
crate::bail_parse_error!(
|
||
"table {} has no column named {}",
|
||
&table.get_name(),
|
||
column_name
|
||
);
|
||
}
|
||
|
||
mappings[table_index.unwrap()].value_index = Some(value_index);
|
||
}
|
||
|
||
Ok(mappings)
|
||
}
|
||
|
||
/// Represents how a column in an index should be populated during an INSERT.
|
||
/// Similar to ColumnMapping above but includes the index name, as well as multiple
|
||
/// possible value indices for each.
|
||
#[derive(Debug, Default)]
|
||
struct IndexColMapping {
|
||
idx_name: String,
|
||
columns: Vec<(usize, IndexColumn)>,
|
||
value_indicies: Vec<Option<usize>>,
|
||
}
|
||
|
||
impl IndexColMapping {
|
||
fn new(name: String) -> Self {
|
||
IndexColMapping {
|
||
idx_name: name,
|
||
..Default::default()
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Example:
|
||
/// Table 'test': (a, b, c);
|
||
/// Index 'idx': test(a, b);
|
||
///________________________________
|
||
/// Insert (a, c): (2, 3)
|
||
/// Record: (2, NULL, 3)
|
||
/// IndexColMapping: (a, b) = (2, NULL)
|
||
fn resolve_indicies_for_insert(
|
||
schema: &Schema,
|
||
table: &Table,
|
||
columns: &[ColumnMapping<'_>],
|
||
) -> Result<Vec<IndexColMapping>> {
|
||
let mut index_col_mappings = Vec::new();
|
||
// Iterate over all indices for this table
|
||
for index in schema.get_indices(table.get_name()) {
|
||
let mut idx_map = IndexColMapping::new(index.name.clone());
|
||
// For each column in the index (in the order defined by the index),
|
||
// try to find the corresponding column in the insert’s column mapping.
|
||
for idx_col in &index.columns {
|
||
let target_name = normalize_ident(idx_col.name.as_str());
|
||
if let Some((i, col_mapping)) = columns.iter().enumerate().find(|(_, mapping)| {
|
||
mapping
|
||
.column
|
||
.name
|
||
.as_ref()
|
||
.map_or(false, |name| name.eq_ignore_ascii_case(&target_name))
|
||
}) {
|
||
idx_map.columns.push((i, idx_col.clone()));
|
||
idx_map.value_indicies.push(col_mapping.value_index);
|
||
} else {
|
||
return Err(crate::LimboError::ParseError(format!(
|
||
"Column {} not found in index {}",
|
||
target_name, index.name
|
||
)));
|
||
}
|
||
}
|
||
// Add the mapping if at least one column was found.
|
||
if !idx_map.columns.is_empty() {
|
||
index_col_mappings.push(idx_map);
|
||
}
|
||
}
|
||
Ok(index_col_mappings)
|
||
}
|
||
|
||
/// Populates the column registers with values for a single row
|
||
fn populate_column_registers(
|
||
program: &mut ProgramBuilder,
|
||
value: &[Expr],
|
||
column_mappings: &[ColumnMapping],
|
||
column_registers_start: usize,
|
||
inserting_multiple_rows: bool,
|
||
rowid_reg: usize,
|
||
resolver: &Resolver,
|
||
) -> Result<()> {
|
||
for (i, mapping) in column_mappings.iter().enumerate() {
|
||
let target_reg = column_registers_start + i;
|
||
|
||
// Column has a value in the VALUES tuple
|
||
if let Some(value_index) = mapping.value_index {
|
||
// When inserting a single row, SQLite writes the value provided for the rowid alias column (INTEGER PRIMARY KEY)
|
||
// directly into the rowid register and writes a NULL into the rowid alias column. Not sure why this only happens
|
||
// in the single row case, but let's copy it.
|
||
let write_directly_to_rowid_reg =
|
||
mapping.column.is_rowid_alias && !inserting_multiple_rows;
|
||
let reg = if write_directly_to_rowid_reg {
|
||
rowid_reg
|
||
} else {
|
||
target_reg
|
||
};
|
||
translate_expr_no_constant_opt(
|
||
program,
|
||
None,
|
||
value.get(value_index).expect("value index out of bounds"),
|
||
reg,
|
||
resolver,
|
||
NoConstantOptReason::RegisterReuse,
|
||
)?;
|
||
if write_directly_to_rowid_reg {
|
||
program.emit_insn(Insn::SoftNull { reg: target_reg });
|
||
}
|
||
} else if let Some(default_expr) = mapping.default_value {
|
||
translate_expr_no_constant_opt(
|
||
program,
|
||
None,
|
||
default_expr,
|
||
target_reg,
|
||
resolver,
|
||
NoConstantOptReason::RegisterReuse,
|
||
)?;
|
||
} else {
|
||
// Column was not specified as has no DEFAULT - use NULL if it is nullable, otherwise error
|
||
// Rowid alias columns can be NULL because we will autogenerate a rowid in that case.
|
||
let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias;
|
||
if is_nullable {
|
||
program.emit_insn(Insn::Null {
|
||
dest: target_reg,
|
||
dest_end: None,
|
||
});
|
||
program.mark_last_insn_constant();
|
||
} else {
|
||
crate::bail_parse_error!(
|
||
"column {} is not nullable",
|
||
mapping.column.name.as_ref().expect("column name is None")
|
||
);
|
||
}
|
||
}
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
fn translate_virtual_table_insert(
|
||
program: &mut ProgramBuilder,
|
||
virtual_table: Rc<VirtualTable>,
|
||
columns: &Option<DistinctNames>,
|
||
body: &InsertBody,
|
||
on_conflict: &Option<ResolveType>,
|
||
resolver: &Resolver,
|
||
) -> Result<()> {
|
||
let init_label = program.allocate_label();
|
||
program.emit_insn(Insn::Init {
|
||
target_pc: init_label,
|
||
});
|
||
let start_offset = program.offset();
|
||
|
||
let values = match body {
|
||
InsertBody::Select(select, None) => match &select.body.select.deref() {
|
||
OneSelect::Values(values) => values,
|
||
_ => crate::bail_parse_error!("Virtual tables only support VALUES clause in INSERT"),
|
||
},
|
||
InsertBody::DefaultValues => &vec![],
|
||
_ => crate::bail_parse_error!("Unsupported INSERT body for virtual tables"),
|
||
};
|
||
|
||
let table = Table::Virtual(virtual_table.clone());
|
||
let column_mappings = resolve_columns_for_insert(&table, columns, values)?;
|
||
|
||
let value_registers_start = program.alloc_registers(values[0].len());
|
||
for (i, expr) in values[0].iter().enumerate() {
|
||
translate_expr_no_constant_opt(
|
||
program,
|
||
None,
|
||
expr,
|
||
value_registers_start + i,
|
||
resolver,
|
||
NoConstantOptReason::RegisterReuse,
|
||
)?;
|
||
}
|
||
/* *
|
||
* Inserts for virtual tables are done in a single step.
|
||
* argv[0] = (NULL for insert)
|
||
* argv[1] = (NULL for insert)
|
||
* argv[2..] = column values
|
||
* */
|
||
|
||
let rowid_reg = program.alloc_registers(column_mappings.len() + 3);
|
||
let insert_rowid_reg = rowid_reg + 1; // argv[1] = insert_rowid
|
||
let data_start_reg = rowid_reg + 2; // argv[2..] = column values
|
||
|
||
program.emit_insn(Insn::Null {
|
||
dest: rowid_reg,
|
||
dest_end: None,
|
||
});
|
||
program.emit_insn(Insn::Null {
|
||
dest: insert_rowid_reg,
|
||
dest_end: None,
|
||
});
|
||
|
||
for (i, mapping) in column_mappings.iter().enumerate() {
|
||
let target_reg = data_start_reg + i;
|
||
if let Some(value_index) = mapping.value_index {
|
||
program.emit_insn(Insn::Copy {
|
||
src_reg: value_registers_start + value_index,
|
||
dst_reg: target_reg,
|
||
amount: 1,
|
||
});
|
||
} else {
|
||
program.emit_insn(Insn::Null {
|
||
dest: target_reg,
|
||
dest_end: None,
|
||
});
|
||
}
|
||
}
|
||
|
||
let conflict_action = on_conflict.as_ref().map(|c| c.bit_value()).unwrap_or(0) as u16;
|
||
|
||
let cursor_id = program.alloc_cursor_id(
|
||
Some(virtual_table.name.clone()),
|
||
CursorType::VirtualTable(virtual_table.clone()),
|
||
);
|
||
|
||
program.emit_insn(Insn::VUpdate {
|
||
cursor_id,
|
||
arg_count: column_mappings.len() + 2,
|
||
start_reg: rowid_reg,
|
||
vtab_ptr: virtual_table.implementation.as_ref().ctx as usize,
|
||
conflict_action,
|
||
});
|
||
|
||
let halt_label = program.allocate_label();
|
||
program.resolve_label(halt_label, program.offset());
|
||
program.emit_insn(Insn::Halt {
|
||
err_code: 0,
|
||
description: String::new(),
|
||
});
|
||
|
||
program.resolve_label(init_label, program.offset());
|
||
|
||
program.emit_insn(Insn::Goto {
|
||
target_pc: start_offset,
|
||
});
|
||
|
||
Ok(())
|
||
}
|