Merge 'Strict table support' from Ihor Andrianov

Closes #884
Support for
```CREATE TABLE test(id INTEGER) STRICT;```

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #1268
This commit is contained in:
Pekka Enberg
2025-04-09 16:45:56 +03:00
9 changed files with 490 additions and 11 deletions

View File

@@ -161,6 +161,7 @@ pub struct BTreeTable {
pub primary_key_column_names: Vec<String>,
pub columns: Vec<Column>,
pub has_rowid: bool,
pub is_strict: bool,
}
impl BTreeTable {
@@ -262,12 +263,14 @@ fn create_table(
let mut has_rowid = true;
let mut primary_key_column_names = vec![];
let mut cols = vec![];
let is_strict: bool;
match body {
CreateTableBody::ColumnsAndConstraints {
columns,
constraints,
options,
} => {
is_strict = options.contains(TableOptions::STRICT);
if let Some(constraints) = constraints {
for c in constraints {
if let limbo_sqlite3_parser::ast::TableConstraint::PrimaryKey {
@@ -390,6 +393,7 @@ fn create_table(
has_rowid,
primary_key_column_names,
columns: cols,
is_strict,
})
}
@@ -456,7 +460,7 @@ pub fn affinity(datatype: &str) -> Affinity {
}
// Rule 3: BLOB or empty -> BLOB affinity (historically called NONE)
if datatype.contains("BLOB") || datatype.is_empty() {
if datatype.contains("BLOB") || datatype.is_empty() || datatype.contains("ANY") {
return Affinity::Blob;
}
@@ -508,11 +512,11 @@ pub enum Affinity {
Numeric,
}
pub const SQLITE_AFF_TEXT: char = 'a';
pub const SQLITE_AFF_NONE: char = 'b'; // Historically called NONE, but it's the same as BLOB
pub const SQLITE_AFF_NUMERIC: char = 'c';
pub const SQLITE_AFF_INTEGER: char = 'd';
pub const SQLITE_AFF_REAL: char = 'e';
pub const SQLITE_AFF_NONE: char = 'A'; // Historically called NONE, but it's the same as BLOB
pub const SQLITE_AFF_TEXT: char = 'B';
pub const SQLITE_AFF_NUMERIC: char = 'C';
pub const SQLITE_AFF_INTEGER: char = 'D';
pub const SQLITE_AFF_REAL: char = 'E';
impl Affinity {
/// This is meant to be used in opcodes like Eq, which state:
@@ -552,6 +556,7 @@ pub fn sqlite_schema_table() -> BTreeTable {
root_page: 1,
name: "sqlite_schema".to_string(),
has_rowid: true,
is_strict: false,
primary_key_column_names: vec![],
columns: vec![
Column {
@@ -1046,6 +1051,7 @@ mod tests {
root_page: 0,
name: "t1".to_string(),
has_rowid: true,
is_strict: false,
primary_key_column_names: vec!["nonexistent".to_string()],
columns: vec![Column {
name: Some("a".to_string()),

View File

@@ -1,6 +1,8 @@
// This module contains code for emitting bytecode instructions for SQL query execution.
// It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine.
use std::rc::Rc;
use limbo_sqlite3_parser::ast::{self};
use crate::function::Func;
@@ -615,6 +617,16 @@ fn emit_update_insns(
}
}
}
if let Some(btree_table) = table_ref.btree() {
if btree_table.is_strict {
program.emit_insn(Insn::TypeCheck {
start_reg: first_col_reg,
count: table_ref.columns().len(),
check_generated: true,
table_reference: Rc::clone(&btree_table),
});
}
}
let record_reg = program.alloc_register();
program.emit_insn(Insn::MakeRecord {
start_reg: first_col_reg,

View File

@@ -275,6 +275,17 @@ pub fn translate_insert(
program.resolve_label(make_record_label, program.offset());
}
match table.btree() {
Some(t) if t.is_strict => {
program.emit_insn(Insn::TypeCheck {
start_reg: column_registers_start,
count: num_cols,
check_generated: true,
table_reference: Rc::clone(&t),
});
}
_ => (),
}
// Create and insert the record
program.emit_insn(Insn::MakeRecord {
start_reg: column_registers_start,

View File

@@ -22,6 +22,20 @@ pub enum OwnedValueType {
Error,
}
impl Display for OwnedValueType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let value = match self {
Self::Null => "NULL",
Self::Integer => "INT",
Self::Float => "REAL",
Self::Blob => "BLOB",
Self::Text => "TEXT",
Self::Error => "ERROR",
};
write!(f, "{}", value)
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum TextSubtype {
Text,
@@ -69,6 +83,15 @@ impl Text {
}
}
impl From<String> for Text {
fn from(value: String) -> Self {
Text {
value: value.into_bytes(),
subtype: TextSubtype::Text,
}
}
}
impl TextRef {
pub fn as_str(&self) -> &str {
unsafe { std::str::from_utf8_unchecked(self.value.to_slice()) }

View File

@@ -1,5 +1,5 @@
#![allow(unused_variables)]
use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY};
use crate::error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY};
use crate::ext::ExtValue;
use crate::function::{AggFunc, ExtFunc, MathFunc, MathFuncArity, ScalarFunc, VectorFunc};
use crate::functions::datetime::{
@@ -10,11 +10,13 @@ use std::{borrow::BorrowMut, rc::Rc};
use crate::pseudo::PseudoCursor;
use crate::result::LimboResult;
use crate::schema::{affinity, Affinity};
use crate::storage::btree::{BTreeCursor, BTreeKey};
use crate::storage::wal::CheckpointResult;
use crate::types::{
AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, SeekKey, SeekOp,
AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, OwnedValueType, SeekKey, SeekOp,
};
use crate::util::{
cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real,
@@ -1350,6 +1352,68 @@ pub fn op_column(
Ok(InsnFunctionStepResult::Step)
}
pub fn op_type_check(
program: &Program,
state: &mut ProgramState,
insn: &Insn,
pager: &Rc<Pager>,
mv_store: Option<&Rc<MvStore>>,
) -> Result<InsnFunctionStepResult> {
let Insn::TypeCheck {
start_reg,
count,
check_generated,
table_reference,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
};
assert_eq!(table_reference.is_strict, true);
state.registers[*start_reg..*start_reg + *count]
.iter_mut()
.zip(table_reference.columns.iter())
.try_for_each(|(reg, col)| {
// INT PRIMARY KEY is not row_id_alias so we throw error if this col is NULL
if !col.is_rowid_alias
&& col.primary_key
&& matches!(reg.get_owned_value(), OwnedValue::Null)
{
bail_constraint_error!(
"NOT NULL constraint failed: {}.{} ({})",
&table_reference.name,
col.name.as_ref().map(|s| s.as_str()).unwrap_or(""),
SQLITE_CONSTRAINT
)
} else if col.is_rowid_alias && matches!(reg.get_owned_value(), OwnedValue::Null) {
// Handle INTEGER PRIMARY KEY for null as usual (Rowid will be auto-assigned)
return Ok(());
}
let col_affinity = col.affinity();
let ty_str = col.ty_str.as_str();
let applied = apply_affinity_char(reg, col_affinity);
let value_type = reg.get_owned_value().value_type();
match (ty_str, value_type) {
("INTEGER" | "INT", OwnedValueType::Integer) => {}
("REAL", OwnedValueType::Float) => {}
("BLOB", OwnedValueType::Blob) => {}
("TEXT", OwnedValueType::Text) => {}
("ANY", _) => {}
(t, v) => bail_constraint_error!(
"cannot store {} value in {} column {}.{} ({})",
v,
t,
&table_reference.name,
col.name.as_ref().map(|s| s.as_str()).unwrap_or(""),
SQLITE_CONSTRAINT
),
};
Ok(())
})?;
state.pc += 1;
Ok(InsnFunctionStepResult::Step)
}
pub fn op_make_record(
program: &Program,
state: &mut ProgramState,
@@ -4996,6 +5060,77 @@ fn exec_if(reg: &OwnedValue, jump_if_null: bool, not: bool) -> bool {
}
}
fn apply_affinity_char(target: &mut Register, affinity: Affinity) -> bool {
if let Register::OwnedValue(value) = target {
if matches!(value, OwnedValue::Blob(_)) {
return true;
}
match affinity {
Affinity::Blob => return true,
Affinity::Text => {
if matches!(value, OwnedValue::Text(_) | OwnedValue::Null) {
return true;
}
let text = value.to_string();
*value = OwnedValue::Text(text.into());
return true;
}
Affinity::Integer | Affinity::Numeric => {
if matches!(value, OwnedValue::Integer(_)) {
return true;
}
if !matches!(value, OwnedValue::Text(_) | OwnedValue::Float(_)) {
return true;
}
if let OwnedValue::Float(fl) = *value {
if let Ok(int) = cast_real_to_integer(fl).map(OwnedValue::Integer) {
*value = int;
return true;
}
return false;
}
let text = value.to_text().unwrap();
let Ok(num) = checked_cast_text_to_numeric(&text) else {
return false;
};
*value = match &num {
OwnedValue::Float(fl) => {
cast_real_to_integer(*fl)
.map(OwnedValue::Integer)
.unwrap_or(num);
return true;
}
OwnedValue::Integer(_) if text.starts_with("0x") => {
return false;
}
_ => num,
};
}
Affinity::Real => {
if let OwnedValue::Integer(i) = value {
*value = OwnedValue::Float(*i as f64);
return true;
} else if let OwnedValue::Text(t) = value {
if t.as_str().starts_with("0x") {
return false;
}
if let Ok(num) = checked_cast_text_to_numeric(t.as_str()) {
*value = num;
return true;
} else {
return false;
}
}
}
};
}
return true;
}
fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue {
if matches!(value, OwnedValue::Null) {
return OwnedValue::Null;

View File

@@ -528,6 +528,20 @@ pub fn insn_to_str(
),
)
}
Insn::TypeCheck {
start_reg,
count,
check_generated,
..
} => (
"TypeCheck",
*start_reg as i32,
*count as i32,
*check_generated as i32,
OwnedValue::build_text(""),
0,
String::from(""),
),
Insn::MakeRecord {
start_reg,
count,

View File

@@ -1,8 +1,10 @@
use std::num::NonZero;
use std::rc::Rc;
use super::{
cast_text_to_numeric, execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx,
};
use crate::schema::BTreeTable;
use crate::storage::wal::CheckpointMode;
use crate::types::{OwnedValue, Record};
use limbo_macros::Description;
@@ -344,7 +346,16 @@ pub enum Insn {
dest: usize,
},
/// Make a record and write it to destination register.
TypeCheck {
start_reg: usize, // P1
count: usize, // P2
/// GENERATED ALWAYS AS ... STATIC columns are only checked if P3 is zero.
/// When P3 is non-zero, no type checking occurs for static generated columns.
check_generated: bool, // P3
table_reference: Rc<BTreeTable>, // P4
},
// Make a record and write it to destination register.
MakeRecord {
start_reg: usize, // P1
count: usize, // P2
@@ -427,7 +438,7 @@ pub enum Insn {
register: usize,
},
/// Write a string value into a register.
// Write a string value into a register.
String8 {
value: String,
dest: usize,
@@ -1298,6 +1309,7 @@ impl Insn {
Insn::LastAwait { .. } => execute::op_last_await,
Insn::Column { .. } => execute::op_column,
Insn::TypeCheck { .. } => execute::op_type_check,
Insn::MakeRecord { .. } => execute::op_make_record,
Insn::ResultRow { .. } => execute::op_result_row,

View File

@@ -15,4 +15,149 @@ do_execsql_test_on_specific_db {:memory:} must-be-int-insert {
} {1
2
3
4}
4}
do_execsql_test_on_specific_db {:memory:} strict-basic-creation {
CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test1 VALUES(1, 'item1', 10.5);
SELECT * FROM test1;
} {1|item1|10.5}
do_execsql_test_in_memory_any_error strict-require-datatype {
CREATE TABLE test2(id INTEGER, name) STRICT;
}
do_execsql_test_in_memory_any_error strict-valid-datatypes {
CREATE TABLE test2(id INTEGER, value DATETIME) STRICT;
}
do_execsql_test_in_memory_any_error strict-type-enforcement {
CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test3 VALUES(1, 'item1', 'not-a-number');
}
do_execsql_test_on_specific_db {:memory:} strict-type-coercion {
CREATE TABLE test4(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test4 VALUES(1, 'item1', '10.5');
SELECT typeof(price), price FROM test4;
} {real|10.5}
do_execsql_test_on_specific_db {:memory:} strict-any-flexibility {
CREATE TABLE test5(id INTEGER, data ANY) STRICT;
INSERT INTO test5 VALUES(1, 100);
INSERT INTO test5 VALUES(2, 'text');
INSERT INTO test5 VALUES(3, 3.14);
SELECT id, typeof(data) FROM test5 ORDER BY id;
} {1|integer
2|text
3|real}
do_execsql_test_on_specific_db {:memory:} strict-any-preservation {
CREATE TABLE test6(id INTEGER, code ANY) STRICT;
INSERT INTO test6 VALUES(1, '000123');
SELECT typeof(code), code FROM test6;
} {text|000123}
do_execsql_test_in_memory_any_error strict-int-vs-integer-pk {
CREATE TABLE test8(id INT PRIMARY KEY, name TEXT) STRICT
INSERT INTO test8 VALUES(NULL, 'test');
}
do_execsql_test_on_specific_db {:memory:} strict-integer-pk-behavior {
CREATE TABLE test9(id INTEGER PRIMARY KEY, name TEXT) STRICT;
INSERT INTO test9 VALUES(NULL, 'test');
SELECT id, name FROM test9;
} {1|test}
do_execsql_test_on_specific_db {:memory:} strict-mixed-inserts {
CREATE TABLE test11(
id INTEGER PRIMARY KEY,
name TEXT,
price REAL,
quantity INT,
tags ANY
) STRICT;
INSERT INTO test11 VALUES(1, 'item1', 10.5, 5, 'tag1');
INSERT INTO test11 VALUES(2, 'item2', 20.75, 10, 42);
SELECT id, name, price, quantity, typeof(tags) FROM test11 ORDER BY id;
} {1|item1|10.5|5|text
2|item2|20.75|10|integer}
do_execsql_test_on_specific_db {:memory:} strict-update-basic {
CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test1 VALUES(1, 'item1', 10.5);
UPDATE test1 SET price = 15.75 WHERE id = 1;
SELECT * FROM test1;
} {1|item1|15.75}
do_execsql_test_in_memory_any_error strict-update-type-enforcement {
CREATE TABLE test2(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test2 VALUES(1, 'item1', 10.5);
UPDATE test2 SET price = 'not-a-number' WHERE id = 1;
}
do_execsql_test_on_specific_db {:memory:} strict-update-type-coercion {
CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT;
INSERT INTO test3 VALUES(1, 'item1', 10.5);
UPDATE test3 SET price = '15.75' WHERE id = 1;
SELECT id, typeof(price), price FROM test3;
} {1|real|15.75}
do_execsql_test_on_specific_db {:memory:} strict-update-any-flexibility {
CREATE TABLE test4(id INTEGER, data ANY) STRICT;
INSERT INTO test4 VALUES(1, 100);
UPDATE test4 SET data = 'text' WHERE id = 1;
INSERT INTO test4 VALUES(2, 'original');
UPDATE test4 SET data = 3.14 WHERE id = 2;
SELECT id, typeof(data), data FROM test4 ORDER BY id;
} {1|text|text
2|real|3.14}
do_execsql_test_on_specific_db {:memory:} strict-update-any-preservation {
CREATE TABLE test5(id INTEGER, code ANY) STRICT;
INSERT INTO test5 VALUES(1, 'text');
UPDATE test5 SET code = '000123' WHERE id = 1;
SELECT typeof(code), code FROM test5;
} {text|000123}
do_execsql_test_in_memory_any_error strict-update-not-null-constraint {
CREATE TABLE test7(id INTEGER, name TEXT NOT NULL) STRICT;
INSERT INTO test7 VALUES(1, 'name');
UPDATE test7 SET name = NULL WHERE id = 1;
}
# Uncomment following test case when unique constraint is added
#do_execsql_test_any_error strict-update-pk-constraint {
# CREATE TABLE test8(id INTEGER PRIMARY KEY, name TEXT) STRICT;
# INSERT INTO test8 VALUES(1, 'name1');
# INSERT INTO test8 VALUES(2, 'name2');
# UPDATE test8 SET id = 2 WHERE id = 1;
#}
do_execsql_test_on_specific_db {:memory:} strict-update-multiple-columns {
CREATE TABLE test9(id INTEGER, name TEXT, price REAL, quantity INT) STRICT;
INSERT INTO test9 VALUES(1, 'item1', 10.5, 5);
UPDATE test9 SET name = 'updated', price = 20.75, quantity = 10 WHERE id = 1;
SELECT * FROM test9;
} {1|updated|20.75|10}
do_execsql_test_on_specific_db {:memory:} strict-update-where-clause {
CREATE TABLE test10(id INTEGER, category TEXT, price REAL) STRICT;
INSERT INTO test10 VALUES(1, 'A', 10);
INSERT INTO test10 VALUES(2, 'A', 20);
INSERT INTO test10 VALUES(3, 'B', 30);
UPDATE test10 SET price = price * 2 WHERE category = 'A';
SELECT id, price FROM test10 ORDER BY id;
} {1|20.0
2|40.0
3|30.0}
do_execsql_test_on_specific_db {:memory:} strict-update-expression {
CREATE TABLE test11(id INTEGER, name TEXT, price REAL, discount REAL) STRICT;
INSERT INTO test11 VALUES(1, 'item1', 100, 0.1);
UPDATE test11 SET price = price - (price * discount);
SELECT id, price FROM test11;
} {1|90.0}

View File

@@ -97,3 +97,124 @@ proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolera
}
}
}
# This procedure passes the test if the output contains error messages
proc run_test_expecting_any_error {sqlite_exec db_name sql} {
# Execute the SQL command and capture output
set command [list $sqlite_exec $db_name $sql]
# Use catch to handle both successful and error cases
catch {exec {*}$command} result options
# Check if the output contains error indicators (×, error, syntax error, etc.)
if {[regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} {
# Error found in output - test passed
puts "Test PASSED: Got expected error"
return 1
}
# No error indicators in output
puts "Test FAILED: '$sql'"
puts "Expected an error but command output didn't indicate any error: '$result'"
exit 1
}
# This procedure passes if error matches a specific pattern
proc run_test_expecting_error {sqlite_exec db_name sql expected_error_pattern} {
# Execute the SQL command and capture output
set command [list $sqlite_exec $db_name $sql]
# Capture output whether command succeeds or fails
catch {exec {*}$command} result options
# Check if the output contains error indicators first
if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} {
puts "Test FAILED: '$sql'"
puts "Expected an error matching '$expected_error_pattern'"
puts "But command output didn't indicate any error: '$result'"
exit 1
}
# Now check if the error message matches the expected pattern
if {![regexp $expected_error_pattern $result]} {
puts "Test FAILED: '$sql'"
puts "Error occurred but didn't match expected pattern."
puts "Output was: '$result'"
puts "Expected pattern: '$expected_error_pattern'"
exit 1
}
# If we get here, the test passed - got expected error matching pattern
return 1
}
# This version accepts exact error text, ignoring formatting
proc run_test_expecting_error_content {sqlite_exec db_name sql expected_error_text} {
# Execute the SQL command and capture output
set command [list $sqlite_exec $db_name $sql]
# Capture output whether command succeeds or fails
catch {exec {*}$command} result options
# Check if the output contains error indicators first
if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} {
puts "Test FAILED: '$sql'"
puts "Expected an error with text: '$expected_error_text'"
puts "But command output didn't indicate any error: '$result'"
exit 1
}
# Normalize both the actual and expected error messages
# Remove all whitespace, newlines, and special characters for comparison
set normalized_actual [regsub -all {[[:space:]]|[[:punct:]]} $result ""]
set normalized_expected [regsub -all {[[:space:]]|[[:punct:]]} $expected_error_text ""]
# Convert to lowercase for case-insensitive comparison
set normalized_actual [string tolower $normalized_actual]
set normalized_expected [string tolower $normalized_expected]
# Check if the normalized strings contain the same text
if {[string first $normalized_expected $normalized_actual] == -1} {
puts "Test FAILED: '$sql'"
puts "Error occurred but content didn't match."
puts "Output was: '$result'"
puts "Expected text: '$expected_error_text'"
exit 1
}
# If we get here, the test passed - got error with expected content
return 1
}
proc do_execsql_test_error {test_name sql_statements expected_error_pattern} {
foreach db $::test_dbs {
puts [format "(%s) %s Running error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name]
set combined_sql [string trim $sql_statements]
run_test_expecting_error $::sqlite_exec $db $combined_sql $expected_error_pattern
}
}
proc do_execsql_test_error_content {test_name sql_statements expected_error_text} {
foreach db $::test_dbs {
puts [format "(%s) %s Running error content test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name]
set combined_sql [string trim $sql_statements]
run_test_expecting_error_content $::sqlite_exec $db $combined_sql $expected_error_text
}
}
proc do_execsql_test_any_error {test_name sql_statements} {
foreach db $::test_dbs {
puts [format "(%s) %s Running any-error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name]
set combined_sql [string trim $sql_statements]
run_test_expecting_any_error $::sqlite_exec $db $combined_sql
}
}
proc do_execsql_test_in_memory_any_error {test_name sql_statements} {
puts [format "(in-memory) %s Running any-error test: %s" [string repeat " " 31] $test_name]
# Use ":memory:" special filename for in-memory database
set db_name ":memory:"
set combined_sql [string trim $sql_statements]
run_test_expecting_any_error $::sqlite_exec $db_name $combined_sql
}