support Unique properly by creating a vec of auto indices

This commit is contained in:
pedrocarlo
2025-05-10 15:32:16 -03:00
parent c5f004c1d6
commit 3526a206e4
5 changed files with 233 additions and 128 deletions

View File

@@ -178,6 +178,7 @@ pub struct BTreeTable {
pub columns: Vec<Column>,
pub has_rowid: bool,
pub is_strict: bool,
pub unique_sets: Option<Vec<Vec<(String, SortOrder)>>>,
}
impl BTreeTable {
@@ -284,6 +285,7 @@ fn create_table(
let mut primary_key_columns = vec![];
let mut cols = vec![];
let is_strict: bool;
let mut unique_sets: Option<Vec<Vec<(String, SortOrder)>>> = None;
match body {
CreateTableBody::ColumnsAndConstraints {
columns,
@@ -310,6 +312,31 @@ fn create_table(
primary_key_columns
.push((col_name, column.order.unwrap_or(SortOrder::Asc)));
}
} else if let limbo_sqlite3_parser::ast::TableConstraint::Unique {
columns,
conflict_clause: _, // TODO: ignore conflict_cause for now
} = c.constraint
{
let unique_set: Vec<_> = columns
.into_iter()
.map(|column| {
let col_name = match column.expr {
Expr::Id(id) => normalize_ident(&id.0),
Expr::Literal(Literal::String(value)) => {
value.trim_matches('\'').to_owned()
}
_ => {
todo!("Unsupported primary key expression");
}
};
(col_name, column.order.unwrap_or(SortOrder::Asc))
})
.collect();
if let Some(ref mut unique_sets) = unique_sets {
unique_sets.push(unique_set);
} else {
unique_sets = Some(vec![unique_set]);
}
}
}
}
@@ -432,6 +459,7 @@ fn create_table(
primary_key_columns,
columns: cols,
is_strict,
unique_sets,
})
}
@@ -709,6 +737,7 @@ pub fn sqlite_schema_table() -> BTreeTable {
unique: false,
},
],
unique_sets: None,
}
}
@@ -777,37 +806,47 @@ impl Index {
}
}
/// The order of index returned should be kept the same
///
/// If the order of the index returned changes, this is a breaking change
///
/// In the future when we support Alter Column, we should revisit a way to make this less dependent on ordering
pub fn automatic_from_primary_key_and_unique(
table: &BTreeTable,
index_name: &str,
root_page: usize,
) -> Result<Index> {
let mut index_columns = table
auto_indices: Vec<(String, usize)>,
) -> Result<Vec<Index>> {
// The number of auto_indices in create table should match in the number of indices we calculate in this function
let mut auto_indices = auto_indices.into_iter();
// Each unique col needs its own index
let mut indices = table
.columns
.iter()
.filter_map(|col| {
.enumerate()
.filter_map(|(pos_in_table, col)| {
if col.unique {
// Unique columns in Table should always be named
let col_name = col.name.as_ref().unwrap();
// Verify that each primary key column exists in the table
let Some((pos_in_table, _)) = table.get_column(col_name) else {
return Some(Err(crate::LimboError::InternalError(format!(
"Column {} is in index {} but not found in table {}",
col_name, index_name, table.name
))));
};
Some(Ok(IndexColumn {
name: normalize_ident(col_name),
order: SortOrder::Asc, // Default Sort Order
pos_in_table,
}))
let (index_name, root_page) = auto_indices.next().expect("number of auto_indices in schema should be same number of indices calculated");
Some(Index {
name: normalize_ident(index_name.as_str()),
table_name: table.name.clone(),
root_page,
columns: vec![IndexColumn {
name: normalize_ident(col_name),
order: SortOrder::Asc, // Default Sort Order
pos_in_table,
}],
unique: true,
ephemeral: false,
})
} else {
None
}
})
.collect::<Result<Vec<_>>>()?;
.collect::<Vec<_>>();
if table.primary_key_columns.is_empty() && index_columns.is_empty() {
if table.primary_key_columns.is_empty() && indices.is_empty() && table.unique_sets.is_none()
{
return Err(crate::LimboError::InternalError(
"Cannot create automatic index for table without primary key or unique constraint"
.to_string(),
@@ -818,44 +857,90 @@ impl Index {
// and no Unique columns.
// e.g CREATE TABLE t1 (a INTEGER PRIMARY KEY, b TEXT);
// If this happens, the caller incorrectly called this function
if table.get_rowid_alias_column().is_some() && index_columns.is_empty() {
if table.get_rowid_alias_column().is_some()
&& indices.is_empty()
&& table.unique_sets.is_none()
{
panic!("should not create an automatic index on table with a single column as rowid_alias and no UNIQUE columns");
}
if let Some(unique_sets) = table.unique_sets.as_ref() {
let unique_set_indices = unique_sets.iter().map(|set| {
let (index_name, root_page) = auto_indices.next().expect(
"number of auto_indices in schema should be same number of indices calculated",
);
let index_cols = set.iter().map(|(col_name, order)| {
let Some((pos_in_table, _)) = table.get_column(col_name) else {
// This is clearly an invariant that should be maintained, so a panic seems more correct here
panic!(
"Column {} is in index {} but not found in table {}",
col_name, index_name, table.name
);
};
IndexColumn {
name: normalize_ident(col_name),
order: *order,
pos_in_table,
}
});
Index {
name: normalize_ident(index_name.as_str()),
table_name: table.name.clone(),
root_page,
columns: index_cols.collect(),
unique: true,
ephemeral: false,
}
});
indices.extend(unique_set_indices);
}
// TODO: see a better way to please Rust type system with iterators here
// I wanted to just chain the iterator above but Rust type system get's messy with Iterators.
// It would not allow me chain them even by using a core::iter::empty()
// To circumvent this, I'm having to allocate a second Vec, and extend the other from it.
if table.get_rowid_alias_column().is_none() {
if table.get_rowid_alias_column().is_none() && !table.primary_key_columns.is_empty() {
let (index_name, root_page) = auto_indices.next().expect(
"number of auto_indices in schema should be same number of indices calculated",
);
let primary_keys = table
.primary_key_columns
.iter()
.map(|(col_name, order)| {
// Verify that each primary key column exists in the table
let Some((pos_in_table, _)) = table.get_column(col_name) else {
return Err(crate::LimboError::InternalError(format!(
// This is clearly an invariant that should be maintained, so a panic seems more correct here
panic!(
"Column {} is in index {} but not found in table {}",
col_name, index_name, table.name
)));
);
};
Ok(IndexColumn {
IndexColumn {
name: normalize_ident(col_name),
order: order.clone(),
pos_in_table,
})
}
})
.collect::<Result<Vec<_>>>()?;
index_columns.extend(primary_keys);
.collect::<Vec<_>>();
indices.push(Index {
name: normalize_ident(index_name.as_str()),
table_name: table.name.clone(),
root_page,
columns: primary_keys,
unique: true,
ephemeral: false,
});
}
Ok(Index {
name: normalize_ident(index_name),
table_name: table.name.clone(),
root_page,
columns: index_columns,
unique: true, // Primary key indexes are always unique
ephemeral: false,
})
if auto_indices.next().is_some() {
panic!("number of auto_indices in schema should be same number of indices calculated");
}
Ok(indices)
}
/// Given a column position in the table, return the position in the index.
@@ -1183,18 +1268,24 @@ mod tests {
// Without composite primary keys, we should not have an automatic index on a primary key that is a rowid alias
let sql = r#"CREATE TABLE t1 (a INTEGER PRIMARY KEY, b TEXT);"#;
let table = BTreeTable::from_sql(sql, 0).unwrap();
let _index =
Index::automatic_from_primary_key_and_unique(&table, "sqlite_autoindex_t1_1", 2)
.unwrap();
let _index = Index::automatic_from_primary_key_and_unique(
&table,
vec![("sqlite_autoindex_t1_1".to_string(), 2)],
)
.unwrap();
}
#[test]
fn test_automatic_index_composite_key() -> Result<()> {
let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, PRIMARY KEY(a, b));"#;
let table = BTreeTable::from_sql(sql, 0)?;
let index =
Index::automatic_from_primary_key_and_unique(&table, "sqlite_autoindex_t1_1", 2)?;
let mut index = Index::automatic_from_primary_key_and_unique(
&table,
vec![("sqlite_autoindex_t1_1".to_string(), 2)],
)?;
assert!(index.len() == 1);
let index = index.pop().unwrap();
assert_eq!(index.name, "sqlite_autoindex_t1_1");
assert_eq!(index.table_name, "t1");
assert_eq!(index.root_page, 2);
@@ -1211,8 +1302,10 @@ mod tests {
fn test_automatic_index_no_primary_key() -> Result<()> {
let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT);"#;
let table = BTreeTable::from_sql(sql, 0)?;
let result =
Index::automatic_from_primary_key_and_unique(&table, "sqlite_autoindex_t1_1", 2);
let result = Index::automatic_from_primary_key_and_unique(
&table,
vec![("sqlite_autoindex_t1_1".to_string(), 2)],
);
assert!(result.is_err());
assert!(matches!(
@@ -1223,7 +1316,8 @@ mod tests {
}
#[test]
fn test_automatic_index_nonexistent_column() -> Result<()> {
#[should_panic]
fn test_automatic_index_nonexistent_column() {
// Create a table with a primary key column that doesn't exist in the table
let table = BTreeTable {
root_page: 0,
@@ -1241,25 +1335,26 @@ mod tests {
default: None,
unique: false,
}],
unique_sets: None,
};
let result =
Index::automatic_from_primary_key_and_unique(&table, "sqlite_autoindex_t1_1", 2);
assert!(result.is_err());
assert!(matches!(
result.unwrap_err(),
LimboError::InternalError(msg) if msg.contains("not found in table")
));
Ok(())
let _result = Index::automatic_from_primary_key_and_unique(
&table,
vec![("sqlite_autoindex_t1_1".to_string(), 2)],
);
}
#[test]
fn test_automatic_index_unique_column() -> Result<()> {
let sql = r#"CREATE table t1 (x INTEGER, y INTEGER UNIQUE);"#;
let table = BTreeTable::from_sql(sql, 0)?;
let index =
Index::automatic_from_primary_key_and_unique(&table, "sqlite_autoindex_t1_1", 2)?;
let mut index = Index::automatic_from_primary_key_and_unique(
&table,
vec![("sqlite_autoindex_t1_1".to_string(), 2)],
)?;
assert!(index.len() == 1);
let index = index.pop().unwrap();
assert_eq!(index.name, "sqlite_autoindex_t1_1");
assert_eq!(index.table_name, "t1");

View File

@@ -1,4 +1,5 @@
use std::fmt::Display;
use std::ops::Range;
use std::rc::Rc;
use crate::ast;
@@ -99,13 +100,15 @@ pub fn translate_create_table(
// https://github.com/sqlite/sqlite/blob/95f6df5b8d55e67d1e34d2bff217305a2f21b1fb/src/build.c#L2856-L2871
// https://github.com/sqlite/sqlite/blob/95f6df5b8d55e67d1e34d2bff217305a2f21b1fb/src/build.c#L1334C5-L1336C65
let index_root_reg = check_automatic_pk_index_required(&body, &mut program, &tbl_name.name.0)?;
if let Some(index_root_reg) = index_root_reg {
program.emit_insn(Insn::CreateBtree {
db: 0,
root: index_root_reg,
flags: CreateBTreeFlags::new_index(),
});
let index_regs = check_automatic_pk_index_required(&body, &mut program, &tbl_name.name.0)?;
if let Some(index_regs) = index_regs.as_ref() {
for index_reg in index_regs.clone() {
program.emit_insn(Insn::CreateBtree {
db: 0,
root: index_reg,
flags: CreateBTreeFlags::new_index(),
});
}
}
let table = schema.get_btree_table(SQLITE_TABLEID).unwrap();
@@ -130,20 +133,24 @@ pub fn translate_create_table(
);
// If we need an automatic index, add its entry to sqlite_schema
if let Some(index_root_reg) = index_root_reg {
let index_name = format!(
"{}{}_1",
PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX, tbl_name.name.0
);
emit_schema_entry(
&mut program,
sqlite_schema_cursor_id,
SchemaEntryType::Index,
&index_name,
&tbl_name.name.0,
index_root_reg,
None,
);
if let Some(index_regs) = index_regs {
for (idx, index_reg) in index_regs.into_iter().enumerate() {
let index_name = format!(
"{}{}_{}",
PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX,
tbl_name.name.0,
idx + 1
);
emit_schema_entry(
&mut program,
sqlite_schema_cursor_id,
SchemaEntryType::Index,
&index_name,
&tbl_name.name.0,
index_reg,
None,
);
}
}
program.resolve_label(parse_schema_label, program.offset());
@@ -257,7 +264,7 @@ fn check_automatic_pk_index_required(
body: &ast::CreateTableBody,
program: &mut ProgramBuilder,
tbl_name: &str,
) -> Result<Option<usize>> {
) -> Result<Option<Range<usize>>> {
match body {
ast::CreateTableBody::ColumnsAndConstraints {
columns,
@@ -265,7 +272,7 @@ fn check_automatic_pk_index_required(
options,
} => {
let mut primary_key_definition = None;
let mut has_unique_definition = false;
let mut unique_def_count = 0usize;
// Check table constraints for PRIMARY KEY
if let Some(constraints) = constraints {
@@ -338,7 +345,7 @@ fn check_automatic_pk_index_required(
is_descending: false,
});
} else if matches!(constraint.constraint, ast::ColumnConstraint::Unique(..)) {
has_unique_definition = true;
unique_def_count += 1;
}
}
}
@@ -348,10 +355,10 @@ fn check_automatic_pk_index_required(
bail_parse_error!("WITHOUT ROWID tables are not supported yet");
}
let mut total_indices = unique_def_count;
// Check if we need an automatic index
let needs_auto_index = if has_unique_definition {
true
} else if let Some(primary_key_definition) = &primary_key_definition {
let auto_index_pk = if let Some(primary_key_definition) = &primary_key_definition {
match primary_key_definition {
PrimaryKeyDefinitionType::Simple {
typename,
@@ -366,10 +373,18 @@ fn check_automatic_pk_index_required(
} else {
false
};
if auto_index_pk {
total_indices += 1;
}
if needs_auto_index {
let index_root_reg = program.alloc_register();
Ok(Some(index_root_reg))
if total_indices > 0 {
if total_indices == 1 {
let index_reg = program.alloc_register();
Ok(Some(index_reg..index_reg + 1))
} else {
let index_start_reg = program.alloc_registers(total_indices + 1);
Ok(Some(index_start_reg..index_start_reg + total_indices + 1))
}
} else {
Ok(None)
}

View File

@@ -36,19 +36,13 @@ pub fn normalize_ident(identifier: &str) -> String {
pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_";
enum UnparsedIndex {
/// CREATE INDEX idx ON table_name(sql)
FromSql {
table_name: String,
root_page: usize,
sql: String,
},
/// Implicitly created index due to primary key constraints (or UNIQUE, but not implemented)
FromConstraint {
name: String,
table_name: String,
root_page: usize,
},
/// Unparsed index that comes from a sql query, i.e not an automatic index
///
/// CREATE INDEX idx ON table_name(sql)
struct UnparsedFromSqlIndex {
table_name: String,
root_page: usize,
sql: String,
}
pub fn parse_schema_rows(
@@ -60,7 +54,11 @@ pub fn parse_schema_rows(
) -> Result<()> {
if let Some(mut rows) = rows {
rows.set_mv_tx_id(mv_tx_id);
let mut unparsed_indexes = Vec::with_capacity(10);
// TODO: if we IO, this unparsed indexes is lost. Will probably need some state between
// IO runs
let mut from_sql_indexes = Vec::with_capacity(10);
let mut automatic_indices: std::collections::HashMap<String, Vec<(String, usize)>> =
std::collections::HashMap::with_capacity(10);
loop {
match rows.step()? {
StepResult::Row => {
@@ -114,7 +112,7 @@ pub fn parse_schema_rows(
let root_page: i64 = row.get::<i64>(3)?;
match row.get::<&str>(4) {
Ok(sql) => {
unparsed_indexes.push(UnparsedIndex::FromSql {
from_sql_indexes.push(UnparsedFromSqlIndex {
table_name: row.get::<&str>(2)?.to_string(),
root_page: root_page as usize,
sql: sql.to_string(),
@@ -127,11 +125,14 @@ pub fn parse_schema_rows(
let index_name = row.get::<&str>(1)?.to_string();
let table_name = row.get::<&str>(2)?.to_string();
let root_page = row.get::<i64>(3)?;
unparsed_indexes.push(UnparsedIndex::FromConstraint {
name: index_name,
table_name,
root_page: root_page as usize,
});
match automatic_indices.entry(table_name) {
std::collections::hash_map::Entry::Vacant(e) => {
e.insert(vec![(index_name, root_page as usize)]);
}
std::collections::hash_map::Entry::Occupied(mut e) => {
e.get_mut().push((index_name, root_page as usize));
}
}
}
}
}
@@ -148,30 +149,22 @@ pub fn parse_schema_rows(
StepResult::Busy => break,
}
}
for unparsed_index in unparsed_indexes {
match unparsed_index {
UnparsedIndex::FromSql {
table_name,
root_page,
sql,
} => {
let table = schema.get_btree_table(&table_name).unwrap();
let index = schema::Index::from_sql(&sql, root_page as usize, table.as_ref())?;
schema.add_index(Arc::new(index));
}
UnparsedIndex::FromConstraint {
name,
table_name,
root_page,
} => {
let table = schema.get_btree_table(&table_name).unwrap();
let index = schema::Index::automatic_from_primary_key_and_unique(
table.as_ref(),
&name,
root_page as usize,
)?;
schema.add_index(Arc::new(index));
}
for UnparsedFromSqlIndex {
table_name,
root_page,
sql,
} in from_sql_indexes
{
let table = schema.get_btree_table(&table_name).unwrap();
let index = schema::Index::from_sql(&sql, root_page as usize, table.as_ref())?;
schema.add_index(Arc::new(index));
}
for (table_name, indices) in automatic_indices {
let table = schema.get_btree_table(&table_name).unwrap();
let ret_index =
schema::Index::automatic_from_primary_key_and_unique(table.as_ref(), indices)?;
for index in ret_index {
schema.add_index(Arc::new(index));
}
}
}

View File

@@ -248,6 +248,7 @@ pub enum Register {
/// A row is a the list of registers that hold the values for a filtered row. This row is a pointer, therefore
/// after stepping again, row will be invalidated to be sure it doesn't point to somewhere unexpected.
#[derive(Debug)]
pub struct Row {
values: *const Register,
count: usize,

View File

@@ -1192,6 +1192,7 @@ mod tests {
"CREATE TABLE {} ({})",
table.name, columns_with_first_column_as_pk
);
dbg!(&query);
let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
let sqlite = sqlite_exec_rows(&sqlite_conn, &query);