Merge 'Index utils' from Jussi Saurio

Extracted a few commits from #1351 that will also be useful
elsewhere.

Reviewed-by: Pere Diaz Bou <pere-altea@homail.com>

Closes #1352
This commit is contained in:
Jussi Saurio
2025-04-15 14:55:30 +03:00
4 changed files with 103 additions and 52 deletions

View File

@@ -179,6 +179,10 @@ impl BTreeTable {
col.is_rowid_alias
}
/// Returns the column position and column for a given column name.
/// Returns None if the column name is not found.
/// E.g. if table is CREATE TABLE t(a, b, c)
/// then get_column("b") returns (1, &Column { .. })
pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> {
let name = normalize_ident(name);
for (i, column) in self.columns.iter().enumerate() {
@@ -669,10 +673,16 @@ pub struct Index {
/// A single column of an `Index`, tied back to the table column it indexes.
pub struct IndexColumn {
/// Name of the indexed column.
pub name: String,
/// Sort direction of this column within the index.
pub order: SortOrder,
/// The zero-based position of the column in the source table.
/// For example:
/// CREATE TABLE t(a,b,c)
/// CREATE INDEX idx ON t(b)
/// b.pos_in_table == 1
pub pos_in_table: usize,
}
impl Index {
pub fn from_sql(sql: &str, root_page: usize) -> Result<Index> {
pub fn from_sql(sql: &str, root_page: usize, table: &BTreeTable) -> Result<Index> {
let mut parser = Parser::new(sql.as_bytes());
let cmd = parser.next()?;
match cmd {
@@ -684,13 +694,21 @@ impl Index {
..
})) => {
let index_name = normalize_ident(&idx_name.name.0);
let index_columns = columns
.into_iter()
.map(|col| IndexColumn {
name: normalize_ident(&col.expr.to_string()),
let mut index_columns = Vec::with_capacity(columns.len());
for col in columns.into_iter() {
let name = normalize_ident(&col.expr.to_string());
let Some((pos_in_table, _)) = table.get_column(&name) else {
return Err(crate::LimboError::InternalError(format!(
"Column {} is in index {} but not found in table {}",
name, index_name, table.name
)));
};
index_columns.push(IndexColumn {
name,
order: col.order.unwrap_or(SortOrder::Asc),
})
.collect();
pos_in_table,
});
}
Ok(Index {
name: index_name,
table_name: normalize_ident(&tbl_name.0),
@@ -719,15 +737,16 @@ impl Index {
.iter()
.map(|col_name| {
// Verify that each primary key column exists in the table
if table.get_column(col_name).is_none() {
let Some((pos_in_table, _)) = table.get_column(col_name) else {
return Err(crate::LimboError::InternalError(format!(
"Primary key column {} not found in table {}",
col_name, table.name
"Column {} is in index {} but not found in table {}",
col_name, index_name, table.name
)));
}
};
Ok(IndexColumn {
name: normalize_ident(col_name),
order: SortOrder::Asc, // Primary key indexes are always ascending
pos_in_table,
})
})
.collect::<Result<Vec<_>>>()?;
@@ -740,6 +759,18 @@ impl Index {
unique: true, // Primary key indexes are always unique
})
}
/// Translates a column position in the table into the matching position inside this index.
///
/// Yields `None` when no column of the index refers to `table_pos`.
/// Example:
/// CREATE TABLE t(a, b, c)
/// CREATE INDEX idx ON t(b)
/// column_table_pos_to_index_pos(1) == Some(0)
pub fn column_table_pos_to_index_pos(&self, table_pos: usize) -> Option<usize> {
    // Walk the index columns in order and stop at the first one that points at `table_pos`.
    for (index_pos, index_column) in self.columns.iter().enumerate() {
        if index_column.pos_in_table == table_pos {
            return Some(index_pos);
        }
    }
    None
}
}
#[cfg(test)]

View File

@@ -55,9 +55,10 @@ pub fn translate_create_index(
root_page: 0, // we dont have access till its created, after we parse the schema table
columns: columns
.iter()
.map(|c| IndexColumn {
name: c.0 .1.name.as_ref().unwrap().clone(),
order: c.1,
.map(|((pos_in_table, col), order)| IndexColumn {
name: col.name.as_ref().unwrap().clone(),
order: *order,
pos_in_table: *pos_in_table,
})
.collect(),
unique: unique_if_not_exists.0,

View File

@@ -732,13 +732,7 @@ pub fn try_extract_index_search_from_where_clause(
for index in table_indexes {
// Check how many terms in the where clause constrain the index in column order
find_index_constraints(
where_clause,
table_index,
table_reference,
index,
&mut constraints_cur,
)?;
find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?;
// naive scoring since we don't have statistics: prefer the index where we can use the most columns
// e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c.
let score = constraints_cur.len();
@@ -843,7 +837,6 @@ impl UnwrapParens for ast::Expr {
fn get_column_position_in_index(
expr: &ast::Expr,
table_index: usize,
table_reference: &TableReference,
index: &Arc<Index>,
) -> Result<Option<usize>> {
let ast::Expr::Column { table, column, .. } = unwrap_parens(expr)? else {
@@ -852,13 +845,7 @@ fn get_column_position_in_index(
if *table != table_index {
return Ok(None);
}
let Some(column) = table_reference.table.get_column_at(*column) else {
return Ok(None);
};
Ok(index
.columns
.iter()
.position(|col| Some(&col.name) == column.name.as_ref()))
Ok(index.column_table_pos_to_index_pos(*column))
}
/// Find all [IndexConstraint]s for a given WHERE clause
@@ -868,7 +855,6 @@ fn get_column_position_in_index(
fn find_index_constraints(
where_clause: &mut Vec<WhereTerm>,
table_index: usize,
table_reference: &TableReference,
index: &Arc<Index>,
out_constraints: &mut Vec<IndexConstraint>,
) -> Result<()> {
@@ -908,9 +894,7 @@ fn find_index_constraints(
}
// Check if lhs is a column that is in the i'th position of the index
if Some(position_in_index)
== get_column_position_in_index(lhs, table_index, table_reference, index)?
{
if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? {
out_constraints.push(IndexConstraint {
operator: *operator,
position_in_where_clause: (position_in_where_clause, BinaryExprSide::Rhs),
@@ -919,9 +903,7 @@ fn find_index_constraints(
break;
}
// Check if rhs is a column that is in the i'th position of the index
if Some(position_in_index)
== get_column_position_in_index(rhs, table_index, table_reference, index)?
{
if Some(position_in_index) == get_column_position_in_index(rhs, table_index, index)? {
out_constraints.push(IndexConstraint {
operator: opposite_cmp_op(*operator), // swap the operator since e.g. if condition is 5 >= x, we want to use x <= 5
position_in_where_clause: (position_in_where_clause, BinaryExprSide::Lhs),

View File

@@ -36,6 +36,21 @@ pub fn normalize_ident(identifier: &str) -> String {
pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_";
/// An index entry read from a schema row whose `schema::Index` has not been built yet.
///
/// `parse_schema_rows` collects these while stepping through the rows and only
/// constructs the indexes after the row loop, because building an index looks up
/// its table through `schema.get_btree_table`.
enum UnparsedIndex {
/// Index that carries its own SQL definition, e.g. `CREATE INDEX idx ON table_name(col)`.
FromSql {
/// Name of the table the index belongs to (column 2 of the schema row).
table_name: String,
/// Root page of the index (column 3 of the schema row).
root_page: usize,
/// SQL text of the index definition (column 4 of the schema row),
/// later passed to `schema::Index::from_sql`.
sql: String,
},
/// Implicitly created index due to primary key constraints (or UNIQUE, but that is not implemented).
/// Used when the SQL column of the schema row cannot be read as text.
FromConstraint {
/// Name of the automatic index (column 1 of the schema row).
name: String,
/// Name of the table the index belongs to (column 2 of the schema row).
table_name: String,
/// Root page of the index (column 3 of the schema row).
root_page: usize,
},
}
pub fn parse_schema_rows(
rows: Option<Statement>,
schema: &mut Schema,
@@ -45,7 +60,7 @@ pub fn parse_schema_rows(
) -> Result<()> {
if let Some(mut rows) = rows {
rows.set_mv_tx_id(mv_tx_id);
let mut automatic_indexes = Vec::new();
let mut unparsed_indexes = Vec::with_capacity(10);
loop {
match rows.step()? {
StepResult::Row => {
@@ -99,21 +114,24 @@ pub fn parse_schema_rows(
let root_page: i64 = row.get::<i64>(3)?;
match row.get::<&str>(4) {
Ok(sql) => {
let index = schema::Index::from_sql(sql, root_page as usize)?;
schema.add_index(Arc::new(index));
unparsed_indexes.push(UnparsedIndex::FromSql {
table_name: row.get::<&str>(2)?.to_string(),
root_page: root_page as usize,
sql: sql.to_string(),
});
}
_ => {
// Automatic index on primary key, e.g.
// table|foo|foo|2|CREATE TABLE foo (a text PRIMARY KEY, b)
// index|sqlite_autoindex_foo_1|foo|3|
let index_name = row.get::<&str>(1)?;
let table_name = row.get::<&str>(2)?;
let index_name = row.get::<&str>(1)?.to_string();
let table_name = row.get::<&str>(2)?.to_string();
let root_page = row.get::<i64>(3)?;
automatic_indexes.push((
index_name.to_string(),
table_name.to_string(),
root_page,
));
unparsed_indexes.push(UnparsedIndex::FromConstraint {
name: index_name,
table_name,
root_page: root_page as usize,
});
}
}
}
@@ -130,12 +148,31 @@ pub fn parse_schema_rows(
StepResult::Busy => break,
}
}
for (index_name, table_name, root_page) in automatic_indexes {
// We need to process these after all tables are loaded into memory due to the schema.get_table() call
let table = schema.get_btree_table(&table_name).unwrap();
let index =
schema::Index::automatic_from_primary_key(&table, &index_name, root_page as usize)?;
schema.add_index(Arc::new(index));
for unparsed_index in unparsed_indexes {
match unparsed_index {
UnparsedIndex::FromSql {
table_name,
root_page,
sql,
} => {
let table = schema.get_btree_table(&table_name).unwrap();
let index = schema::Index::from_sql(&sql, root_page as usize, table.as_ref())?;
schema.add_index(Arc::new(index));
}
UnparsedIndex::FromConstraint {
name,
table_name,
root_page,
} => {
let table = schema.get_btree_table(&table_name).unwrap();
let index = schema::Index::automatic_from_primary_key(
table.as_ref(),
&name,
root_page as usize,
)?;
schema.add_index(Arc::new(index));
}
}
}
}
Ok(())