mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-10 11:44:22 +01:00
Merge 'Index utils' from Jussi Saurio
Extracted a few commits out from #1351 which are also going to be useful elsewhere Reviewed-by: Pere Diaz Bou <pere-altea@homail.com> Closes #1352
This commit is contained in:
@@ -179,6 +179,10 @@ impl BTreeTable {
|
||||
col.is_rowid_alias
|
||||
}
|
||||
|
||||
/// Returns the column position and column for a given column name.
|
||||
/// Returns None if the column name is not found.
|
||||
/// E.g. if table is CREATE TABLE t(a, b, c)
|
||||
/// then get_column("b") returns (1, &Column { .. })
|
||||
pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> {
|
||||
let name = normalize_ident(name);
|
||||
for (i, column) in self.columns.iter().enumerate() {
|
||||
@@ -669,10 +673,16 @@ pub struct Index {
|
||||
pub struct IndexColumn {
|
||||
pub name: String,
|
||||
pub order: SortOrder,
|
||||
/// the position of the column in the source table.
|
||||
/// for example:
|
||||
/// CREATE TABLE t(a,b,c)
|
||||
/// CREATE INDEX idx ON t(b)
|
||||
/// b.pos_in_table == 1
|
||||
pub pos_in_table: usize,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn from_sql(sql: &str, root_page: usize) -> Result<Index> {
|
||||
pub fn from_sql(sql: &str, root_page: usize, table: &BTreeTable) -> Result<Index> {
|
||||
let mut parser = Parser::new(sql.as_bytes());
|
||||
let cmd = parser.next()?;
|
||||
match cmd {
|
||||
@@ -684,13 +694,21 @@ impl Index {
|
||||
..
|
||||
})) => {
|
||||
let index_name = normalize_ident(&idx_name.name.0);
|
||||
let index_columns = columns
|
||||
.into_iter()
|
||||
.map(|col| IndexColumn {
|
||||
name: normalize_ident(&col.expr.to_string()),
|
||||
let mut index_columns = Vec::with_capacity(columns.len());
|
||||
for col in columns.into_iter() {
|
||||
let name = normalize_ident(&col.expr.to_string());
|
||||
let Some((pos_in_table, _)) = table.get_column(&name) else {
|
||||
return Err(crate::LimboError::InternalError(format!(
|
||||
"Column {} is in index {} but not found in table {}",
|
||||
name, index_name, table.name
|
||||
)));
|
||||
};
|
||||
index_columns.push(IndexColumn {
|
||||
name,
|
||||
order: col.order.unwrap_or(SortOrder::Asc),
|
||||
})
|
||||
.collect();
|
||||
pos_in_table,
|
||||
});
|
||||
}
|
||||
Ok(Index {
|
||||
name: index_name,
|
||||
table_name: normalize_ident(&tbl_name.0),
|
||||
@@ -719,15 +737,16 @@ impl Index {
|
||||
.iter()
|
||||
.map(|col_name| {
|
||||
// Verify that each primary key column exists in the table
|
||||
if table.get_column(col_name).is_none() {
|
||||
let Some((pos_in_table, _)) = table.get_column(col_name) else {
|
||||
return Err(crate::LimboError::InternalError(format!(
|
||||
"Primary key column {} not found in table {}",
|
||||
col_name, table.name
|
||||
"Column {} is in index {} but not found in table {}",
|
||||
col_name, index_name, table.name
|
||||
)));
|
||||
}
|
||||
};
|
||||
Ok(IndexColumn {
|
||||
name: normalize_ident(col_name),
|
||||
order: SortOrder::Asc, // Primary key indexes are always ascending
|
||||
pos_in_table,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
@@ -740,6 +759,18 @@ impl Index {
|
||||
unique: true, // Primary key indexes are always unique
|
||||
})
|
||||
}
|
||||
|
||||
/// Given a column position in the table, return the position in the index.
|
||||
/// Returns None if the column is not found in the index.
|
||||
/// For example, given:
|
||||
/// CREATE TABLE t(a, b, c)
|
||||
/// CREATE INDEX idx ON t(b)
|
||||
/// then column_table_pos_to_index_pos(1) returns Some(0)
|
||||
pub fn column_table_pos_to_index_pos(&self, table_pos: usize) -> Option<usize> {
|
||||
self.columns
|
||||
.iter()
|
||||
.position(|c| c.pos_in_table == table_pos)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -55,9 +55,10 @@ pub fn translate_create_index(
|
||||
root_page: 0, // we dont have access till its created, after we parse the schema table
|
||||
columns: columns
|
||||
.iter()
|
||||
.map(|c| IndexColumn {
|
||||
name: c.0 .1.name.as_ref().unwrap().clone(),
|
||||
order: c.1,
|
||||
.map(|((pos_in_table, col), order)| IndexColumn {
|
||||
name: col.name.as_ref().unwrap().clone(),
|
||||
order: *order,
|
||||
pos_in_table: *pos_in_table,
|
||||
})
|
||||
.collect(),
|
||||
unique: unique_if_not_exists.0,
|
||||
|
||||
@@ -732,13 +732,7 @@ pub fn try_extract_index_search_from_where_clause(
|
||||
|
||||
for index in table_indexes {
|
||||
// Check how many terms in the where clause constrain the index in column order
|
||||
find_index_constraints(
|
||||
where_clause,
|
||||
table_index,
|
||||
table_reference,
|
||||
index,
|
||||
&mut constraints_cur,
|
||||
)?;
|
||||
find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?;
|
||||
// naive scoring since we don't have statistics: prefer the index where we can use the most columns
|
||||
// e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c.
|
||||
let score = constraints_cur.len();
|
||||
@@ -843,7 +837,6 @@ impl UnwrapParens for ast::Expr {
|
||||
fn get_column_position_in_index(
|
||||
expr: &ast::Expr,
|
||||
table_index: usize,
|
||||
table_reference: &TableReference,
|
||||
index: &Arc<Index>,
|
||||
) -> Result<Option<usize>> {
|
||||
let ast::Expr::Column { table, column, .. } = unwrap_parens(expr)? else {
|
||||
@@ -852,13 +845,7 @@ fn get_column_position_in_index(
|
||||
if *table != table_index {
|
||||
return Ok(None);
|
||||
}
|
||||
let Some(column) = table_reference.table.get_column_at(*column) else {
|
||||
return Ok(None);
|
||||
};
|
||||
Ok(index
|
||||
.columns
|
||||
.iter()
|
||||
.position(|col| Some(&col.name) == column.name.as_ref()))
|
||||
Ok(index.column_table_pos_to_index_pos(*column))
|
||||
}
|
||||
|
||||
/// Find all [IndexConstraint]s for a given WHERE clause
|
||||
@@ -868,7 +855,6 @@ fn get_column_position_in_index(
|
||||
fn find_index_constraints(
|
||||
where_clause: &mut Vec<WhereTerm>,
|
||||
table_index: usize,
|
||||
table_reference: &TableReference,
|
||||
index: &Arc<Index>,
|
||||
out_constraints: &mut Vec<IndexConstraint>,
|
||||
) -> Result<()> {
|
||||
@@ -908,9 +894,7 @@ fn find_index_constraints(
|
||||
}
|
||||
|
||||
// Check if lhs is a column that is in the i'th position of the index
|
||||
if Some(position_in_index)
|
||||
== get_column_position_in_index(lhs, table_index, table_reference, index)?
|
||||
{
|
||||
if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? {
|
||||
out_constraints.push(IndexConstraint {
|
||||
operator: *operator,
|
||||
position_in_where_clause: (position_in_where_clause, BinaryExprSide::Rhs),
|
||||
@@ -919,9 +903,7 @@ fn find_index_constraints(
|
||||
break;
|
||||
}
|
||||
// Check if rhs is a column that is in the i'th position of the index
|
||||
if Some(position_in_index)
|
||||
== get_column_position_in_index(rhs, table_index, table_reference, index)?
|
||||
{
|
||||
if Some(position_in_index) == get_column_position_in_index(rhs, table_index, index)? {
|
||||
out_constraints.push(IndexConstraint {
|
||||
operator: opposite_cmp_op(*operator), // swap the operator since e.g. if condition is 5 >= x, we want to use x <= 5
|
||||
position_in_where_clause: (position_in_where_clause, BinaryExprSide::Lhs),
|
||||
|
||||
69
core/util.rs
69
core/util.rs
@@ -36,6 +36,21 @@ pub fn normalize_ident(identifier: &str) -> String {
|
||||
|
||||
pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_";
|
||||
|
||||
enum UnparsedIndex {
|
||||
/// CREATE INDEX idx ON table_name(sql)
|
||||
FromSql {
|
||||
table_name: String,
|
||||
root_page: usize,
|
||||
sql: String,
|
||||
},
|
||||
/// Implicitly created index due to primary key constraints (or UNIQUE, but not implemented)
|
||||
FromConstraint {
|
||||
name: String,
|
||||
table_name: String,
|
||||
root_page: usize,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn parse_schema_rows(
|
||||
rows: Option<Statement>,
|
||||
schema: &mut Schema,
|
||||
@@ -45,7 +60,7 @@ pub fn parse_schema_rows(
|
||||
) -> Result<()> {
|
||||
if let Some(mut rows) = rows {
|
||||
rows.set_mv_tx_id(mv_tx_id);
|
||||
let mut automatic_indexes = Vec::new();
|
||||
let mut unparsed_indexes = Vec::with_capacity(10);
|
||||
loop {
|
||||
match rows.step()? {
|
||||
StepResult::Row => {
|
||||
@@ -99,21 +114,24 @@ pub fn parse_schema_rows(
|
||||
let root_page: i64 = row.get::<i64>(3)?;
|
||||
match row.get::<&str>(4) {
|
||||
Ok(sql) => {
|
||||
let index = schema::Index::from_sql(sql, root_page as usize)?;
|
||||
schema.add_index(Arc::new(index));
|
||||
unparsed_indexes.push(UnparsedIndex::FromSql {
|
||||
table_name: row.get::<&str>(2)?.to_string(),
|
||||
root_page: root_page as usize,
|
||||
sql: sql.to_string(),
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
// Automatic index on primary key, e.g.
|
||||
// table|foo|foo|2|CREATE TABLE foo (a text PRIMARY KEY, b)
|
||||
// index|sqlite_autoindex_foo_1|foo|3|
|
||||
let index_name = row.get::<&str>(1)?;
|
||||
let table_name = row.get::<&str>(2)?;
|
||||
let index_name = row.get::<&str>(1)?.to_string();
|
||||
let table_name = row.get::<&str>(2)?.to_string();
|
||||
let root_page = row.get::<i64>(3)?;
|
||||
automatic_indexes.push((
|
||||
index_name.to_string(),
|
||||
table_name.to_string(),
|
||||
root_page,
|
||||
));
|
||||
unparsed_indexes.push(UnparsedIndex::FromConstraint {
|
||||
name: index_name,
|
||||
table_name,
|
||||
root_page: root_page as usize,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -130,12 +148,31 @@ pub fn parse_schema_rows(
|
||||
StepResult::Busy => break,
|
||||
}
|
||||
}
|
||||
for (index_name, table_name, root_page) in automatic_indexes {
|
||||
// We need to process these after all tables are loaded into memory due to the schema.get_table() call
|
||||
let table = schema.get_btree_table(&table_name).unwrap();
|
||||
let index =
|
||||
schema::Index::automatic_from_primary_key(&table, &index_name, root_page as usize)?;
|
||||
schema.add_index(Arc::new(index));
|
||||
for unparsed_index in unparsed_indexes {
|
||||
match unparsed_index {
|
||||
UnparsedIndex::FromSql {
|
||||
table_name,
|
||||
root_page,
|
||||
sql,
|
||||
} => {
|
||||
let table = schema.get_btree_table(&table_name).unwrap();
|
||||
let index = schema::Index::from_sql(&sql, root_page as usize, table.as_ref())?;
|
||||
schema.add_index(Arc::new(index));
|
||||
}
|
||||
UnparsedIndex::FromConstraint {
|
||||
name,
|
||||
table_name,
|
||||
root_page,
|
||||
} => {
|
||||
let table = schema.get_btree_table(&table_name).unwrap();
|
||||
let index = schema::Index::automatic_from_primary_key(
|
||||
table.as_ref(),
|
||||
&name,
|
||||
root_page as usize,
|
||||
)?;
|
||||
schema.add_index(Arc::new(index));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
||||
Reference in New Issue
Block a user