From 198aedb04298c2beb21698510a1e0262eed0f865 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 09:54:18 +0300 Subject: [PATCH 1/2] Refactor: add 'pos_in_table' to IndexColumn for easier lookup --- core/schema.rs | 41 +++++++++++++++++------- core/translate/index.rs | 7 +++-- core/util.rs | 69 +++++++++++++++++++++++++++++++---------- 3 files changed, 87 insertions(+), 30 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 70cc726c6..364ad6f99 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -179,6 +179,10 @@ impl BTreeTable { col.is_rowid_alias } + /// Returns the column position and column for a given column name. + /// Returns None if the column name is not found. + /// E.g. if table is CREATE TABLE t(a, b, c) + /// then get_column("b") returns (1, &Column { .. }) pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> { let name = normalize_ident(name); for (i, column) in self.columns.iter().enumerate() { @@ -669,10 +673,16 @@ pub struct Index { pub struct IndexColumn { pub name: String, pub order: SortOrder, + /// the position of the column in the source table. + /// for example: + /// CREATE TABLE t(a,b,c) + /// CREATE INDEX idx ON t(b) + /// b.pos_in_table == 1 + pub pos_in_table: usize, } impl Index { - pub fn from_sql(sql: &str, root_page: usize) -> Result { + pub fn from_sql(sql: &str, root_page: usize, table: &BTreeTable) -> Result { let mut parser = Parser::new(sql.as_bytes()); let cmd = parser.next()?; match cmd { @@ -684,13 +694,21 @@ impl Index { .. })) => { let index_name = normalize_ident(&idx_name.name.0); - let index_columns = columns - .into_iter() - .map(|col| IndexColumn { - name: normalize_ident(&col.expr.to_string()), + let mut index_columns = Vec::with_capacity(columns.len()); + for col in columns.into_iter() { + let name = normalize_ident(&col.expr.to_string()); + let Some((pos_in_table, _)) = table.get_column(&name) else { + return Err(crate::LimboError::InternalError(format!( + "Column {} is in index {} but not found in table {}", + name, index_name, table.name + ))); + }; + index_columns.push(IndexColumn { + name, order: col.order.unwrap_or(SortOrder::Asc), - }) - .collect(); + pos_in_table, + }); + } Ok(Index { name: index_name, table_name: normalize_ident(&tbl_name.0), @@ -719,15 +737,16 @@ impl Index { .iter() .map(|col_name| { // Verify that each primary key column exists in the table - if table.get_column(col_name).is_none() { + let Some((pos_in_table, _)) = table.get_column(col_name) else { return Err(crate::LimboError::InternalError(format!( - "Primary key column {} not found in table {}", - col_name, table.name + "Column {} is in index {} but not found in table {}", + col_name, index_name, table.name ))); - } + }; Ok(IndexColumn { name: normalize_ident(col_name), order: SortOrder::Asc, // Primary key indexes are always ascending + pos_in_table, }) }) .collect::>>()?; diff --git a/core/translate/index.rs b/core/translate/index.rs index 366b986e7..de79aed23 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -55,9 +55,10 @@ pub fn translate_create_index( root_page: 0, // we dont have access till its created, after we parse the schema table columns: columns .iter() - .map(|c| IndexColumn { - name: c.0 .1.name.as_ref().unwrap().clone(), - order: c.1, + .map(|((pos_in_table, col), order)| IndexColumn { + name: col.name.as_ref().unwrap().clone(), + order: *order, + pos_in_table: *pos_in_table, }) .collect(), unique: unique_if_not_exists.0, diff --git a/core/util.rs b/core/util.rs index 80a52f387..b3ce8ecd0 100644 --- a/core/util.rs +++ b/core/util.rs @@ -36,6 +36,21 @@ pub fn normalize_ident(identifier: &str) -> String { pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_"; +enum UnparsedIndex { + /// CREATE INDEX idx ON table_name(sql) + FromSql { + table_name: String, + root_page: usize, + sql: String, + }, + /// Implicitly created index due to primary key constraints (or UNIQUE, but not implemented) + FromConstraint { + name: String, + table_name: String, + root_page: usize, + }, +} + pub fn parse_schema_rows( rows: Option, schema: &mut Schema, @@ -45,7 +60,7 @@ pub fn parse_schema_rows( ) -> Result<()> { if let Some(mut rows) = rows { rows.set_mv_tx_id(mv_tx_id); - let mut automatic_indexes = Vec::new(); + let mut unparsed_indexes = Vec::with_capacity(10); loop { match rows.step()? { StepResult::Row => { @@ -99,21 +114,24 @@ pub fn parse_schema_rows( let root_page: i64 = row.get::(3)?; match row.get::<&str>(4) { Ok(sql) => { - let index = schema::Index::from_sql(sql, root_page as usize)?; - schema.add_index(Arc::new(index)); + unparsed_indexes.push(UnparsedIndex::FromSql { + table_name: row.get::<&str>(2)?.to_string(), + root_page: root_page as usize, + sql: sql.to_string(), + }); } _ => { // Automatic index on primary key, e.g. // table|foo|foo|2|CREATE TABLE foo (a text PRIMARY KEY, b) // index|sqlite_autoindex_foo_1|foo|3| - let index_name = row.get::<&str>(1)?; - let table_name = row.get::<&str>(2)?; + let index_name = row.get::<&str>(1)?.to_string(); + let table_name = row.get::<&str>(2)?.to_string(); let root_page = row.get::(3)?; - automatic_indexes.push(( - index_name.to_string(), - table_name.to_string(), - root_page, - )); + unparsed_indexes.push(UnparsedIndex::FromConstraint { + name: index_name, + table_name, + root_page: root_page as usize, + }); } } } @@ -130,12 +148,31 @@ pub fn parse_schema_rows( StepResult::Busy => break, } } - for (index_name, table_name, root_page) in automatic_indexes { - // We need to process these after all tables are loaded into memory due to the schema.get_table() call - let table = schema.get_btree_table(&table_name).unwrap(); - let index = - schema::Index::automatic_from_primary_key(&table, &index_name, root_page as usize)?; - schema.add_index(Arc::new(index)); + for unparsed_index in unparsed_indexes { + match unparsed_index { + UnparsedIndex::FromSql { + table_name, + root_page, + sql, + } => { + let table = schema.get_btree_table(&table_name).unwrap(); + let index = schema::Index::from_sql(&sql, root_page as usize, table.as_ref())?; + schema.add_index(Arc::new(index)); + } + UnparsedIndex::FromConstraint { + name, + table_name, + root_page, + } => { + let table = schema.get_btree_table(&table_name).unwrap(); + let index = schema::Index::automatic_from_primary_key( + table.as_ref(), + &name, + root_page as usize, + )?; + schema.add_index(Arc::new(index)); + } + } } } Ok(()) From a467060e1c509fefeb010564b8848738cb7075b6 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 09:57:33 +0300 Subject: [PATCH 2/2] Index: add method column_table_pos_to_index_pos() --- core/schema.rs | 12 ++++++++++++ core/translate/optimizer.rs | 26 ++++---------------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 364ad6f99..fbd3a86ec 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -759,6 +759,18 @@ impl Index { unique: true, // Primary key indexes are always unique }) } + + /// Given a column position in the table, return the position in the index. + /// Returns None if the column is not found in the index. + /// For example, given: + /// CREATE TABLE t(a, b, c) + /// CREATE INDEX idx ON t(b) + /// then column_table_pos_to_index_pos(1) returns Some(0) + pub fn column_table_pos_to_index_pos(&self, table_pos: usize) -> Option { + self.columns + .iter() + .position(|c| c.pos_in_table == table_pos) + } } #[cfg(test)] diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 7c29b8834..c4bf12810 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -732,13 +732,7 @@ pub fn try_extract_index_search_from_where_clause( for index in table_indexes { // Check how many terms in the where clause constrain the index in column order - find_index_constraints( - where_clause, - table_index, - table_reference, - index, - &mut constraints_cur, - )?; + find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?; // naive scoring since we don't have statistics: prefer the index where we can use the most columns // e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c. let score = constraints_cur.len(); @@ -843,7 +837,6 @@ impl UnwrapParens for ast::Expr { fn get_column_position_in_index( expr: &ast::Expr, table_index: usize, - table_reference: &TableReference, index: &Arc, ) -> Result> { let ast::Expr::Column { table, column, .. } = unwrap_parens(expr)? else { @@ -852,13 +845,7 @@ fn get_column_position_in_index( if *table != table_index { return Ok(None); } - let Some(column) = table_reference.table.get_column_at(*column) else { - return Ok(None); - }; - Ok(index - .columns - .iter() - .position(|col| Some(&col.name) == column.name.as_ref())) + Ok(index.column_table_pos_to_index_pos(*column)) } /// Find all [IndexConstraint]s for a given WHERE clause @@ -868,7 +855,6 @@ fn get_column_position_in_index( fn find_index_constraints( where_clause: &mut Vec, table_index: usize, - table_reference: &TableReference, index: &Arc, out_constraints: &mut Vec, ) -> Result<()> { @@ -908,9 +894,7 @@ fn find_index_constraints( } // Check if lhs is a column that is in the i'th position of the index - if Some(position_in_index) - == get_column_position_in_index(lhs, table_index, table_reference, index)? - { + if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? { out_constraints.push(IndexConstraint { operator: *operator, position_in_where_clause: (position_in_where_clause, BinaryExprSide::Rhs), @@ -919,9 +903,7 @@ fn find_index_constraints( break; } // Check if rhs is a column that is in the i'th position of the index - if Some(position_in_index) - == get_column_position_in_index(rhs, table_index, table_reference, index)? - { + if Some(position_in_index) == get_column_position_in_index(rhs, table_index, index)? { out_constraints.push(IndexConstraint { operator: opposite_cmp_op(*operator), // swap the operator since e.g. if condition is 5 >= x, we want to use x <= 5 position_in_where_clause: (position_in_where_clause, BinaryExprSide::Lhs),