diff --git a/core/schema.rs b/core/schema.rs index ea6a26279..0a5a8d80f 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -158,7 +158,7 @@ impl PartialEq for Table { pub struct BTreeTable { pub root_page: usize, pub name: String, - pub primary_key_column_names: Vec, + pub primary_key_columns: Vec<(String, SortOrder)>, pub columns: Vec, pub has_rowid: bool, pub is_strict: bool, @@ -166,8 +166,8 @@ pub struct BTreeTable { impl BTreeTable { pub fn get_rowid_alias_column(&self) -> Option<(usize, &Column)> { - if self.primary_key_column_names.len() == 1 { - let (idx, col) = self.get_column(&self.primary_key_column_names[0]).unwrap(); + if self.primary_key_columns.len() == 1 { + let (idx, col) = self.get_column(&self.primary_key_columns[0].0).unwrap(); if self.column_is_rowid_alias(col) { return Some((idx, col)); } @@ -265,7 +265,7 @@ fn create_table( let table_name = normalize_ident(&tbl_name.name.0); trace!("Creating table {}", table_name); let mut has_rowid = true; - let mut primary_key_column_names = vec![]; + let mut primary_key_columns = vec![]; let mut cols = vec![]; let is_strict: bool; match body { @@ -282,7 +282,7 @@ fn create_table( } = c.constraint { for column in columns { - primary_key_column_names.push(match column.expr { + let col_name = match column.expr { Expr::Id(id) => normalize_ident(&id.0), Expr::Literal(Literal::String(value)) => { value.trim_matches('\'').to_owned() @@ -290,7 +290,9 @@ fn create_table( _ => { todo!("Unsupported primary key expression"); } - }); + }; + primary_key_columns + .push((col_name, column.order.unwrap_or(SortOrder::Asc))); } } } @@ -347,10 +349,17 @@ fn create_table( let mut default = None; let mut primary_key = false; let mut notnull = false; + let mut order = SortOrder::Asc; for c_def in &col_def.constraints { match &c_def.constraint { - limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { .. } => { + limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { + order: o, + .. + } => { primary_key = true; + if let Some(o) = o { + order = o.clone(); + } } limbo_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } => { notnull = true; @@ -363,8 +372,11 @@ fn create_table( } if primary_key { - primary_key_column_names.push(name.clone()); - } else if primary_key_column_names.contains(&name) { + primary_key_columns.push((name.clone(), order)); + } else if primary_key_columns + .iter() + .any(|(col_name, _)| col_name == &name) + { primary_key = true; } @@ -386,7 +398,7 @@ fn create_table( }; // flip is_rowid_alias back to false if the table has multiple primary keys // or if the table has no rowid - if !has_rowid || primary_key_column_names.len() > 1 { + if !has_rowid || primary_key_columns.len() > 1 { for col in cols.iter_mut() { col.is_rowid_alias = false; } @@ -395,7 +407,7 @@ fn create_table( root_page, name: table_name, has_rowid, - primary_key_column_names, + primary_key_columns, columns: cols, is_strict, }) @@ -621,7 +633,7 @@ pub fn sqlite_schema_table() -> BTreeTable { name: "sqlite_schema".to_string(), has_rowid: true, is_strict: false, - primary_key_column_names: vec![], + primary_key_columns: vec![], columns: vec![ Column { name: Some("type".to_string()), @@ -740,16 +752,16 @@ impl Index { index_name: &str, root_page: usize, ) -> Result { - if table.primary_key_column_names.is_empty() { + if table.primary_key_columns.is_empty() { return Err(crate::LimboError::InternalError( "Cannot create automatic index for table without primary key".to_string(), )); } let index_columns = table - .primary_key_column_names + .primary_key_columns .iter() - .map(|col_name| { + .map(|(col_name, order)| { // Verify that each primary key column exists in the table let Some((pos_in_table, _)) = table.get_column(col_name) else { return Err(crate::LimboError::InternalError(format!( @@ -759,7 +771,7 @@ impl Index { }; Ok(IndexColumn { name: normalize_ident(col_name), - order: SortOrder::Asc, // Primary key indexes are always ascending + order: order.clone(), pos_in_table, }) }) @@ -905,8 +917,8 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Asc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) @@ -923,8 +935,11 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a", "b"], - table.primary_key_column_names, + vec![ + ("a".to_string(), SortOrder::Asc), + ("b".to_string(), SortOrder::Asc) + ], + table.primary_key_columns, "primary key column names should be ['a', 'b']" ); Ok(()) @@ -932,7 +947,7 @@ mod tests { #[test] pub fn test_primary_key_separate_single() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a));"#; + let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a desc));"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!(column.primary_key, "column 'a' should be a primary key"); @@ -941,8 +956,8 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Desc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) @@ -950,7 +965,7 @@ mod tests { #[test] pub fn test_primary_key_separate_multiple() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a, b));"#; + let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a, b desc));"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!(column.primary_key, "column 'a' should be a primary key"); @@ -959,8 +974,11 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a", "b"], - table.primary_key_column_names, + vec![ + ("a".to_string(), SortOrder::Asc), + ("b".to_string(), SortOrder::Desc) + ], + table.primary_key_columns, "primary key column names should be ['a', 'b']" ); Ok(()) @@ -977,8 +995,8 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Asc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) @@ -994,8 +1012,8 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Asc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) @@ -1143,7 +1161,7 @@ mod tests { name: "t1".to_string(), has_rowid: true, is_strict: false, - primary_key_column_names: vec!["nonexistent".to_string()], + primary_key_columns: vec![("nonexistent".to_string(), SortOrder::Asc)], columns: vec![Column { name: Some("a".to_string()), ty: Type::Integer, diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 3fd3e3ec9..df3e2ae3f 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1,4 +1,5 @@ use crate::{ + schema::Index, storage::{ pager::Pager, sqlite3_ondisk::{ @@ -7,6 +8,7 @@ use crate::{ }, }, translate::plan::IterationDirection, + types::IndexKeySortOrder, MvCursor, }; @@ -364,6 +366,7 @@ pub struct BTreeCursor { /// Reusable immutable record, used to allow better allocation strategy. reusable_immutable_record: RefCell>, empty_record: Cell, + pub index_key_sort_order: IndexKeySortOrder, } impl BTreeCursor { @@ -388,9 +391,22 @@ impl BTreeCursor { }, reusable_immutable_record: RefCell::new(None), empty_record: Cell::new(true), + index_key_sort_order: IndexKeySortOrder::default(), } } + pub fn new_index( + mv_cursor: Option>>, + pager: Rc, + root_page: usize, + index: &Index, + ) -> Self { + let index_key_sort_order = IndexKeySortOrder::from_index(index); + let mut cursor = Self::new(mv_cursor, pager, root_page); + cursor.index_key_sort_order = index_key_sort_order; + cursor + } + /// Check if the table is empty. /// This is done by checking if the root page has no cells. fn is_empty_table(&self) -> Result> { @@ -547,8 +563,11 @@ impl BTreeCursor { let record_values = record.get_values(); let record_slice_same_num_cols = &record_values[..index_key.get_values().len()]; - let order = - compare_immutable(record_slice_same_num_cols, index_key.get_values()); + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); order }; @@ -602,8 +621,11 @@ impl BTreeCursor { let record_values = record.get_values(); let record_slice_same_num_cols = &record_values[..index_key.get_values().len()]; - let order = - compare_immutable(record_slice_same_num_cols, index_key.get_values()); + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); order }; let found = match op { @@ -849,10 +871,18 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = compare_immutable( - &self.get_immutable_record().as_ref().unwrap().get_values(), - index_key.get_values(), - ); + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_same_num_cols = + &record.get_values()[..index_key.get_values().len()]; + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + order + }; let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), @@ -901,10 +931,18 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = compare_immutable( - &self.get_immutable_record().as_ref().unwrap().get_values(), - index_key.get_values(), - ); + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_same_num_cols = + &record.get_values()[..index_key.get_values().len()]; + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + order + }; let found = match op { SeekOp::GT => order.is_lt(), SeekOp::GE => order.is_le(), @@ -1031,7 +1069,11 @@ impl BTreeCursor { let record = record.as_ref().unwrap(); let record_slice_equal_number_of_cols = &record.get_values().as_slice()[..index_key.get_values().len()]; - let order = record_slice_equal_number_of_cols.cmp(index_key.get_values()); + let order = compare_immutable( + record_slice_equal_number_of_cols, + index_key.get_values(), + self.index_key_sort_order, + ); let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), @@ -1278,6 +1320,7 @@ impl BTreeCursor { let interior_cell_vs_index_key = compare_immutable( record_slice_equal_number_of_cols, index_key.get_values(), + self.index_key_sort_order, ); // in sqlite btrees left child pages have <= keys. // in general, in forwards iteration we want to find the first key that matches the seek condition. @@ -1430,7 +1473,8 @@ impl BTreeCursor { self.get_immutable_record() .as_ref() .unwrap() - .get_values() + .get_values(), + self.index_key_sort_order, ) == Ordering::Equal { tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting"); @@ -3017,6 +3061,7 @@ impl BTreeCursor { let order = compare_immutable( key.to_index_key_values(), self.get_immutable_record().as_ref().unwrap().get_values(), + self.index_key_sort_order, ); match order { Ordering::Less | Ordering::Equal => { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 1b709e0d3..76057c53b 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -845,27 +845,37 @@ fn emit_seek( is_index: bool, ) -> Result<()> { let Some(seek) = seek_def.seek.as_ref() else { - assert!(seek_def.iter_dir == IterationDirection::Backwards, "A SeekDef without a seek operation should only be used in backwards iteration direction"); - program.emit_insn(Insn::Last { - cursor_id: seek_cursor_id, - pc_if_empty: loop_end, - }); + // If there is no seek key, we start from the first or last row of the index, + // depending on the iteration direction. + match seek_def.iter_dir { + IterationDirection::Forwards => { + program.emit_insn(Insn::Rewind { + cursor_id: seek_cursor_id, + pc_if_empty: loop_end, + }); + } + IterationDirection::Backwards => { + program.emit_insn(Insn::Last { + cursor_id: seek_cursor_id, + pc_if_empty: loop_end, + }); + } + } return Ok(()); }; // We allocated registers for the full index key, but our seek key might not use the full index key. - // Later on for the termination condition we will overwrite the NULL registers. // See [crate::translate::optimizer::build_seek_def] for more details about in which cases we do and don't use the full index key. for i in 0..seek_def.key.len() { let reg = start_reg + i; if i >= seek.len { - if seek_def.null_pad_unset_cols() { + if seek.null_pad { program.emit_insn(Insn::Null { dest: reg, dest_end: None, }); } } else { - let expr = &seek_def.key[i]; + let expr = &seek_def.key[i].0; translate_expr(program, Some(tables), &expr, reg, &t_ctx.resolver)?; // If the seek key column is not verifiably non-NULL, we need check whether it is NULL, // and if so, jump to the loop end. @@ -879,7 +889,7 @@ fn emit_seek( } } } - let num_regs = if seek_def.null_pad_unset_cols() { + let num_regs = if seek.null_pad { seek_def.key.len() } else { seek.len @@ -943,19 +953,46 @@ fn emit_seek_termination( program.resolve_label(loop_start, program.offset()); return Ok(()); }; - let num_regs = termination.len; - // If the seek termination was preceded by a seek (which happens in most cases), - // we can re-use the registers that were allocated for the full index key. - let start_idx = seek_def.seek.as_ref().map_or(0, |seek| seek.len); - for i in start_idx..termination.len { + + // How many non-NULL values were used for seeking. + let seek_len = seek_def.seek.as_ref().map_or(0, |seek| seek.len); + + // How many values will be used for the termination condition. + let num_regs = if termination.null_pad { + seek_def.key.len() + } else { + termination.len + }; + for i in 0..seek_def.key.len() { let reg = start_reg + i; - translate_expr( - program, - Some(tables), - &seek_def.key[i], - reg, - &t_ctx.resolver, - )?; + let is_last = i == seek_def.key.len() - 1; + + // For all index key values apart from the last one, we are guaranteed to use the same values + // as were used for the seek, so we don't need to emit them again. + if i < seek_len && !is_last { + continue; + } + // For the last index key value, we need to emit a NULL if the termination condition is NULL-padded. + // See [SeekKey::null_pad] and [crate::translate::optimizer::build_seek_def] for why this is the case. + if i >= termination.len && !termination.null_pad { + continue; + } + if is_last && termination.null_pad { + program.emit_insn(Insn::Null { + dest: reg, + dest_end: None, + }); + // if the seek key is shorter than the termination key, we need to translate the remaining suffix of the termination key. + // if not, we just reuse what was emitted for the seek. + } else if seek_len < termination.len { + translate_expr( + program, + Some(tables), + &seek_def.key[i].0, + reg, + &t_ctx.resolver, + )?; + } } program.resolve_label(loop_start, program.offset()); let mut rowid_reg = None; diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index c4bf12810..80875da91 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -779,6 +779,7 @@ pub fn try_extract_index_search_from_where_clause( pub struct IndexConstraint { position_in_where_clause: (usize, BinaryExprSide), operator: ast::Operator, + index_column_sort_order: SortOrder, } /// Helper enum for [IndexConstraint] to indicate which side of a binary comparison expression is being compared to the index column. @@ -898,6 +899,7 @@ fn find_index_constraints( out_constraints.push(IndexConstraint { operator: *operator, position_in_where_clause: (position_in_where_clause, BinaryExprSide::Rhs), + index_column_sort_order: index.columns[position_in_index].order, }); found = true; break; @@ -907,6 +909,7 @@ fn find_index_constraints( out_constraints.push(IndexConstraint { operator: opposite_cmp_op(*operator), // swap the operator since e.g. if condition is 5 >= x, we want to use x <= 5 position_in_where_clause: (position_in_where_clause, BinaryExprSide::Lhs), + index_column_sort_order: index.columns[position_in_index].order, }); found = true; break; @@ -963,7 +966,7 @@ pub fn build_seek_def_from_index_constraints( } else { *rhs }; - key.push(cmp_expr); + key.push((cmp_expr, constraint.index_column_sort_order)); } // We know all but potentially the last term is an equality, so we can use the operator of the last term @@ -995,46 +998,80 @@ pub fn build_seek_def_from_index_constraints( /// 2. In contrast, having (x=10 AND y>20) forms a valid index key GT(x:10, y:20) because after the seek, we can simply terminate as soon as x > 10, /// i.e. use GT(x:10, y:20) as the [SeekKey] and GT(x:10) as the [TerminationKey]. /// +/// The preceding examples are for an ascending index. The logic is similar for descending indexes, but an important distinction is that +/// since a descending index is laid out in reverse order, the comparison operators are reversed, e.g. LT becomes GT, LE becomes GE, etc. +/// So when you see e.g. a SeekOp::GT below for a descending index, it actually means that we are seeking the first row where the index key is LESS than the seek key. +/// fn build_seek_def( op: ast::Operator, iter_dir: IterationDirection, - key: Vec, + key: Vec<(ast::Expr, SortOrder)>, ) -> Result { let key_len = key.len(); + let sort_order_of_last_key = key.last().unwrap().1; + + // For the commented examples below, keep in mind that since a descending index is laid out in reverse order, the comparison operators are reversed, e.g. LT becomes GT, LE becomes GE, etc. + // Also keep in mind that index keys are compared based on the number of columns given, so for example: + // - if key is GT(x:10), then (x=10, y=usize::MAX) is not GT because only X is compared. (x=11, y=) is GT. + // - if key is GT(x:10, y:20), then (x=10, y=21) is GT because both X and Y are compared. + // - if key is GT(x:10, y:NULL), then (x=10, y=0) is GT because NULL is always LT in index key comparisons. Ok(match (iter_dir, op) { // Forwards, EQ: // Example: (x=10 AND y=20) - // Seek key: GE(x:10, y:20) - // Termination key: GT(x:10, y:20) + // Seek key: start from the first GE(x:10, y:20) + // Termination key: end at the first GT(x:10, y:20) + // Ascending vs descending doesn't matter because all the comparisons are equalities. (IterationDirection::Forwards, ast::Operator::Equals) => SeekDef { key, iter_dir, seek: Some(SeekKey { len: key_len, + null_pad: false, op: SeekOp::GE, }), termination: Some(TerminationKey { len: key_len, + null_pad: false, op: SeekOp::GT, }), }, // Forwards, GT: - // Example: (x=10 AND y>20) - // Seek key: GT(x:10, y:20) - // Termination key: GT(x:10) + // Ascending index example: (x=10 AND y>20) + // Seek key: start from the first GT(x:10, y:20), e.g. (x=10, y=21) + // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) + // + // Descending index example: (x=10 AND y>20) + // Seek key: start from the first LE(x:10), e.g. (x=10, y=usize::MAX), so reversed -> GE(x:10) + // Termination key: end at the first LE(x:10, y:20), e.g. (x=10, y=20) so reversed -> GE(x:10, y:20) (IterationDirection::Forwards, ast::Operator::Greater) => { - let termination_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::GT, SeekOp::GT) + } else { + ( + key_len - 1, + key_len, + SeekOp::LE.reverse(), + SeekOp::LE.reverse(), + ) + }; SeekDef { key, iter_dir, - seek: Some(SeekKey { - len: key_len, - op: SeekOp::GT, - }), + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: false, + }) + } else { + None + }, termination: if termination_key_len > 0 { Some(TerminationKey { len: termination_key_len, - op: SeekOp::GT, + op: termination_op, + null_pad: false, }) } else { None @@ -1042,22 +1079,42 @@ fn build_seek_def( } } // Forwards, GE: - // Example: (x=10 AND y>=20) - // Seek key: GE(x:10, y:20) - // Termination key: GT(x:10) + // Ascending index example: (x=10 AND y>=20) + // Seek key: start from the first GE(x:10, y:20), e.g. (x=10, y=20) + // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) + // + // Descending index example: (x=10 AND y>=20) + // Seek key: start from the first LE(x:10), e.g. (x=10, y=usize::MAX), so reversed -> GE(x:10) + // Termination key: end at the first LT(x:10, y:20), e.g. (x=10, y=19), so reversed -> GT(x:10, y:20) (IterationDirection::Forwards, ast::Operator::GreaterEquals) => { - let termination_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::GE, SeekOp::GT) + } else { + ( + key_len - 1, + key_len, + SeekOp::LE.reverse(), + SeekOp::LT.reverse(), + ) + }; SeekDef { key, iter_dir, - seek: Some(SeekKey { - len: key_len, - op: SeekOp::GE, - }), + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: false, + }) + } else { + None + }, termination: if termination_key_len > 0 { Some(TerminationKey { len: termination_key_len, - op: SeekOp::GT, + op: termination_op, + null_pad: false, }) } else { None @@ -1065,70 +1122,142 @@ fn build_seek_def( } } // Forwards, LT: - // Example: (x=10 AND y<20) - // Seek key: GT(x:10, y: NULL) // NULL is always LT, indicating we only care about x - // Termination key: GE(x:10, y:20) - (IterationDirection::Forwards, ast::Operator::Less) => SeekDef { - key, - iter_dir, - seek: Some(SeekKey { - len: key_len - 1, - op: SeekOp::GT, - }), - termination: Some(TerminationKey { - len: key_len, - op: SeekOp::GE, - }), - }, + // Ascending index example: (x=10 AND y<20) + // Seek key: start from the first GT(x:10, y: NULL), e.g. (x=10, y=0) + // Termination key: end at the first GE(x:10, y:20), e.g. (x=10, y=20) + // + // Descending index example: (x=10 AND y<20) + // Seek key: start from the first LT(x:10, y:20), e.g. (x=10, y=19), so reversed -> GT(x:10, y:20) + // Termination key: end at the first LT(x:10), e.g. (x=9, y=usize::MAX), so reversed -> GE(x:10, NULL); i.e. GE the smallest possible (x=10, y) combination (NULL is always LT) + (IterationDirection::Forwards, ast::Operator::Less) => { + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len - 1, key_len, SeekOp::GT, SeekOp::GE) + } else { + (key_len, key_len - 1, SeekOp::GT, SeekOp::GE) + }; + SeekDef { + key, + iter_dir, + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: sort_order_of_last_key == SortOrder::Asc, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: sort_order_of_last_key == SortOrder::Desc, + }) + } else { + None + }, + } + } // Forwards, LE: - // Example: (x=10 AND y<=20) - // Seek key: GE(x:10, y:NULL) // NULL is always LT, indicating we only care about x - // Termination key: GT(x:10, y:20) - (IterationDirection::Forwards, ast::Operator::LessEquals) => SeekDef { - key, - iter_dir, - seek: Some(SeekKey { - len: key_len - 1, - op: SeekOp::GE, - }), - termination: Some(TerminationKey { - len: key_len, - op: SeekOp::GT, - }), - }, + // Ascending index example: (x=10 AND y<=20) + // Seek key: start from the first GE(x:10, y:NULL), e.g. (x=10, y=0) + // Termination key: end at the first GT(x:10, y:20), e.g. (x=10, y=21) + // + // Descending index example: (x=10 AND y<=20) + // Seek key: start from the first LE(x:10, y:20), e.g. (x=10, y=20) so reversed -> GE(x:10, y:20) + // Termination key: end at the first LT(x:10), e.g. (x=9, y=usize::MAX), so reversed -> GE(x:10, NULL); i.e. GE the smallest possible (x=10, y) combination (NULL is always LT) + (IterationDirection::Forwards, ast::Operator::LessEquals) => { + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len - 1, key_len, SeekOp::GT, SeekOp::GT) + } else { + ( + key_len, + key_len - 1, + SeekOp::LE.reverse(), + SeekOp::LE.reverse(), + ) + }; + SeekDef { + key, + iter_dir, + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: sort_order_of_last_key == SortOrder::Asc, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: sort_order_of_last_key == SortOrder::Desc, + }) + } else { + None + }, + } + } // Backwards, EQ: // Example: (x=10 AND y=20) - // Seek key: LE(x:10, y:20) - // Termination key: LT(x:10, y:20) + // Seek key: start from the last LE(x:10, y:20) + // Termination key: end at the first LT(x:10, y:20) + // Ascending vs descending doesn't matter because all the comparisons are equalities. (IterationDirection::Backwards, ast::Operator::Equals) => SeekDef { key, iter_dir, seek: Some(SeekKey { len: key_len, op: SeekOp::LE, + null_pad: false, }), termination: Some(TerminationKey { len: key_len, op: SeekOp::LT, + null_pad: false, }), }, // Backwards, LT: - // Example: (x=10 AND y<20) - // Seek key: LT(x:10, y:20) - // Termination key: LT(x:10) + // Ascending index example: (x=10 AND y<20) + // Seek key: start from the last LT(x:10, y:20), e.g. (x=10, y=19) + // Termination key: end at the first LE(x:10, NULL), e.g. (x=9, y=usize::MAX) + // + // Descending index example: (x=10 AND y<20) + // Seek key: start from the last GT(x:10, y:NULL), e.g. (x=10, y=0) so reversed -> LT(x:10, NULL) + // Termination key: end at the first GE(x:10, y:20), e.g. (x=10, y=20) so reversed -> LE(x:10, y:20) (IterationDirection::Backwards, ast::Operator::Less) => { - let termination_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::LT, SeekOp::LE) + } else { + ( + key_len - 1, + key_len, + SeekOp::GT.reverse(), + SeekOp::GE.reverse(), + ) + }; SeekDef { key, iter_dir, - seek: Some(SeekKey { - len: key_len, - op: SeekOp::LT, - }), + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: sort_order_of_last_key == SortOrder::Desc, + }) + } else { + None + }, termination: if termination_key_len > 0 { Some(TerminationKey { len: termination_key_len, - op: SeekOp::LT, + op: termination_op, + null_pad: sort_order_of_last_key == SortOrder::Asc, }) } else { None @@ -1136,22 +1265,42 @@ fn build_seek_def( } } // Backwards, LE: - // Example: (x=10 AND y<=20) - // Seek key: LE(x:10, y:20) - // Termination key: LT(x:10) + // Ascending index example: (x=10 AND y<=20) + // Seek key: start from the last LE(x:10, y:20), e.g. (x=10, y=20) + // Termination key: end at the first LT(x:10, NULL), e.g. (x=9, y=usize::MAX) + // + // Descending index example: (x=10 AND y<=20) + // Seek key: start from the last GT(x:10, NULL), e.g. (x=10, y=0) so reversed -> LT(x:10, NULL) + // Termination key: end at the first GT(x:10, y:20), e.g. (x=10, y=21) so reversed -> LT(x:10, y:20) (IterationDirection::Backwards, ast::Operator::LessEquals) => { - let termination_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::LE, SeekOp::LE) + } else { + ( + key_len - 1, + key_len, + SeekOp::GT.reverse(), + SeekOp::GT.reverse(), + ) + }; SeekDef { key, iter_dir, - seek: Some(SeekKey { - len: key_len, - op: SeekOp::LE, - }), + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: sort_order_of_last_key == SortOrder::Desc, + }) + } else { + None + }, termination: if termination_key_len > 0 { Some(TerminationKey { len: termination_key_len, - op: SeekOp::LT, + op: termination_op, + null_pad: sort_order_of_last_key == SortOrder::Asc, }) } else { None @@ -1159,49 +1308,89 @@ fn build_seek_def( } } // Backwards, GT: - // Example: (x=10 AND y>20) - // Seek key: LE(x:10) // try to find the last row where x = 10, not considering y at all. - // Termination key: LE(x:10, y:20) + // Ascending index example: (x=10 AND y>20) + // Seek key: start from the last LE(x:10), e.g. (x=10, y=usize::MAX) + // Termination key: end at the first LE(x:10, y:20), e.g. (x=10, y=20) + // + // Descending index example: (x=10 AND y>20) + // Seek key: start from the last GT(x:10, y:20), e.g. (x=10, y=21) so reversed -> LT(x:10, y:20) + // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) so reversed -> LT(x:10) (IterationDirection::Backwards, ast::Operator::Greater) => { - let seek_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len - 1, key_len, SeekOp::LE, SeekOp::LE) + } else { + ( + key_len, + key_len - 1, + SeekOp::GT.reverse(), + SeekOp::GT.reverse(), + ) + }; SeekDef { key, iter_dir, seek: if seek_key_len > 0 { Some(SeekKey { len: seek_key_len, - op: SeekOp::LE, + op: seek_op, + null_pad: false, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: false, }) } else { None }, - termination: Some(TerminationKey { - len: key_len, - op: SeekOp::LE, - }), } } // Backwards, GE: - // Example: (x=10 AND y>=20) - // Seek key: LE(x:10) // try to find the last row where x = 10, not considering y at all. - // Termination key: LT(x:10, y:20) + // Ascending index example: (x=10 AND y>=20) + // Seek key: start from the last LE(x:10), e.g. (x=10, y=usize::MAX) + // Termination key: end at the first LT(x:10, y:20), e.g. (x=10, y=19) + // + // Descending index example: (x=10 AND y>=20) + // Seek key: start from the last GE(x:10, y:20), e.g. (x=10, y=20) so reversed -> LE(x:10, y:20) + // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) so reversed -> LT(x:10) (IterationDirection::Backwards, ast::Operator::GreaterEquals) => { - let seek_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len - 1, key_len, SeekOp::LE, SeekOp::LT) + } else { + ( + key_len, + key_len - 1, + SeekOp::GE.reverse(), + SeekOp::GT.reverse(), + ) + }; SeekDef { key, iter_dir, seek: if seek_key_len > 0 { Some(SeekKey { len: seek_key_len, - op: SeekOp::LE, + op: seek_op, + null_pad: false, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: false, }) } else { None }, - termination: Some(TerminationKey { - len: key_len, - op: SeekOp::LT, - }), } } (_, op) => { @@ -1252,7 +1441,8 @@ pub fn try_extract_rowid_search_expression( | ast::Operator::Less | ast::Operator::LessEquals => { let rhs_owned = rhs.take_ownership(); - let seek_def = build_seek_def(*operator, iter_dir, vec![rhs_owned])?; + let seek_def = + build_seek_def(*operator, iter_dir, vec![(rhs_owned, SortOrder::Asc)])?; return Ok(Some(Search::Seek { index: None, seek_def, @@ -1280,7 +1470,8 @@ pub fn try_extract_rowid_search_expression( | ast::Operator::LessEquals => { let lhs_owned = lhs.take_ownership(); let op = opposite_cmp_op(*operator); - let seek_def = build_seek_def(op, iter_dir, vec![lhs_owned])?; + let seek_def = + build_seek_def(op, iter_dir, vec![(lhs_owned, SortOrder::Asc)])?; return Ok(Some(Search::Seek { index: None, seek_def, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 038dd90ee..bb581ab13 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,5 +1,5 @@ use core::fmt; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, SortOrder}; use std::{ cmp::Ordering, fmt::{Display, Formatter}, @@ -391,10 +391,10 @@ impl TableReference { pub struct SeekDef { /// The key to use when seeking and when terminating the scan that follows the seek. /// For example, given: - /// - CREATE INDEX i ON t (x, y) + /// - CREATE INDEX i ON t (x, y desc) /// - SELECT * FROM t WHERE x = 1 AND y >= 30 - /// The key is [1, 30] - pub key: Vec, + /// The key is [(1, ASC), (30, DESC)] + pub key: Vec<(ast::Expr, SortOrder)>, /// The condition to use when seeking. See [SeekKey] for more details. pub seek: Option, /// The condition to use when terminating the scan that follows the seek. See [TerminationKey] for more details. @@ -403,35 +403,22 @@ pub struct SeekDef { pub iter_dir: IterationDirection, } -impl SeekDef { - /// Whether we should null pad unset columns when seeking. - /// This is only done for forward seeks. - /// The reason it is done is that sometimes our full index key is not used in seeking. - /// See [SeekKey] for more details. - /// - /// For example, given: - /// - CREATE INDEX i ON t (x, y) - /// - SELECT * FROM t WHERE x = 1 AND y < 30 - /// We want to seek to the first row where x = 1, and then iterate forwards. - /// In this case, the seek key is GT(1, NULL) since '30' cannot be used to seek (since we want y < 30), - /// and any value of y will be greater than NULL. - /// - /// In backwards iteration direction, we do not null pad because we want to seek to the last row that matches the seek key. - /// For example, given: - /// - CREATE INDEX i ON t (x, y) - /// - SELECT * FROM t WHERE x = 1 AND y > 30 ORDER BY y - /// We want to seek to the last row where x = 1, and then iterate backwards. - /// In this case, the seek key is just LE(1) so any row with x = 1 will be a match. - pub fn null_pad_unset_cols(&self) -> bool { - self.iter_dir == IterationDirection::Forwards - } -} - /// A condition to use when seeking. #[derive(Debug, Clone)] pub struct SeekKey { /// How many columns from [SeekDef::key] are used in seeking. pub len: usize, + /// Whether to NULL pad the last column of the seek key to match the length of [SeekDef::key]. + /// The reason it is done is that sometimes our full index key is not used in seeking, + /// but we want to find the lowest value that matches the non-null prefix of the key. + /// For example, given: + /// - CREATE INDEX i ON t (x, y) + /// - SELECT * FROM t WHERE x = 1 AND y < 30 + /// We want to seek to the first row where x = 1, and then iterate forwards. + /// In this case, the seek key is GT(1, NULL) since NULL is always LT in index key comparisons. + /// We can't use just GT(1) because in index key comparisons, only the given number of columns are compared, + /// so this means any index keys with (x=1) will compare equal, e.g. (x=1, y=usize::MAX) will compare equal to the seek key (x:1) + pub null_pad: bool, /// The comparison operator to use when seeking. pub op: SeekOp, } @@ -441,6 +428,9 @@ pub struct SeekKey { pub struct TerminationKey { /// How many columns from [SeekDef::key] are used in terminating the scan that follows the seek. pub len: usize, + /// Whether to NULL pad the last column of the termination key to match the length of [SeekDef::key]. + /// See [SeekKey::null_pad]. + pub null_pad: bool, /// The comparison operator to use when terminating the scan that follows the seek. pub op: SeekOp, } diff --git a/core/types.rs b/core/types.rs index 3d531adfe..045f13393 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1,8 +1,10 @@ use limbo_ext::{AggCtx, FinalizeFunction, StepFunction}; +use limbo_sqlite3_parser::ast::SortOrder; use crate::error::LimboError; use crate::ext::{ExtValue, ExtValueType}; use crate::pseudo::PseudoCursor; +use crate::schema::Index; use crate::storage::btree::BTreeCursor; use crate::storage::sqlite3_ondisk::write_varint; use crate::translate::plan::IterationDirection; @@ -1043,8 +1045,58 @@ impl PartialOrd for RefValue { } } -pub fn compare_immutable(l: &[RefValue], r: &[RefValue]) -> std::cmp::Ordering { - l.partial_cmp(r).unwrap() +/// A bitfield that represents the comparison spec for index keys. +/// Since indexed columns can individually specify ASC/DESC, each key must +/// be compared differently. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct IndexKeySortOrder(u64); + +impl IndexKeySortOrder { + pub fn get_sort_order_for_col(&self, column_idx: usize) -> SortOrder { + assert!(column_idx < 64, "column index out of range: {}", column_idx); + match self.0 & (1 << column_idx) { + 0 => SortOrder::Asc, + _ => SortOrder::Desc, + } + } + + pub fn from_index(index: &Index) -> Self { + let mut spec = 0; + for (i, column) in index.columns.iter().enumerate() { + spec |= ((column.order == SortOrder::Desc) as u64) << i; + } + IndexKeySortOrder(spec) + } + + pub fn default() -> Self { + Self(0) + } +} + +impl Default for IndexKeySortOrder { + fn default() -> Self { + Self::default() + } +} + +pub fn compare_immutable( + l: &[RefValue], + r: &[RefValue], + index_key_sort_order: IndexKeySortOrder, +) -> std::cmp::Ordering { + assert_eq!(l.len(), r.len()); + for (i, (l, r)) in l.iter().zip(r).enumerate() { + let column_order = index_key_sort_order.get_sort_order_for_col(i); + let cmp = l.partial_cmp(r).unwrap(); + if !cmp.is_eq() { + return match column_order { + SortOrder::Asc => cmp, + SortOrder::Desc => cmp.reverse(), + }; + } + } + std::cmp::Ordering::Equal } const I8_LOW: i64 = -128; @@ -1253,6 +1305,16 @@ impl SeekOp { SeekOp::LE | SeekOp::LT => IterationDirection::Backwards, } } + + pub fn reverse(&self) -> Self { + match self { + SeekOp::EQ => SeekOp::EQ, + SeekOp::GE => SeekOp::LE, + SeekOp::GT => SeekOp::LT, + SeekOp::LE => SeekOp::GE, + SeekOp::LT => SeekOp::GT, + } + } } #[derive(Clone, PartialEq, Debug)] diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 41268a342..d00ee6129 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -12,6 +12,7 @@ use crate::{ }, printf::exec_printf, }, + types::compare_immutable, }; use std::{borrow::BorrowMut, rc::Rc, sync::Arc}; @@ -839,16 +840,18 @@ pub fn op_open_read( } None => None, }; - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); let mut cursors = state.cursors.borrow_mut(); match cursor_type { CursorType::BTreeTable(_) => { + let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } - CursorType::BTreeIndex(_) => { + CursorType::BTreeIndex(index) => { + let cursor = + BTreeCursor::new_index(mv_cursor, pager.clone(), *root_page, index.as_ref()); cursors .get_mut(*cursor_id) .unwrap() @@ -2051,9 +2054,11 @@ pub fn op_idx_ge( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let record_values = &record_values[..idx_values.len()]; + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_ge() { target_pc.to_offset_int() } else { @@ -2109,9 +2114,10 @@ pub fn op_idx_le( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_le() { target_pc.to_offset_int() } else { @@ -2149,9 +2155,10 @@ pub fn op_idx_gt( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_gt() { target_pc.to_offset_int() } else { @@ -2189,9 +2196,10 @@ pub fn op_idx_lt( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_lt() { target_pc.to_offset_int() } else { @@ -3979,7 +3987,10 @@ pub fn op_open_write( }; let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); let mut cursors = state.cursors.borrow_mut(); - let is_index = cursor_type.is_index(); + let maybe_index = match cursor_type { + CursorType::BTreeIndex(index) => Some(index), + _ => None, + }; let mv_cursor = match state.mv_tx_id { Some(tx_id) => { let table_id = root_page; @@ -3991,13 +4002,15 @@ pub fn op_open_write( } None => None, }; - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize); - if is_index { + if let Some(index) = maybe_index { + let cursor = + BTreeCursor::new_index(mv_cursor, pager.clone(), root_page as usize, index.as_ref()); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } else { + let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize); cursors .get_mut(*cursor_id) .unwrap() diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 3814be97b..f76a005ba 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -202,6 +202,8 @@ mod tests { } #[test] + /// A test for verifying that index seek+scan works correctly for compound keys + /// on indexes with various column orderings. pub fn index_scan_compound_key_fuzz() { let (mut rng, seed) = if std::env::var("SEED").is_ok() { let seed = std::env::var("SEED").unwrap().parse::().unwrap(); @@ -209,8 +211,25 @@ mod tests { } else { rng_from_time() }; - let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y))"); - let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + // Create all different 3-column primary key permutations + let dbs = [ + TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z))"), + TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z))"), + TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z))"), + TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z desc))"), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z desc))", + ), + ]; let mut pk_tuples = HashSet::new(); while pk_tuples.len() < 100000 { pk_tuples.insert((rng.random_range(0..3000), rng.random_range(0..3000))); @@ -225,125 +244,175 @@ mod tests { )); } let insert = format!("INSERT INTO t VALUES {}", tuples.join(", ")); - sqlite_conn.execute(&insert, params![]).unwrap(); - sqlite_conn.close().unwrap(); - let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); - let limbo_conn = db.connect_limbo(); + + // Insert all tuples into all databases + let sqlite_conns = dbs + .iter() + .map(|db| rusqlite::Connection::open(db.path.clone()).unwrap()) + .collect::>(); + for sqlite_conn in sqlite_conns.into_iter() { + sqlite_conn.execute(&insert, params![]).unwrap(); + sqlite_conn.close().unwrap(); + } + let sqlite_conns = dbs + .iter() + .map(|db| rusqlite::Connection::open(db.path.clone()).unwrap()) + .collect::>(); + let limbo_conns = dbs.iter().map(|db| db.connect_limbo()).collect::>(); const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="]; - const ORDER_BY: [Option<&str>; 3] = [None, Some("ORDER BY x DESC"), Some("ORDER BY x ASC")]; - const SECONDARY_ORDER_BY: [Option<&str>; 3] = [None, Some(", y DESC"), Some(", y ASC")]; + // For verifying index scans, we only care about cases where all but potentially the last column are constrained by an equality (=), + // because this is the only way to utilize an index efficiently for seeking. This is called the "left-prefix rule" of indexes. + // Hence we generate constraint combinations in this manner; as soon as a comparison is not an equality, we stop generating more constraints for the where clause. + // Examples: + // x = 1 AND y = 2 AND z > 3 + // x = 1 AND y > 2 + // x > 1 + let col_comp_first = COMPARISONS + .iter() + .cloned() + .map(|x| (Some(x), None, None)) + .collect::>(); + let col_comp_second = COMPARISONS + .iter() + .cloned() + .map(|x| (Some("="), Some(x), None)) + .collect::>(); + let col_comp_third = COMPARISONS + .iter() + .cloned() + .map(|x| (Some("="), Some("="), Some(x))) + .collect::>(); - let print_dump_on_fail = |insert: &str, seed: u64| { - let comment = format!("-- seed: {}; dump for manual debugging:", seed); - let pragma_journal_mode = "PRAGMA journal_mode = wal;"; - let create_table = "CREATE TABLE t(x, y, z, PRIMARY KEY (x, y));"; - let dump = format!( - "{}\n{}\n{}\n{}\n{}", - comment, pragma_journal_mode, create_table, comment, insert - ); - println!("{}", dump); - }; + let all_comps = [col_comp_first, col_comp_second, col_comp_third].concat(); - for comp in COMPARISONS.iter() { - for order_by in ORDER_BY.iter() { - // make it more likely that the full 2-column index is utilized for seeking - let iter_count_per_permutation = if *comp == "=" { 2000 } else { 500 }; + const ORDER_BY: [Option<&str>; 3] = [None, Some("DESC"), Some("ASC")]; + + const ITERATIONS: usize = 10000; + for i in 0..ITERATIONS { + if i % (ITERATIONS / 100) == 0 { println!( - "fuzzing {} iterations with comp: {:?}, order_by: {:?}", - iter_count_per_permutation, comp, order_by + "index_scan_compound_key_fuzz: iteration {}/{}", + i + 1, + ITERATIONS ); - for _ in 0..iter_count_per_permutation { - let first_col_val = rng.random_range(0..=3000); - let mut limit = "LIMIT 5"; - let mut second_idx_col_cond = "".to_string(); - let mut second_idx_col_comp = "".to_string(); + } + let (comp1, comp2, comp3) = all_comps[rng.random_range(0..all_comps.len())]; + // Similarly as for the constraints, generate order by permutations so that the only columns involved in the index seek are potentially part of the ORDER BY. + let (order_by1, order_by2, order_by3) = { + if comp1.is_some() && comp2.is_some() && comp3.is_some() { + ( + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + ) + } else if comp1.is_some() && comp2.is_some() { + ( + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + None, + ) + } else { + (ORDER_BY[rng.random_range(0..ORDER_BY.len())], None, None) + } + }; - // somtetimes include the second index column in the where clause. - // make it more probable when first column has '=' constraint since those queries are usually faster to run - let second_col_prob = if *comp == "=" { 0.7 } else { 0.02 }; - if rng.random_bool(second_col_prob) { - let second_idx_col = rng.random_range(0..3000); + // Generate random values for the WHERE clause constraints + let (col_val_first, col_val_second, col_val_third) = { + if comp1.is_some() && comp2.is_some() && comp3.is_some() { + ( + Some(rng.random_range(0..=3000)), + Some(rng.random_range(0..=3000)), + Some(rng.random_range(0..=3000)), + ) + } else if comp1.is_some() && comp2.is_some() { + ( + Some(rng.random_range(0..=3000)), + Some(rng.random_range(0..=3000)), + None, + ) + } else { + (Some(rng.random_range(0..=3000)), None, None) + } + }; - second_idx_col_comp = - COMPARISONS[rng.random_range(0..COMPARISONS.len())].to_string(); - second_idx_col_cond = - format!(" AND y {} {}", second_idx_col_comp, second_idx_col); - } + // Use a small limit to make the test complete faster + let limit = 5; - // if the first constraint is =, then half the time, use the second index column in the ORDER BY too - let mut secondary_order_by = None; - let use_secondary_order_by = order_by.is_some() - && *comp == "=" - && second_idx_col_comp != "" - && rng.random_bool(0.5); - let full_order_by = if use_secondary_order_by { - secondary_order_by = - SECONDARY_ORDER_BY[rng.random_range(0..SECONDARY_ORDER_BY.len())]; - if let Some(secondary) = secondary_order_by { - format!("{}{}", order_by.unwrap_or(""), secondary,) - } else { - order_by.unwrap_or("").to_string() - } - } else { - order_by.unwrap_or("").to_string() - }; + // Generate WHERE clause string + let where_clause_components = vec![ + comp1.map(|x| format!("x {} {}", x, col_val_first.unwrap())), + comp2.map(|x| format!("y {} {}", x, col_val_second.unwrap())), + comp3.map(|x| format!("z {} {}", x, col_val_third.unwrap())), + ] + .into_iter() + .filter_map(|x| x) + .collect::>(); + let where_clause = if where_clause_components.is_empty() { + "".to_string() + } else { + format!("WHERE {}", where_clause_components.join(" AND ")) + }; - // There are certain cases where SQLite does not bother iterating in reverse order despite the ORDER BY. - // These cases include e.g. - // SELECT * FROM t WHERE x = 3 ORDER BY x DESC - // SELECT * FROM t WHERE x = 3 and y < 100 ORDER BY x DESC - // - // The common thread being that the ORDER BY column is also constrained by an equality predicate, meaning - // that it doesn't semantically matter what the ordering is. - // - // We do not currently replicate this "lazy" behavior, so in these cases we want the full result set and ensure - // that if the result is not exactly equal, then the ordering must be the exact reverse. - let allow_reverse_ordering = { - if *comp != "=" { - false - } else if secondary_order_by.is_some() { - second_idx_col_comp == "=" - } else { - true - } - }; - if allow_reverse_ordering { - // see comment above about ordering and the '=' comparison operator; omitting LIMIT for that reason - // we mainly have LIMIT here for performance reasons but for = we want to get all the rows to ensure - // correctness in the = case - limit = ""; - } - let query = format!( - // e.g. SELECT * FROM t WHERE x = 1 AND y > 2 ORDER BY x DESC LIMIT 5 - "SELECT * FROM t WHERE x {} {} {} {} {}", - comp, first_col_val, second_idx_col_cond, full_order_by, limit, - ); - log::debug!("query: {}", query); - let limbo = limbo_exec_rows(&db, &limbo_conn, &query); - let sqlite = sqlite_exec_rows(&sqlite_conn, &query); - let is_equal = limbo == sqlite; - if !is_equal { - if allow_reverse_ordering { - let limbo_row_count = limbo.len(); - let sqlite_row_count = sqlite.len(); - if limbo_row_count == sqlite_row_count { - let limbo_rev = limbo.iter().cloned().rev().collect::>(); - assert_eq!(limbo_rev, sqlite, "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", query, limbo, sqlite, seed, allow_reverse_ordering); + // Generate ORDER BY string + let order_by_components = vec![ + order_by1.map(|x| format!("x {}", x)), + order_by2.map(|x| format!("y {}", x)), + order_by3.map(|x| format!("z {}", x)), + ] + .into_iter() + .filter_map(|x| x) + .collect::>(); + let order_by = if order_by_components.is_empty() { + "".to_string() + } else { + format!("ORDER BY {}", order_by_components.join(", ")) + }; + + // Generate final query string + let query = format!( + "SELECT * FROM t {} {} LIMIT {}", + where_clause, order_by, limit + ); + log::debug!("query: {}", query); + + // Execute the query on all databases and compare the results + for (i, sqlite_conn) in sqlite_conns.iter().enumerate() { + let limbo = limbo_exec_rows(&dbs[i], &limbo_conns[i], &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + if limbo != sqlite { + // if the order by contains exclusively components that are constrained by an equality (=), + // sqlite sometimes doesn't bother with ASC/DESC because it doesn't semantically matter + // so we need to check that limbo and sqlite return the same results when the ordering is reversed. + // because we are generally using LIMIT (to make the test complete faster), we need to rerun the query + // without limit and then check that the results are the same if reversed. + let order_by_only_equalities = !order_by_components.is_empty() + && order_by_components.iter().all(|o: &String| { + if o.starts_with("x ") { + comp1.map_or(false, |c| c == "=") + } else if o.starts_with("y ") { + comp2.map_or(false, |c| c == "=") } else { - print_dump_on_fail(&insert, seed); - let error_msg = format!("row count mismatch (limbo row count: {}, sqlite row count: {}): query: {}, limbo: {:?}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", limbo_row_count, sqlite_row_count, query, limbo, sqlite, seed, allow_reverse_ordering); - panic!("{}", error_msg); + comp3.map_or(false, |c| c == "=") } - } else { - print_dump_on_fail(&insert, seed); - panic!( - "query: {}, limbo row count: {}, limbo: {:?}, sqlite row count: {}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", - query, limbo.len(), limbo, sqlite.len(), sqlite, seed, allow_reverse_ordering - ); + }); + + if order_by_only_equalities { + let query_no_limit = + format!("SELECT * FROM t {} {} {}", where_clause, order_by, ""); + let limbo_no_limit = + limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit); + let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit); + let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::>(); + if limbo_rev == sqlite_no_limit { + continue; } } + panic!( + "limbo: {:?}, sqlite: {:?}, seed: {}, query: {}", + limbo, sqlite, seed, query + ); } } }