Store in-memory index definitions most-recently-seen-first

This solves an issue where an INSERT statement conflicts with
multiple indices. In that case, sqlite iterates the linked list
`pTab->pIndex` in order and handles the first conflict encountered.
The newest parsed index is always added to the head of the list.

To be compatible with this behavior, we also need to put the most
recently parsed index definition first in our indexes list for a given
table.
This commit is contained in:
Jussi Saurio
2025-09-22 10:11:50 +03:00
parent ef9f2f9a33
commit eada24b508
7 changed files with 60 additions and 15 deletions

View File

@@ -27,7 +27,7 @@ use crate::{
};
use crate::{util::normalize_ident, Result};
use core::fmt;
use std::collections::{HashMap, HashSet};
use std::collections::{HashMap, HashSet, VecDeque};
use std::ops::Deref;
use std::sync::Arc;
use std::sync::Mutex;
@@ -64,7 +64,7 @@ pub struct Schema {
pub views: ViewsMap,
/// table_name to list of indexes for the table
pub indexes: HashMap<String, Vec<Arc<Index>>>,
pub indexes: HashMap<String, VecDeque<Arc<Index>>>,
pub has_indexes: std::collections::HashSet<String>,
pub indexes_enabled: bool,
pub schema_version: u32,
@@ -77,7 +77,7 @@ impl Schema {
pub fn new(indexes_enabled: bool) -> Self {
let mut tables: HashMap<String, Arc<Table>> = HashMap::new();
let has_indexes = std::collections::HashSet::new();
let indexes: HashMap<String, Vec<Arc<Index>>> = HashMap::new();
let indexes: HashMap<String, VecDeque<Arc<Index>>> = HashMap::new();
#[allow(clippy::arc_with_non_send_sync)]
tables.insert(
SCHEMA_TABLE_NAME.to_string(),
@@ -244,17 +244,23 @@ impl Schema {
pub fn add_index(&mut self, index: Arc<Index>) {
let table_name = normalize_ident(&index.table_name);
// We must add the new index to the front of the deque, because SQLite stores index definitions as a linked list
// where the newest parsed index entry is at the head of list. If we would add it to the back of a regular Vec for example,
// then we would evaluate ON CONFLICT DO UPDATE clauses in the wrong index iteration order and UPDATE the wrong row. One might
// argue that this is an implementation detail and we should not care about this, but it makes e.g. the fuzz test 'partial_index_mutation_and_upsert_fuzz'
// fail, so let's just be compatible.
self.indexes
.entry(table_name)
.or_default()
.push(index.clone())
.push_front(index.clone())
}
pub fn get_indices(&self, table_name: &str) -> &[Arc<Index>] {
pub fn get_indices(&self, table_name: &str) -> impl Iterator<Item = &Arc<Index>> {
let name = normalize_ident(table_name);
self.indexes
.get(&name)
.map_or_else(|| &[] as &[Arc<Index>], |v| v.as_slice())
.map(|v| v.iter())
.unwrap_or_default()
}
pub fn get_index(&self, table_name: &str, index_name: &str) -> Option<&Arc<Index>> {

View File

@@ -96,7 +96,7 @@ pub fn prepare_delete_plan(
} else {
crate::bail_parse_error!("Table is neither a virtual table nor a btree table");
};
let indexes = schema.get_indices(table.get_name()).to_vec();
let indexes = schema.get_indices(table.get_name()).cloned().collect();
let joined_tables = vec![JoinedTable {
op: Operation::default_scan_for(&table),
table,

View File

@@ -364,7 +364,6 @@ pub fn translate_insert(
// (idx name, root_page, idx cursor id)
let idx_cursors = schema
.get_indices(table_name.as_str())
.iter()
.map(|idx| {
(
&idx.name,

View File

@@ -1,4 +1,8 @@
use std::{cmp::Ordering, collections::HashMap, sync::Arc};
use std::{
cmp::Ordering,
collections::{HashMap, VecDeque},
sync::Arc,
};
use crate::{
schema::{Column, Index},
@@ -175,7 +179,7 @@ fn estimate_selectivity(column: &Column, op: ast::Operator) -> f64 {
pub fn constraints_from_where_clause(
where_clause: &[WhereTerm],
table_references: &TableReferences,
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
available_indexes: &HashMap<String, VecDeque<Arc<Index>>>,
) -> Result<Vec<TableConstraints>> {
let mut constraints = Vec::new();
@@ -315,7 +319,7 @@ pub fn constraints_from_where_clause(
}
for index in available_indexes
.get(table_reference.table.get_name())
.unwrap_or(&Vec::new())
.unwrap_or(&VecDeque::new())
{
if let Some(position_in_index) =
index.column_table_pos_to_index_pos(constraint.table_col_pos)

View File

@@ -1,4 +1,9 @@
use std::{cell::RefCell, cmp::Ordering, collections::HashMap, sync::Arc};
use std::{
cell::RefCell,
cmp::Ordering,
collections::{HashMap, VecDeque},
sync::Arc,
};
use constraints::{
constraints_from_where_clause, usable_constraints_for_join_order, Constraint, ConstraintRef,
@@ -178,7 +183,7 @@ fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
fn optimize_table_access(
schema: &Schema,
table_references: &mut TableReferences,
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
available_indexes: &HashMap<String, VecDeque<Arc<Index>>>,
where_clause: &mut [WhereTerm],
order_by: &mut Vec<(Box<ast::Expr>, SortOrder)>,
group_by: &mut Option<GroupBy>,

View File

@@ -377,12 +377,11 @@ pub fn prepare_update_plan(
.any(|(idx, _)| columns[*idx].is_rowid_alias);
let indexes_to_update = if rowid_alias_used {
// If the rowid alias is used in the SET clause, we need to update all indexes
indexes.to_vec()
indexes.cloned().collect()
} else {
// otherwise we need to update the indexes whose columns are set in the SET clause,
// or if the colunns used in the partial index WHERE clause are being updated
indexes
.iter()
.filter_map(|idx| {
let mut needs = idx
.columns

View File

@@ -31,6 +31,38 @@ use crate::{
},
};
// The following comment is copied directly from SQLite source and should be used as a guiding light
// whenever we encounter compatibility bugs related to conflict clause handling:
/* UNIQUE and PRIMARY KEY constraints should be handled in the following
** order:
**
** (1) OE_Update
** (2) OE_Abort, OE_Fail, OE_Rollback, OE_Ignore
** (3) OE_Replace
**
** OE_Fail and OE_Ignore must happen before any changes are made.
** OE_Update guarantees that only a single row will change, so it
** must happen before OE_Replace. Technically, OE_Abort and OE_Rollback
** could happen in any order, but they are grouped up front for
** convenience.
**
** 2018-08-14: Ticket https://www.sqlite.org/src/info/908f001483982c43
** The order of constraints used to have OE_Update as (2) and OE_Abort
** and so forth as (1). But apparently PostgreSQL checks the OE_Update
** constraint before any others, so it had to be moved.
**
** Constraint checking code is generated in this order:
** (A) The rowid constraint
** (B) Unique index constraints that do not have OE_Replace as their
** default conflict resolution strategy
** (C) Unique index that do use OE_Replace by default.
**
** The ordering of (2) and (3) is accomplished by making sure the linked
** list of indexes attached to a table puts all OE_Replace indexes last
** in the list. See sqlite3CreateIndex() for where that happens.
*/
/// A ConflictTarget is extracted from each ON CONFLICT target,
// e.g. INSERT INTO x(a) ON CONFLICT *(a COLLATE nocase)*
#[derive(Debug, Clone)]