From 18d66b35398d3dc8c81ba82a58738d845cb4de58 Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Mon, 21 Jul 2025 18:09:37 +0200 Subject: [PATCH 1/4] Deserialize keys only once when sorting immutable records --- core/vdbe/sorter.rs | 49 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 07e8088c3..394b66e67 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -15,7 +15,7 @@ use crate::{ }, storage::sqlite3_ondisk::{read_varint, varint_len, write_varint}, translate::collate::CollationSeq, - types::{compare_immutable, IOResult, ImmutableRecord, KeyInfo}, + types::{compare_immutable, IOResult, ImmutableRecord, KeyInfo, RecordCursor, RefValue}, Result, }; @@ -133,7 +133,7 @@ impl Sorter { record.clone(), self.key_len, self.index_key_info.clone(), - )); + )?); self.current_buffer_size += payload_size; self.max_payload_size_in_buffer = self.max_payload_size_in_buffer.max(payload_size); Ok(()) @@ -205,7 +205,7 @@ impl Sorter { record, self.key_len, self.index_key_info.clone(), - )), + )?), chunk_idx, )); if let SortedChunkIOState::WaitingForRead = chunk.io_state.get() { @@ -444,38 +444,37 @@ impl SortedChunk { struct SortableImmutableRecord { record: ImmutableRecord, - key_len: usize, + key_values: Vec, index_key_info: Rc>, } impl SortableImmutableRecord { - fn new(record: ImmutableRecord, key_len: usize, index_key_info: Rc>) -> Self { - Self { - record, - key_len, - index_key_info, + fn new( + record: ImmutableRecord, + key_len: usize, + index_key_info: Rc>, + ) -> Result { + let mut key_values = Vec::with_capacity(key_len); + let mut cursor = RecordCursor::with_capacity(key_len); + cursor.ensure_parsed_upto(&record, key_len - 1)?; + for i in 0..cursor.serial_types.len() { + key_values.push(cursor.deserialize_column(&record, i)?); } + Ok(Self { + record, + key_values, + index_key_info, + }) } } impl Ord for SortableImmutableRecord { fn cmp(&self, other: &Self) -> Ordering { - let this_values = self.record.get_values(); - let other_values = other.record.get_values(); - - let a_key = if this_values.len() >= self.key_len { - &this_values[..self.key_len] - } else { - &this_values[..] - }; - - let b_key = if other_values.len() >= self.key_len { - &other_values[..self.key_len] - } else { - &other_values[..] - }; - - compare_immutable(a_key, b_key, self.index_key_info.as_ref()) + compare_immutable( + &self.key_values, + &other.key_values, + self.index_key_info.as_ref(), + ) } } From d75e26eee44d5e931030289de5c336fcb8d8f810 Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Mon, 21 Jul 2025 19:25:27 +0200 Subject: [PATCH 2/4] Deserialize keys incrementally --- core/vdbe/sorter.rs | 66 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 394b66e67..93305ca2c 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -15,7 +15,8 @@ use crate::{ }, storage::sqlite3_ondisk::{read_varint, varint_len, write_varint}, translate::collate::CollationSeq, - types::{compare_immutable, IOResult, ImmutableRecord, KeyInfo, RecordCursor, RefValue}, + turso_assert, + types::{IOResult, ImmutableRecord, KeyInfo, RecordCursor, RefValue}, Result, }; @@ -444,7 +445,8 @@ impl SortedChunk { struct SortableImmutableRecord { record: ImmutableRecord, - key_values: Vec, + cursor: RecordCursor, + key_values: RefCell>, index_key_info: Rc>, } @@ -454,15 +456,16 @@ impl SortableImmutableRecord { key_len: usize, index_key_info: Rc>, ) -> Result { - let mut key_values = Vec::with_capacity(key_len); let mut cursor = RecordCursor::with_capacity(key_len); cursor.ensure_parsed_upto(&record, key_len - 1)?; - for i in 0..cursor.serial_types.len() { - key_values.push(cursor.deserialize_column(&record, i)?); - } + turso_assert!( + index_key_info.len() >= cursor.serial_types.len(), + "index_key_info.len() < cursor.serial_types.len()" + ); Ok(Self { record, - key_values, + cursor, + key_values: RefCell::new(Vec::with_capacity(key_len)), index_key_info, }) } @@ -470,11 +473,50 @@ impl SortableImmutableRecord { impl Ord for SortableImmutableRecord { fn cmp(&self, other: &Self) -> Ordering { - compare_immutable( - &self.key_values, - &other.key_values, - self.index_key_info.as_ref(), - ) + assert_eq!( + self.cursor.serial_types.len(), + other.cursor.serial_types.len() + ); + let mut this_key_values = self.key_values.borrow_mut(); + let mut other_key_values = other.key_values.borrow_mut(); + + for i in 0..self.cursor.serial_types.len() { + // Lazily deserialize the key values if they haven't been deserialized yet. + if i >= this_key_values.len() { + this_key_values.push( + self.cursor + .deserialize_column(&self.record, i) + .expect("Failed to deserialize the key value"), + ); + } + if i >= other_key_values.len() { + other_key_values.push( + other + .cursor + .deserialize_column(&other.record, i) + .expect("Failed to deserialize the key value"), + ); + } + + let this_key_value = &this_key_values[i]; + let other_key_value = &other_key_values[i]; + let column_order = self.index_key_info[i].sort_order; + let collation = self.index_key_info[i].collation; + + let cmp = match (this_key_value, other_key_value) { + (RefValue::Text(left), RefValue::Text(right)) => { + collation.compare_strings(left.as_str(), right.as_str()) + } + _ => this_key_value.partial_cmp(other_key_value).unwrap(), + }; + if !cmp.is_eq() { + return match column_order { + SortOrder::Asc => cmp, + SortOrder::Desc => cmp.reverse(), + }; + } + } + std::cmp::Ordering::Equal } } From d847c88df6c0f6074d199fb0ae2f387b5b6ef452 Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Tue, 22 Jul 2025 06:37:50 +0200 Subject: [PATCH 3/4] cosmetic --- core/vdbe/sorter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 93305ca2c..f97e95f25 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -481,7 +481,7 @@ impl Ord for SortableImmutableRecord { let mut other_key_values = other.key_values.borrow_mut(); for i in 0..self.cursor.serial_types.len() { - // Lazily deserialize the key values if they haven't been deserialized yet. + // Lazily deserialize the key values if they haven't been deserialized already. if i >= this_key_values.len() { this_key_values.push( self.cursor From 1e51d23bd6275d8d1eee8aa9fc43331aed2dc291 Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Wed, 23 Jul 2025 11:22:01 -0700 Subject: [PATCH 4/4] store the key deserialization error instead of panicking --- core/vdbe/sorter.rs | 78 ++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 22 deletions(-) diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index f97e95f25..021256d2a 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -108,15 +108,25 @@ impl Sorter { } pub fn next(&mut self) -> Result> { - if self.chunks.is_empty() { + let record = if self.chunks.is_empty() { // Serve from the in-memory buffer. - self.current = self.records.pop().map(|r| r.record); + self.records.pop() } else { // Serve from sorted chunk files. match self.next_from_chunk_heap()? { IOResult::IO => return Ok(IOResult::IO), - IOResult::Done(record) => self.current = record, + IOResult::Done(record) => record, } + }; + match record { + Some(record) => { + if let Some(error) = record.deserialization_error.replace(None) { + // If there was a key deserialization error during the comparison, return the error. + return Err(error); + } + self.current = Some(record.record); + } + None => self.current = None, } Ok(IOResult::Done(())) } @@ -169,7 +179,7 @@ impl Sorter { Ok(IOResult::Done(())) } - fn next_from_chunk_heap(&mut self) -> Result>> { + fn next_from_chunk_heap(&mut self) -> Result>> { let mut all_read_complete = true; for chunk_idx in self.wait_for_read_complete.iter() { let chunk_io_state = self.chunks[*chunk_idx].io_state.get(); @@ -190,7 +200,7 @@ impl Sorter { if let Some((next_record, next_chunk_idx)) = self.chunk_heap.pop() { self.push_to_chunk_heap(next_chunk_idx)?; - Ok(IOResult::Done(Some(next_record.0.record))) + Ok(IOResult::Done(Some(next_record.0))) } else { Ok(IOResult::Done(None)) } @@ -448,6 +458,8 @@ struct SortableImmutableRecord { cursor: RecordCursor, key_values: RefCell>, index_key_info: Rc>, + /// The key deserialization error, if any. + deserialization_error: RefCell>, } impl SortableImmutableRecord { @@ -467,39 +479,61 @@ impl SortableImmutableRecord { cursor, key_values: RefCell::new(Vec::with_capacity(key_len)), index_key_info, + deserialization_error: RefCell::new(None), }) } + + /// Attempts to deserialize the key value at the given index. + /// If the key value has already been deserialized, this does nothing. + /// The deserialized key value is stored in the `key_values` field. + /// In case of an error, the error is stored in the `deserialization_error` field. + fn try_deserialize_key(&self, idx: usize) { + let mut key_values = self.key_values.borrow_mut(); + if idx < key_values.len() { + // The key value with this index has already been deserialized. + return; + } + match self.cursor.deserialize_column(&self.record, idx) { + Ok(value) => key_values.push(value), + Err(error) => { + self.deserialization_error.replace(Some(error)); + } + } + } } impl Ord for SortableImmutableRecord { fn cmp(&self, other: &Self) -> Ordering { + if self.deserialization_error.borrow().is_some() + || other.deserialization_error.borrow().is_some() + { + // If one of the records has a deserialization error, circumvent the comparison and return early. + return Ordering::Equal; + } assert_eq!( self.cursor.serial_types.len(), other.cursor.serial_types.len() ); - let mut this_key_values = self.key_values.borrow_mut(); - let mut other_key_values = other.key_values.borrow_mut(); + let this_key_values_len = self.key_values.borrow().len(); + let other_key_values_len = other.key_values.borrow().len(); for i in 0..self.cursor.serial_types.len() { // Lazily deserialize the key values if they haven't been deserialized already. - if i >= this_key_values.len() { - this_key_values.push( - self.cursor - .deserialize_column(&self.record, i) - .expect("Failed to deserialize the key value"), - ); + if i >= this_key_values_len { + self.try_deserialize_key(i); + if self.deserialization_error.borrow().is_some() { + return Ordering::Equal; + } } - if i >= other_key_values.len() { - other_key_values.push( - other - .cursor - .deserialize_column(&other.record, i) - .expect("Failed to deserialize the key value"), - ); + if i >= other_key_values_len { + other.try_deserialize_key(i); + if other.deserialization_error.borrow().is_some() { + return Ordering::Equal; + } } - let this_key_value = &this_key_values[i]; - let other_key_value = &other_key_values[i]; + let this_key_value = &self.key_values.borrow()[i]; + let other_key_value = &other.key_values.borrow()[i]; let column_order = self.index_key_info[i].sort_order; let collation = self.index_key_info[i].collation;