From 8642d416c7fa64cdf9f15bd036b7aa0295258cdf Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 25 Mar 2025 09:49:22 +0100 Subject: [PATCH 1/3] Introduce immutable record. Currently we have a Record, which is a dumb vector of cloned values. This is incredibly bad for performance as we do not want to clone objects unless needed. Therefore, let's start by introducing this type so that any record that has already been serialized will be returned from btree in the format of a simple payload with reference to payload. --- core/storage/btree.rs | 73 +++++---- core/storage/sqlite3_ondisk.rs | 47 +++--- core/types.rs | 278 +++++++++++++++++++++++++++++++++ core/vdbe/mod.rs | 18 ++- 4 files changed, 360 insertions(+), 56 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 58724430e..c4561b95a 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -6,7 +6,10 @@ use crate::storage::sqlite3_ondisk::{ }; use crate::MvCursor; -use crate::types::{CursorResult, OwnedValue, Record, SeekKey, SeekOp}; +use crate::types::{ + compare_immutable_to_record, compare_record_to_immutable, CursorResult, ImmutableRecord, + OwnedValue, Record, RefValue, SeekKey, SeekOp, +}; use crate::{return_corrupt, LimboError, Result}; use std::cell::{Cell, Ref, RefCell}; @@ -179,7 +182,7 @@ pub struct BTreeCursor { root_page: usize, /// Rowid and record are stored before being consumed. rowid: Cell>, - record: RefCell>, + record: RefCell>, null_flag: bool, /// Index internal pages are consumed on the way up, so we store going upwards flag in case /// we just moved to a parent page and the parent page is an internal index page which requires @@ -259,7 +262,7 @@ impl BTreeCursor { /// Move the cursor to the previous record and return it. /// Used in backwards iteration. - fn get_prev_record(&mut self) -> Result, Option)>> { + fn get_prev_record(&mut self) -> Result, Option)>> { loop { let page = self.stack.top(); let cell_idx = self.stack.current_cell_index(); @@ -327,7 +330,7 @@ impl BTreeCursor { _rowid, _payload, .. }) => { self.stack.retreat(); - let record: Record = crate::storage::sqlite3_ondisk::read_record(&_payload)?; + let record = crate::storage::sqlite3_ondisk::read_record(&_payload)?; return Ok(CursorResult::Ok((Some(_rowid), Some(record)))); } BTreeCell::IndexInteriorCell(_) => todo!(), @@ -341,14 +344,14 @@ impl BTreeCursor { fn get_next_record( &mut self, predicate: Option<(SeekKey<'_>, SeekOp)>, - ) -> Result, Option)>> { + ) -> Result, Option)>> { if let Some(mv_cursor) = &self.mv_cursor { let mut mv_cursor = mv_cursor.borrow_mut(); let rowid = mv_cursor.current_row_id(); match rowid { Some(rowid) => { let record = mv_cursor.current_row().unwrap().unwrap(); - let record: Record = crate::storage::sqlite3_ondisk::read_record(&record.data)?; + let record = crate::storage::sqlite3_ondisk::read_record(&record.data)?; mv_cursor.forward(); return Ok(CursorResult::Ok((Some(rowid.row_id), Some(record)))); } @@ -451,7 +454,7 @@ impl BTreeCursor { let record = crate::storage::sqlite3_ondisk::read_record(payload)?; if predicate.is_none() { let rowid = match record.last_value() { - Some(OwnedValue::Integer(rowid)) => *rowid as u64, + Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; return Ok(CursorResult::Ok((Some(rowid), Some(record)))); @@ -461,14 +464,16 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; + let order = + compare_immutable_to_record(&record.get_values(), &index_key.get_values()); let found = match op { - SeekOp::GT => &record > *index_key, - SeekOp::GE => &record >= *index_key, - SeekOp::EQ => &record == *index_key, + SeekOp::GT => order.is_gt(), + SeekOp::GE => order.is_ge(), + SeekOp::EQ => order.is_eq(), }; if found { let rowid = match record.last_value() { - Some(OwnedValue::Integer(rowid)) => *rowid as u64, + Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; return Ok(CursorResult::Ok((Some(rowid), Some(record)))); @@ -481,7 +486,7 @@ impl BTreeCursor { let record = crate::storage::sqlite3_ondisk::read_record(payload)?; if predicate.is_none() { let rowid = match record.last_value() { - Some(OwnedValue::Integer(rowid)) => *rowid as u64, + Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; return Ok(CursorResult::Ok((Some(rowid), Some(record)))); @@ -490,14 +495,16 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; + let order = + compare_immutable_to_record(&record.get_values(), &index_key.get_values()); let found = match op { - SeekOp::GT => &record > *index_key, - SeekOp::GE => &record >= *index_key, - SeekOp::EQ => &record == *index_key, + SeekOp::GT => order.is_gt(), + SeekOp::GE => order.is_ge(), + SeekOp::EQ => order.is_eq(), }; if found { let rowid = match record.last_value() { - Some(OwnedValue::Integer(rowid)) => *rowid as u64, + Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; return Ok(CursorResult::Ok((Some(rowid), Some(record)))); @@ -517,7 +524,7 @@ impl BTreeCursor { &mut self, key: SeekKey<'_>, op: SeekOp, - ) -> Result, Option)>> { + ) -> Result, Option)>> { return_if_io!(self.move_to(key.clone(), op.clone())); { @@ -565,23 +572,19 @@ impl BTreeCursor { unreachable!("index seek key should be a record"); }; let record = crate::storage::sqlite3_ondisk::read_record(payload)?; + let order = compare_immutable_to_record( + &record.get_values().as_slice()[..record.len() - 1], + &index_key.get_values().as_slice()[..], + ); let found = match op { - SeekOp::GT => { - record.get_values()[..record.len() - 1] > index_key.get_values()[..] - } - SeekOp::GE => { - record.get_values()[..record.len() - 1] - >= index_key.get_values()[..] - } - SeekOp::EQ => { - record.get_values()[..record.len() - 1] - == index_key.get_values()[..] - } + SeekOp::GT => order.is_gt(), + SeekOp::GE => order.is_ge(), + SeekOp::EQ => order.is_eq(), }; self.stack.advance(); if found { let rowid = match record.last_value() { - Some(OwnedValue::Integer(rowid)) => *rowid as u64, + Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; return Ok(CursorResult::Ok((Some(rowid), Some(record)))); @@ -745,10 +748,14 @@ impl BTreeCursor { unreachable!("index seek key should be a record"); }; let record = crate::storage::sqlite3_ondisk::read_record(payload)?; + let order = compare_record_to_immutable( + &index_key.get_values(), + &record.get_values(), + ); let target_leaf_page_is_in_the_left_subtree = match cmp { - SeekOp::GT => index_key < &record, - SeekOp::GE => index_key <= &record, - SeekOp::EQ => index_key <= &record, + SeekOp::GT => order.is_gt(), + SeekOp::GE => order.is_ge(), + SeekOp::EQ => order.is_eq(), }; if target_leaf_page_is_in_the_left_subtree { // we don't advance in case of index tree internal nodes because we will visit this node going up @@ -1738,7 +1745,7 @@ impl BTreeCursor { Ok(CursorResult::Ok(rowid.is_some())) } - pub fn record(&self) -> Ref> { + pub fn record(&self) -> Ref> { self.record.borrow() } diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index f7406008a..4a52a0ad5 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -47,7 +47,7 @@ use crate::io::{Buffer, Completion, ReadCompletion, SyncCompletion, WriteComplet use crate::storage::buffer_pool::BufferPool; use crate::storage::database::DatabaseStorage; use crate::storage::pager::Pager; -use crate::types::{OwnedValue, Record, Text, TextSubtype}; +use crate::types::{ImmutableRecord, RawSlice, RefValue, TextRef, TextSubtype}; use crate::{File, Result}; use parking_lot::RwLock; use std::cell::RefCell; @@ -1062,11 +1062,12 @@ pub fn validate_serial_type(value: u64) -> Result { } } -pub fn read_record(payload: &[u8]) -> Result { +pub fn read_record(payload: &[u8]) -> Result { let mut pos = 0; let (header_size, nr) = read_varint(payload)?; assert!((header_size as usize) >= nr); let mut header_size = (header_size as usize) - nr; + let payload = payload.to_vec(); pos += nr; let mut serial_types = Vec::with_capacity(header_size); @@ -1086,12 +1087,17 @@ pub fn read_record(payload: &[u8]) -> Result { values.push(value); } - Ok(Record::new(values)) + Ok(ImmutableRecord { + payload: std::pin::Pin::new(payload), + values, + }) } -pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, usize)> { +/// Reads a value that might reference the buffer it is reading from. Be sure to store RefValue with the buffer +/// always. +pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usize)> { if serial_type.is_null() { - return Ok((OwnedValue::Null, 0)); + return Ok((RefValue::Null, 0)); } if serial_type.is_int8() { @@ -1099,7 +1105,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us crate::bail_corrupt_error!("Invalid UInt8 value"); } let val = buf[0] as i8; - return Ok((OwnedValue::Integer(val as i64), 1)); + return Ok((RefValue::Integer(val as i64), 1)); } if serial_type.is_beint16() { @@ -1107,7 +1113,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us crate::bail_corrupt_error!("Invalid BEInt16 value"); } return Ok(( - OwnedValue::Integer(i16::from_be_bytes([buf[0], buf[1]]) as i64), + RefValue::Integer(i16::from_be_bytes([buf[0], buf[1]]) as i64), 2, )); } @@ -1118,7 +1124,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us } let sign_extension = if buf[0] <= 127 { 0 } else { 255 }; return Ok(( - OwnedValue::Integer(i32::from_be_bytes([sign_extension, buf[0], buf[1], buf[2]]) as i64), + RefValue::Integer(i32::from_be_bytes([sign_extension, buf[0], buf[1], buf[2]]) as i64), 3, )); } @@ -1128,7 +1134,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us crate::bail_corrupt_error!("Invalid BEInt32 value"); } return Ok(( - OwnedValue::Integer(i32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]) as i64), + RefValue::Integer(i32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]) as i64), 4, )); } @@ -1139,7 +1145,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us } let sign_extension = if buf[0] <= 127 { 0 } else { 255 }; return Ok(( - OwnedValue::Integer(i64::from_be_bytes([ + RefValue::Integer(i64::from_be_bytes([ sign_extension, sign_extension, buf[0], @@ -1158,7 +1164,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us crate::bail_corrupt_error!("Invalid BEInt64 value"); } return Ok(( - OwnedValue::Integer(i64::from_be_bytes([ + RefValue::Integer(i64::from_be_bytes([ buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], ])), 8, @@ -1170,7 +1176,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us crate::bail_corrupt_error!("Invalid BEFloat64 value"); } return Ok(( - OwnedValue::Float(f64::from_be_bytes([ + RefValue::Float(f64::from_be_bytes([ buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], ])), 8, @@ -1178,11 +1184,11 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us } if serial_type.is_constint0() { - return Ok((OwnedValue::Integer(0), 0)); + return Ok((RefValue::Integer(0), 0)); } if serial_type.is_constint1() { - return Ok((OwnedValue::Integer(1), 0)); + return Ok((RefValue::Integer(1), 0)); } if serial_type.is_blob() { @@ -1190,7 +1196,9 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us if buf.len() < n { crate::bail_corrupt_error!("Invalid Blob value"); } - return Ok((OwnedValue::Blob(Rc::new(buf[0..n].to_vec())), n)); + let ptr = &buf[0] as *const u8; + let slice = RawSlice { data: ptr, len: n }; + return Ok((RefValue::Blob(slice), n)); } if serial_type.is_string() { @@ -1202,10 +1210,11 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(OwnedValue, us n ); } - let bytes = buf[0..n].to_vec(); + let ptr = &buf[0] as *const u8; + let slice = RawSlice { data: ptr, len: n }; return Ok(( - OwnedValue::Text(Text { - value: Rc::new(bytes), + RefValue::Text(TextRef { + value: slice, subtype: TextSubtype::Text, }), n, @@ -1530,6 +1539,8 @@ pub fn read_u32(buf: &[u8], pos: usize) -> u32 { #[cfg(test)] mod tests { + use crate::OwnedValue; + use super::*; use rstest::rstest; diff --git a/core/types.rs b/core/types.rs index 9295b80d1..4ebe73bf3 100644 --- a/core/types.rs +++ b/core/types.rs @@ -8,7 +8,9 @@ use crate::storage::sqlite3_ondisk::write_varint; use crate::vdbe::sorter::Sorter; use crate::vdbe::VTabOpaqueCursor; use crate::Result; +use std::cmp::Ordering; use std::fmt::Display; +use std::pin::Pin; use std::rc::Rc; #[derive(Debug, Clone, Copy, PartialEq)] @@ -33,6 +35,12 @@ pub struct Text { pub subtype: TextSubtype, } +#[derive(Debug, Clone, PartialEq)] +pub struct TextRef { + pub value: RawSlice, + pub subtype: TextSubtype, +} + impl Text { pub fn from_str>(value: S) -> Self { Self::new(&value.into()) @@ -72,6 +80,21 @@ pub enum OwnedValue { Record(Record), } +#[derive(Debug, Clone, PartialEq)] +pub struct RawSlice { + pub data: *const u8, + pub len: usize, +} + +#[derive(Debug, PartialEq)] +pub enum RefValue { + Null, + Integer(i64), + Float(f64), + Text(TextRef), + Blob(RawSlice), +} + impl OwnedValue { // A helper function that makes building a text OwnedValue easier. pub fn build_text(text: &str) -> Self { @@ -486,6 +509,15 @@ impl<'a> FromValue<'a> for &'a str { } } +/// This struct serves the purpose of not allocating multiple vectors of bytes if not needed. +/// A value in a record that has already been serialized can stay serialized and what this struct offsers +/// is easy acces to each value which point to the payload. +#[derive(Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct ImmutableRecord { + pub payload: Pin>, // << point to this + pub values: Vec, +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct Record { values: Vec, @@ -518,6 +550,246 @@ impl Record { } } +impl ImmutableRecord { + // pub fn get<'a, T: FromValue<'a> + 'a>(&'a self, idx: usize) -> Result { + // let value = &self.values[idx]; + // T::from_value(value) + // } + + pub fn count(&self) -> usize { + self.values.len() + } + + pub fn last_value(&self) -> Option<&RefValue> { + self.values.last() + } + + pub fn get_values(&self) -> &Vec { + &self.values + } + + pub fn get_value(&self, idx: usize) -> &RefValue { + &self.values[idx] + } + + pub fn len(&self) -> usize { + self.values.len() + } +} + +impl RefValue { + pub fn to_owned(&self) -> OwnedValue { + match self { + RefValue::Null => OwnedValue::Null, + RefValue::Integer(i) => OwnedValue::Integer(*i), + RefValue::Float(f) => OwnedValue::Float(*f), + RefValue::Text(text_ref) => OwnedValue::Text(Text { + value: Rc::new(text_ref.value.to_slice().to_vec()), + subtype: text_ref.subtype.clone(), + }), + RefValue::Blob(b) => OwnedValue::Blob(Rc::new(b.to_slice().to_vec())), + } + } +} +impl Eq for RefValue {} + +impl Ord for RefValue { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.partial_cmp(other).unwrap() + } +} + +#[allow(clippy::non_canonical_partial_ord_impl)] +impl PartialOrd for RefValue { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (Self::Integer(int_left), Self::Integer(int_right)) => int_left.partial_cmp(int_right), + (Self::Integer(int_left), Self::Float(float_right)) => { + (*int_left as f64).partial_cmp(float_right) + } + (Self::Float(float_left), Self::Integer(int_right)) => { + float_left.partial_cmp(&(*int_right as f64)) + } + (Self::Float(float_left), Self::Float(float_right)) => { + float_left.partial_cmp(float_right) + } + // Numeric vs Text/Blob + (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => { + Some(std::cmp::Ordering::Less) + } + (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => { + Some(std::cmp::Ordering::Greater) + } + + (Self::Text(text_left), Self::Text(text_right)) => text_left + .value + .to_slice() + .partial_cmp(text_right.value.to_slice()), + // Text vs Blob + (Self::Text(_), Self::Blob(_)) => Some(std::cmp::Ordering::Less), + (Self::Blob(_), Self::Text(_)) => Some(std::cmp::Ordering::Greater), + + (Self::Blob(blob_left), Self::Blob(blob_right)) => { + blob_left.to_slice().partial_cmp(blob_right.to_slice()) + } + (Self::Null, Self::Null) => Some(std::cmp::Ordering::Equal), + (Self::Null, _) => Some(std::cmp::Ordering::Less), + (_, Self::Null) => Some(std::cmp::Ordering::Greater), + } + } +} + +#[allow(clippy::non_canonical_partial_ord_impl)] +impl PartialOrd for RefValue { + fn partial_cmp(&self, other: &OwnedValue) -> Option { + match (self, other) { + (Self::Integer(int_left), OwnedValue::Integer(int_right)) => { + int_left.partial_cmp(int_right) + } + (Self::Integer(int_left), OwnedValue::Float(float_right)) => { + (*int_left as f64).partial_cmp(float_right) + } + (Self::Float(float_left), OwnedValue::Integer(int_right)) => { + float_left.partial_cmp(&(*int_right as f64)) + } + (Self::Float(float_left), OwnedValue::Float(float_right)) => { + float_left.partial_cmp(float_right) + } + // Numeric vs Text/Blob + (Self::Integer(_) | Self::Float(_), OwnedValue::Text(_) | OwnedValue::Blob(_)) => { + Some(std::cmp::Ordering::Less) + } + (Self::Text(_) | Self::Blob(_), OwnedValue::Integer(_) | OwnedValue::Float(_)) => { + Some(std::cmp::Ordering::Greater) + } + + (Self::Text(text_left), OwnedValue::Text(text_right)) => { + let text_left = text_left.value.to_slice(); + text_left.partial_cmp(&text_right.value) + } + // Text vs Blob + (Self::Text(_), OwnedValue::Blob(_)) => Some(std::cmp::Ordering::Less), + (Self::Blob(_), OwnedValue::Text(_)) => Some(std::cmp::Ordering::Greater), + + (Self::Blob(blob_left), OwnedValue::Blob(blob_right)) => { + let blob_left = blob_left.to_slice(); + blob_left.partial_cmp(blob_right) + } + (Self::Null, OwnedValue::Null) => Some(std::cmp::Ordering::Equal), + (Self::Null, _) => Some(std::cmp::Ordering::Less), + (_, OwnedValue::Null) => Some(std::cmp::Ordering::Greater), + other => todo!("{:?}", other), + } + } +} + +#[allow(clippy::non_canonical_partial_ord_impl)] +impl PartialOrd for OwnedValue { + fn partial_cmp(&self, other: &RefValue) -> Option { + match (self, other) { + (Self::Integer(int_left), RefValue::Integer(int_right)) => { + int_left.partial_cmp(int_right) + } + (Self::Integer(int_left), RefValue::Float(float_right)) => { + (*int_left as f64).partial_cmp(float_right) + } + (Self::Float(float_left), RefValue::Integer(int_right)) => { + float_left.partial_cmp(&(*int_right as f64)) + } + (Self::Float(float_left), RefValue::Float(float_right)) => { + float_left.partial_cmp(float_right) + } + // Numeric vs Text/Blob + (Self::Integer(_) | Self::Float(_), RefValue::Text(_) | RefValue::Blob(_)) => { + Some(std::cmp::Ordering::Less) + } + (Self::Text(_) | Self::Blob(_), RefValue::Integer(_) | RefValue::Float(_)) => { + Some(std::cmp::Ordering::Greater) + } + + (Self::Text(text_left), RefValue::Text(text_right)) => { + let text_right = text_right.value.to_slice(); + text_left.value.as_slice().partial_cmp(text_right) + } + // Text vs Blob + (Self::Text(_), RefValue::Blob(_)) => Some(std::cmp::Ordering::Less), + (Self::Blob(_), RefValue::Text(_)) => Some(std::cmp::Ordering::Greater), + + (Self::Blob(blob_left), RefValue::Blob(blob_right)) => { + let blob_right = blob_right.to_slice(); + blob_left.as_slice().partial_cmp(blob_right) + } + (Self::Null, RefValue::Null) => Some(std::cmp::Ordering::Equal), + (Self::Null, _) => Some(std::cmp::Ordering::Less), + (_, RefValue::Null) => Some(std::cmp::Ordering::Greater), + other => todo!("{:?}", other), + } + } +} + +impl PartialEq for OwnedValue { + fn eq(&self, other: &RefValue) -> bool { + match (self, other) { + (Self::Integer(int_left), RefValue::Integer(int_right)) => int_left == int_right, + (Self::Float(float_left), RefValue::Float(float_right)) => float_left == float_right, + (Self::Text(text_left), RefValue::Text(text_right)) => { + text_left.value.as_slice() == text_right.value.to_slice() + } + (Self::Blob(blob_left), RefValue::Blob(blob_right)) => { + blob_left.as_slice() == blob_right.to_slice() + } + (Self::Null, RefValue::Null) => true, + _ => false, + } + } +} + +impl PartialEq for RefValue { + fn eq(&self, other: &OwnedValue) -> bool { + match (self, other) { + (Self::Integer(int_left), OwnedValue::Integer(int_right)) => int_left == int_right, + (Self::Float(float_left), OwnedValue::Float(float_right)) => float_left == float_right, + (Self::Text(text_left), OwnedValue::Text(text_right)) => { + text_left.value.to_slice() == text_right.value.as_slice() + } + (Self::Blob(blob_left), OwnedValue::Blob(blob_right)) => { + blob_left.to_slice() == blob_right.as_slice() + } + (Self::Null, OwnedValue::Null) => true, + _ => false, + } + } +} + +pub fn compare_record_to_immutable( + record: &[OwnedValue], + immutable: &[RefValue], +) -> std::cmp::Ordering { + for (a, b) in record.iter().zip(immutable.iter()) { + match a.partial_cmp(b).unwrap() { + Ordering::Equal => {} + order => { + return order; + } + } + } + Ordering::Equal +} +pub fn compare_immutable_to_record( + immutable: &[RefValue], + record: &[OwnedValue], +) -> std::cmp::Ordering { + for (a, b) in immutable.iter().zip(record.iter()) { + match a.partial_cmp(b).unwrap() { + Ordering::Equal => {} + order => { + return order; + } + } + } + Ordering::Equal +} + const I8_LOW: i64 = -128; const I8_HIGH: i64 = 127; const I16_LOW: i64 = -32768; @@ -716,6 +988,12 @@ pub enum SeekKey<'a> { IndexKey(&'a Record), } +impl RawSlice { + pub fn to_slice(&self) -> &[u8] { + unsafe { std::slice::from_raw_parts(self.data, self.len) } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 04bd7453a..8fd4fc69e 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -1102,7 +1102,7 @@ impl Program { if cursor.get_null_flag() { OwnedValue::Null } else { - record.get_value(*column).clone() + record.get_value(*column).to_owned() } } else { OwnedValue::Null @@ -1580,7 +1580,9 @@ impl Program { let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values if idx_record.get_values()[..record_from_regs.len()] - >= record_from_regs.get_values()[..] + .iter() + .zip(&record_from_regs.get_values()[..]) + .all(|(a, b)| a >= b) { target_pc.to_offset_int() } else { @@ -1608,7 +1610,9 @@ impl Program { let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values if idx_record.get_values()[..record_from_regs.len()] - <= record_from_regs.get_values()[..] + .iter() + .zip(&record_from_regs.get_values()[..]) + .all(|(a, b)| a <= b) { target_pc.to_offset_int() } else { @@ -1636,7 +1640,9 @@ impl Program { let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values if idx_record.get_values()[..record_from_regs.len()] - > record_from_regs.get_values()[..] + .iter() + .zip(&record_from_regs.get_values()[..]) + .all(|(a, b)| a > b) { target_pc.to_offset_int() } else { @@ -1664,7 +1670,9 @@ impl Program { let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values if idx_record.get_values()[..record_from_regs.len()] - < record_from_regs.get_values()[..] + .iter() + .zip(&record_from_regs.get_values()[..]) + .all(|(a, b)| a < b) { target_pc.to_offset_int() } else { From 63cf86ba36647a76a8b754f6d040712bb21126ed Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 26 Mar 2025 10:10:19 +0100 Subject: [PATCH 2/3] fix comparison of records --- core/storage/btree.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index c4561b95a..f4546d5a8 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -498,9 +498,9 @@ impl BTreeCursor { let order = compare_immutable_to_record(&record.get_values(), &index_key.get_values()); let found = match op { - SeekOp::GT => order.is_gt(), - SeekOp::GE => order.is_ge(), - SeekOp::EQ => order.is_eq(), + SeekOp::GT => order.is_lt(), + SeekOp::GE => order.is_le(), + SeekOp::EQ => order.is_le(), }; if found { let rowid = match record.last_value() { @@ -753,9 +753,9 @@ impl BTreeCursor { &record.get_values(), ); let target_leaf_page_is_in_the_left_subtree = match cmp { - SeekOp::GT => order.is_gt(), - SeekOp::GE => order.is_ge(), - SeekOp::EQ => order.is_eq(), + SeekOp::GT => order.is_lt(), + SeekOp::GE => order.is_le(), + SeekOp::EQ => order.is_le(), }; if target_leaf_page_is_in_the_left_subtree { // we don't advance in case of index tree internal nodes because we will visit this node going up From f07f10ac53caf84fefad80e1f0061f5b7a9a40f4 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 26 Mar 2025 18:11:44 +0100 Subject: [PATCH 3/3] fix read empty blob/text --- core/storage/sqlite3_ondisk.rs | 14 +++++++++++--- core/types.rs | 13 ++++++++++--- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 4a52a0ad5..e7f8eaadd 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -1196,8 +1196,11 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usiz if buf.len() < n { crate::bail_corrupt_error!("Invalid Blob value"); } + if n == 0 { + return Ok((RefValue::Blob(RawSlice::new(std::ptr::null(), 0)), 0)); + } let ptr = &buf[0] as *const u8; - let slice = RawSlice { data: ptr, len: n }; + let slice = RawSlice::new(ptr, n); return Ok((RefValue::Blob(slice), n)); } @@ -1210,8 +1213,12 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usiz n ); } - let ptr = &buf[0] as *const u8; - let slice = RawSlice { data: ptr, len: n }; + let slice = if n == 0 { + RawSlice::new(std::ptr::null(), 0) + } else { + let ptr = &buf[0] as *const u8; + RawSlice::new(ptr, n) + }; return Ok(( RefValue::Text(TextRef { value: slice, @@ -1557,6 +1564,7 @@ mod tests { #[case(&[1, 2], SERIAL_TYPE_CONSTINT0, OwnedValue::Integer(0))] #[case(&[65, 66], SERIAL_TYPE_CONSTINT1, OwnedValue::Integer(1))] #[case(&[1, 2, 3], 18, OwnedValue::Blob(vec![1, 2, 3].into()))] + #[case(&[], 12, OwnedValue::Blob(vec![].into()))] // empty blob #[case(&[65, 66, 67], 19, OwnedValue::build_text("ABC"))] #[case(&[0x80], SERIAL_TYPE_INT8, OwnedValue::Integer(-128))] #[case(&[0x80, 0], SERIAL_TYPE_BEINT16, OwnedValue::Integer(-32768))] diff --git a/core/types.rs b/core/types.rs index 4ebe73bf3..d860979d1 100644 --- a/core/types.rs +++ b/core/types.rs @@ -82,8 +82,8 @@ pub enum OwnedValue { #[derive(Debug, Clone, PartialEq)] pub struct RawSlice { - pub data: *const u8, - pub len: usize, + data: *const u8, + len: usize, } #[derive(Debug, PartialEq)] @@ -989,8 +989,15 @@ pub enum SeekKey<'a> { } impl RawSlice { + pub fn new(data: *const u8, len: usize) -> Self { + Self { data, len } + } pub fn to_slice(&self) -> &[u8] { - unsafe { std::slice::from_raw_parts(self.data, self.len) } + if self.data.is_null() { + &[] + } else { + unsafe { std::slice::from_raw_parts(self.data, self.len) } + } } }