mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-26 12:34:22 +01:00
Merge 'Fix vector deserialization alignment and blob/text empty mismatch' from bit-aloo
* Previously, deserializing an empty vector used `Vec::new()`, resulting in a zero-capacity buffer whose pointer is not guaranteed to be aligned for `f32`/`f64`. This could lead to undefined behavior when reinterpreting the data. * We also treated empty input inconsistently: `"[]"` (text) was accepted as a zero-length vector, but empty blobs (`&[]`) were rejected. * Now: * We initialize empty vectors with at least one element’s capacity to preserve alignment. * We allow zero-sized blobs and treat them the same as `"[]"` text input, i.e. as empty vectors. Closes #2371
This commit is contained in:
@@ -25,11 +25,59 @@ pub struct Vector {
|
||||
}
|
||||
|
||||
impl Vector {
|
||||
/// # Safety
|
||||
///
|
||||
/// This method is used to reinterpret the underlying `Vec<u8>` data
|
||||
/// as a `&[f32]` slice. This is only valid if:
|
||||
/// - The buffer is correctly aligned for `f32`
|
||||
/// - The length of the buffer is exactly `dims * size_of::<f32>()`
|
||||
pub fn as_f32_slice(&self) -> &[f32] {
|
||||
unsafe { std::slice::from_raw_parts(self.data.as_ptr() as *const f32, self.dims) }
|
||||
if self.dims == 0 {
|
||||
return &[];
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
self.data.len(),
|
||||
self.dims * std::mem::size_of::<f32>(),
|
||||
"data length must equal dims * size_of::<f32>()"
|
||||
);
|
||||
|
||||
let ptr = self.data.as_ptr();
|
||||
let align = std::mem::align_of::<f32>();
|
||||
assert_eq!(
|
||||
ptr.align_offset(align),
|
||||
0,
|
||||
"data pointer must be aligned to {align} bytes for f32 access"
|
||||
);
|
||||
|
||||
unsafe { std::slice::from_raw_parts(ptr as *const f32, self.dims) }
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// This method is used to reinterpret the underlying `Vec<u8>` data
|
||||
/// as a `&[f64]` slice. This is only valid if:
|
||||
/// - The buffer is correctly aligned for `f64`
|
||||
/// - The length of the buffer is exactly `dims * size_of::<f64>()`
|
||||
pub fn as_f64_slice(&self) -> &[f64] {
|
||||
if self.dims == 0 {
|
||||
return &[];
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
self.data.len(),
|
||||
self.dims * std::mem::size_of::<f64>(),
|
||||
"data length must equal dims * size_of::<f64>()"
|
||||
);
|
||||
|
||||
let ptr = self.data.as_ptr();
|
||||
let align = std::mem::align_of::<f64>();
|
||||
assert_eq!(
|
||||
ptr.align_offset(align),
|
||||
0,
|
||||
"data pointer must be aligned to {align} bytes for f64 access"
|
||||
);
|
||||
|
||||
unsafe { std::slice::from_raw_parts(self.data.as_ptr() as *const f64, self.dims) }
|
||||
}
|
||||
}
|
||||
@@ -281,11 +329,6 @@ pub fn vector_f64_distance_cos(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
}
|
||||
|
||||
pub fn vector_type(blob: &[u8]) -> Result<VectorType> {
|
||||
if blob.is_empty() {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
// Even-sized blobs are always float32.
|
||||
if blob.len() % 2 == 0 {
|
||||
return Ok(VectorType::Float32);
|
||||
@@ -706,6 +749,7 @@ mod tests {
|
||||
let v2 = float32_vec_from(&[]);
|
||||
let result = vector_concat(&v1, &v2).unwrap();
|
||||
assert_eq!(result.dims, 0);
|
||||
assert_eq!(f32_slice_from_vector(&result), Vec::<f32>::new());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user