mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-23 08:55:40 +01:00
move vector operations under operations/ folder
This commit is contained in:
@@ -1,25 +0,0 @@
|
||||
use super::vector_types::Vector;
|
||||
use crate::Result;
|
||||
|
||||
pub(crate) mod euclidean;
|
||||
|
||||
/// Distance metrics that can be selected when comparing two vectors.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DistanceType {
    /// Euclidean distance. This is a very common distance metric that
    /// accounts for both magnitude and direction when determining the distance
    /// between vectors. Euclidean distance has a range of [0, ∞).
    Euclidean,

    // TODO(asukamilet): Refactor the current `vector_types.rs` to integrate
    // NOTE(review): the TODO above ends mid-sentence in the original source;
    // presumably it is about wiring cosine distance into this enum — confirm
    // with the author before acting on it.
    /// Cosine distance. This is a measure of similarity between two vectors
    #[allow(dead_code)]
    Cosine,
}
|
||||
|
||||
pub trait DistanceCalculator {
|
||||
#[allow(unused)]
|
||||
fn distance_type() -> DistanceType;
|
||||
|
||||
fn calculate(v1: &Vector, v2: &Vector) -> Result<f64>;
|
||||
}
|
||||
@@ -1,10 +1,9 @@
|
||||
use crate::types::Value;
|
||||
use crate::vdbe::Register;
|
||||
use crate::vector::distance::{euclidean::Euclidean, DistanceCalculator};
|
||||
use crate::LimboError;
|
||||
use crate::Result;
|
||||
|
||||
pub mod distance;
|
||||
pub mod operations;
|
||||
pub mod vector_types;
|
||||
use vector_types::*;
|
||||
|
||||
@@ -76,7 +75,7 @@ pub fn vector_distance_cos(args: &[Register]) -> Result<Value> {
|
||||
|
||||
let x = parse_vector(&args[0], None)?;
|
||||
let y = parse_vector(&args[1], None)?;
|
||||
let dist = do_vector_distance_cos(&x, &y)?;
|
||||
let dist = operations::distance_cos::vector_distance_cos(&x, &y)?;
|
||||
Ok(Value::Float(dist))
|
||||
}
|
||||
|
||||
@@ -89,19 +88,7 @@ pub fn vector_distance_l2(args: &[Register]) -> Result<Value> {
|
||||
|
||||
let x = parse_vector(&args[0], None)?;
|
||||
let y = parse_vector(&args[1], None)?;
|
||||
// Validate that both vectors have the same dimensions and type
|
||||
if x.dims != y.dims {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Vectors must have the same dimensions".to_string(),
|
||||
));
|
||||
}
|
||||
if x.vector_type != y.vector_type {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Vectors must be of the same type".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let dist = Euclidean::calculate(&x, &y)?;
|
||||
let dist = operations::distance_l2::vector_distance_l2(&x, &y)?;
|
||||
Ok(Value::Float(dist))
|
||||
}
|
||||
|
||||
@@ -114,14 +101,7 @@ pub fn vector_concat(args: &[Register]) -> Result<Value> {
|
||||
|
||||
let x = parse_vector(&args[0], None)?;
|
||||
let y = parse_vector(&args[1], None)?;
|
||||
|
||||
if x.vector_type != y.vector_type {
|
||||
return Err(LimboError::InvalidArgument(
|
||||
"Vectors must be of the same type".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let vector = vector_types::vector_concat(&x, &y)?;
|
||||
let vector = operations::concat::vector_concat(&x, &y)?;
|
||||
match vector.vector_type {
|
||||
VectorType::Float32Dense => Ok(vector_serialize_f32(vector)),
|
||||
VectorType::Float64Dense => Ok(vector_serialize_f64(vector)),
|
||||
@@ -153,7 +133,8 @@ pub fn vector_slice(args: &[Register]) -> Result<Value> {
|
||||
));
|
||||
}
|
||||
|
||||
let result = vector_types::vector_slice(&vector, start_index as usize, end_index as usize)?;
|
||||
let result =
|
||||
operations::slice::vector_slice(&vector, start_index as usize, end_index as usize)?;
|
||||
|
||||
Ok(match result.vector_type {
|
||||
VectorType::Float32Dense => vector_serialize_f32(result),
|
||||
|
||||
101
core/vector/operations/concat.rs
Normal file
101
core/vector/operations/concat.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
use crate::{vector::vector_types::Vector, LimboError, Result};
|
||||
|
||||
pub fn vector_concat(v1: &Vector, v2: &Vector) -> Result<Vector> {
|
||||
if v1.vector_type != v2.vector_type {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Mismatched vector types".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut data = Vec::with_capacity(v1.data.len() + v2.data.len());
|
||||
data.extend_from_slice(&v1.data);
|
||||
data.extend_from_slice(&v2.data);
|
||||
|
||||
Ok(Vector {
|
||||
vector_type: v1.vector_type,
|
||||
dims: v1.dims + v2.dims,
|
||||
data,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::vector::{
        operations::concat::vector_concat,
        vector_types::{Vector, VectorType},
    };

    /// Builds a `Float32Dense` vector whose payload is the little-endian
    /// encoding of every element of `slice`.
    fn float32_vec_from(slice: &[f32]) -> Vector {
        let data: Vec<u8> = slice.iter().flat_map(|v| v.to_le_bytes()).collect();
        Vector {
            vector_type: VectorType::Float32Dense,
            dims: slice.len(),
            data,
        }
    }

    /// Copies the vector's f32 view into an owned `Vec` for easy comparison.
    fn f32_slice_from_vector(vector: &Vector) -> Vec<f32> {
        vector.as_f32_slice().to_vec()
    }

    #[test]
    fn test_vector_concat_normal_case() {
        let left = float32_vec_from(&[1.0, 2.0, 3.0]);
        let right = float32_vec_from(&[4.0, 5.0, 6.0]);

        let joined = vector_concat(&left, &right).unwrap();

        assert_eq!(joined.dims, 6);
        assert_eq!(joined.vector_type, VectorType::Float32Dense);
        assert_eq!(
            f32_slice_from_vector(&joined),
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
        );
    }

    #[test]
    fn test_vector_concat_empty_left() {
        let left = float32_vec_from(&[]);
        let right = float32_vec_from(&[4.0, 5.0]);

        let joined = vector_concat(&left, &right).unwrap();

        assert_eq!(joined.dims, 2);
        assert_eq!(f32_slice_from_vector(&joined), [4.0, 5.0]);
    }

    #[test]
    fn test_vector_concat_empty_right() {
        let left = float32_vec_from(&[1.0, 2.0]);
        let right = float32_vec_from(&[]);

        let joined = vector_concat(&left, &right).unwrap();

        assert_eq!(joined.dims, 2);
        assert_eq!(f32_slice_from_vector(&joined), [1.0, 2.0]);
    }

    #[test]
    fn test_vector_concat_both_empty() {
        let left = float32_vec_from(&[]);
        let right = float32_vec_from(&[]);

        let joined = vector_concat(&left, &right).unwrap();

        assert_eq!(joined.dims, 0);
        assert!(f32_slice_from_vector(&joined).is_empty());
    }

    #[test]
    fn test_vector_concat_different_lengths() {
        let left = float32_vec_from(&[1.0]);
        let right = float32_vec_from(&[2.0, 3.0, 4.0]);

        let joined = vector_concat(&left, &right).unwrap();

        assert_eq!(joined.dims, 4);
        assert_eq!(f32_slice_from_vector(&joined), [1.0, 2.0, 3.0, 4.0]);
    }
}
|
||||
91
core/vector/operations/distance_cos.rs
Normal file
91
core/vector/operations/distance_cos.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use crate::{
|
||||
vector::vector_types::{Vector, VectorType},
|
||||
LimboError, Result,
|
||||
};
|
||||
|
||||
pub fn vector_distance_cos(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
match v1.vector_type {
|
||||
VectorType::Float32Dense => vector_f32_distance_cos(v1, v2),
|
||||
VectorType::Float64Dense => vector_f64_distance_cos(v1, v2),
|
||||
}
|
||||
}
|
||||
|
||||
fn vector_f32_distance_cos(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
if v1.dims != v2.dims {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector dimensions".to_string(),
|
||||
));
|
||||
}
|
||||
if v1.vector_type != v2.vector_type {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector type".to_string(),
|
||||
));
|
||||
}
|
||||
let (mut dot, mut norm1, mut norm2) = (0.0, 0.0, 0.0);
|
||||
let v1_data = v1.as_f32_slice();
|
||||
let v2_data = v2.as_f32_slice();
|
||||
|
||||
// Check for non-finite values
|
||||
if v1_data.iter().any(|x| !x.is_finite()) || v2_data.iter().any(|x| !x.is_finite()) {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
for i in 0..v1.dims {
|
||||
let e1 = v1_data[i];
|
||||
let e2 = v2_data[i];
|
||||
dot += e1 * e2;
|
||||
norm1 += e1 * e1;
|
||||
norm2 += e2 * e2;
|
||||
}
|
||||
|
||||
// Check for zero norms to avoid division by zero
|
||||
if norm1 == 0.0 || norm2 == 0.0 {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(1.0 - (dot / (norm1 * norm2).sqrt()) as f64)
|
||||
}
|
||||
|
||||
fn vector_f64_distance_cos(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
if v1.dims != v2.dims {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector dimensions".to_string(),
|
||||
));
|
||||
}
|
||||
if v1.vector_type != v2.vector_type {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector type".to_string(),
|
||||
));
|
||||
}
|
||||
let (mut dot, mut norm1, mut norm2) = (0.0, 0.0, 0.0);
|
||||
let v1_data = v1.as_f64_slice();
|
||||
let v2_data = v2.as_f64_slice();
|
||||
|
||||
// Check for non-finite values
|
||||
if v1_data.iter().any(|x| !x.is_finite()) || v2_data.iter().any(|x| !x.is_finite()) {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
for i in 0..v1.dims {
|
||||
let e1 = v1_data[i];
|
||||
let e2 = v2_data[i];
|
||||
dot += e1 * e2;
|
||||
norm1 += e1 * e1;
|
||||
norm2 += e2 * e2;
|
||||
}
|
||||
|
||||
// Check for zero norms
|
||||
if norm1 == 0.0 || norm2 == 0.0 {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(1.0 - (dot / (norm1 * norm2).sqrt()))
|
||||
}
|
||||
@@ -1,24 +1,31 @@
|
||||
use super::{DistanceCalculator, DistanceType};
|
||||
use crate::vector::vector_types::{Vector, VectorType};
|
||||
use crate::Result;
|
||||
use crate::{
|
||||
vector::vector_types::{Vector, VectorType},
|
||||
LimboError, Result,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Euclidean;
|
||||
|
||||
impl DistanceCalculator for Euclidean {
|
||||
fn distance_type() -> DistanceType {
|
||||
DistanceType::Euclidean
|
||||
pub fn vector_distance_l2(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
// Validate that both vectors have the same dimensions and type
|
||||
if v1.dims != v2.dims {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Vectors must have the same dimensions".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
fn calculate(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
match v1.vector_type {
|
||||
VectorType::Float32Dense => Ok(euclidean_distance_f32(v1.as_f32_slice(), v2.as_f32_slice())),
|
||||
VectorType::Float64Dense => Ok(euclidean_distance_f64(v1.as_f64_slice(), v2.as_f64_slice())),
|
||||
if v1.vector_type != v2.vector_type {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Vectors must be of the same type".to_string(),
|
||||
));
|
||||
}
|
||||
match v1.vector_type {
|
||||
VectorType::Float32Dense => {
|
||||
Ok(vector_f32_distance_l2(v1.as_f32_slice(), v2.as_f32_slice()))
|
||||
}
|
||||
VectorType::Float64Dense => {
|
||||
Ok(vector_f64_distance_l2(v1.as_f64_slice(), v2.as_f64_slice()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn euclidean_distance_f32(v1: &[f32], v2: &[f32]) -> f64 {
|
||||
fn vector_f32_distance_l2(v1: &[f32], v2: &[f32]) -> f64 {
|
||||
let sum = v1
|
||||
.iter()
|
||||
.zip(v2.iter())
|
||||
@@ -27,7 +34,7 @@ fn euclidean_distance_f32(v1: &[f32], v2: &[f32]) -> f64 {
|
||||
sum.sqrt()
|
||||
}
|
||||
|
||||
fn euclidean_distance_f64(v1: &[f64], v2: &[f64]) -> f64 {
|
||||
fn vector_f64_distance_l2(v1: &[f64], v2: &[f64]) -> f64 {
|
||||
let sum = v1
|
||||
.iter()
|
||||
.zip(v2.iter())
|
||||
@@ -58,7 +65,7 @@ mod tests {
|
||||
];
|
||||
let results = vectors
|
||||
.iter()
|
||||
.map(|v| euclidean_distance_f32(&query, v))
|
||||
.map(|v| vector_f32_distance_l2(&query, v))
|
||||
.collect::<Vec<f64>>();
|
||||
assert_eq!(results, expected);
|
||||
}
|
||||
@@ -67,6 +74,6 @@ mod tests {
|
||||
fn test_odd_len() {
|
||||
let v = (0..5).map(|x| x as f32).collect::<Vec<f32>>();
|
||||
let query = (2..7).map(|x| x as f32).collect::<Vec<f32>>();
|
||||
assert_eq!(euclidean_distance_f32(&v, &query), 20.0_f64.sqrt());
|
||||
assert_eq!(vector_f32_distance_l2(&v, &query), 20.0_f64.sqrt());
|
||||
}
|
||||
}
|
||||
4
core/vector/operations/mod.rs
Normal file
4
core/vector/operations/mod.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod concat;
|
||||
pub mod distance_cos;
|
||||
pub mod distance_l2;
|
||||
pub mod slice;
|
||||
139
core/vector/operations/slice.rs
Normal file
139
core/vector/operations/slice.rs
Normal file
@@ -0,0 +1,139 @@
|
||||
use crate::{
|
||||
vector::vector_types::{Vector, VectorType},
|
||||
LimboError, Result,
|
||||
};
|
||||
|
||||
pub fn vector_slice(vector: &Vector, start_idx: usize, end_idx: usize) -> Result<Vector> {
|
||||
fn extract_bytes<T, const N: usize>(
|
||||
slice: &[T],
|
||||
start: usize,
|
||||
end: usize,
|
||||
to_bytes: impl Fn(&T) -> [u8; N],
|
||||
) -> Result<Vec<u8>> {
|
||||
if start > end {
|
||||
return Err(LimboError::InvalidArgument(
|
||||
"start index must not be greater than end index".into(),
|
||||
));
|
||||
}
|
||||
if end > slice.len() || end < start {
|
||||
return Err(LimboError::ConversionError(
|
||||
"vector_slice range out of bounds".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut buf = Vec::with_capacity((end - start) * N);
|
||||
for item in &slice[start..end] {
|
||||
buf.extend_from_slice(&to_bytes(item));
|
||||
}
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
let (vector_type, data) = match vector.vector_type {
|
||||
VectorType::Float32Dense => (
|
||||
VectorType::Float32Dense,
|
||||
extract_bytes::<f32, 4>(vector.as_f32_slice(), start_idx, end_idx, |v| {
|
||||
v.to_le_bytes()
|
||||
})?,
|
||||
),
|
||||
VectorType::Float64Dense => (
|
||||
VectorType::Float64Dense,
|
||||
extract_bytes::<f64, 8>(vector.as_f64_slice(), start_idx, end_idx, |v| {
|
||||
v.to_le_bytes()
|
||||
})?,
|
||||
),
|
||||
};
|
||||
|
||||
Ok(Vector {
|
||||
vector_type,
|
||||
dims: end_idx - start_idx,
|
||||
data,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::vector::{
        operations::slice::vector_slice,
        vector_types::{Vector, VectorType},
    };

    /// Builds a `Float32Dense` vector whose payload is the little-endian
    /// encoding of every element of `slice`.
    fn float32_vec_from(slice: &[f32]) -> Vector {
        let data: Vec<u8> = slice.iter().flat_map(|v| v.to_le_bytes()).collect();
        Vector {
            vector_type: VectorType::Float32Dense,
            dims: slice.len(),
            data,
        }
    }

    /// Copies the vector's f32 view into an owned `Vec` for easy comparison.
    fn f32_slice_from_vector(vector: &Vector) -> Vec<f32> {
        vector.as_f32_slice().to_vec()
    }

    #[test]
    fn test_vector_slice_normal_case() {
        let input_vec = float32_vec_from(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let result = vector_slice(&input_vec, 1, 4).unwrap();

        assert_eq!(result.dims, 3);
        assert_eq!(f32_slice_from_vector(&result), vec![2.0, 3.0, 4.0]);
    }

    #[test]
    fn test_vector_slice_full_range() {
        let input_vec = float32_vec_from(&[10.0, 20.0, 30.0]);
        let result = vector_slice(&input_vec, 0, 3).unwrap();

        assert_eq!(result.dims, 3);
        assert_eq!(f32_slice_from_vector(&result), vec![10.0, 20.0, 30.0]);
    }

    #[test]
    fn test_vector_slice_single_element() {
        let input_vec = float32_vec_from(&[4.40, 2.71]);
        let result = vector_slice(&input_vec, 1, 2).unwrap();

        assert_eq!(result.dims, 1);
        assert_eq!(f32_slice_from_vector(&result), vec![2.71]);
    }

    // An empty range at the very end of the vector is valid and yields an
    // empty vector: this is the real "zero-length" case.
    #[test]
    fn test_vector_slice_empty_list() {
        let input_vec = float32_vec_from(&[1.0, 2.0]);
        let result = vector_slice(&input_vec, 2, 2).unwrap();

        assert_eq!(result.dims, 0);
        assert!(f32_slice_from_vector(&result).is_empty());
    }

    // Despite its historical name, this test exercises an inverted range
    // (start > end), which must be rejected.
    #[test]
    fn test_vector_slice_zero_length() {
        let input_vec = float32_vec_from(&[1.0, 2.0, 3.0]);
        let err = vector_slice(&input_vec, 2, 1);
        assert!(
            err.is_err(),
            "Expected error when start index is greater than end index"
        );
    }

    #[test]
    fn test_vector_slice_out_of_bounds() {
        let input_vec = float32_vec_from(&[1.0, 2.0]);
        let err = vector_slice(&input_vec, 0, 5);
        assert!(err.is_err());
    }

    #[test]
    fn test_vector_slice_start_out_of_bounds() {
        let input_vec = float32_vec_from(&[1.0, 2.0]);
        let err = vector_slice(&input_vec, 5, 5);
        assert!(err.is_err());
    }

    #[test]
    fn test_vector_slice_end_out_of_bounds() {
        let input_vec = float32_vec_from(&[1.0, 2.0]);
        let err = vector_slice(&input_vec, 1, 3);
        assert!(err.is_err());
    }
}
|
||||
@@ -241,93 +241,6 @@ pub fn vector_deserialize_f32(blob: &[u8]) -> Result<Vector> {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn do_vector_distance_cos(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
match v1.vector_type {
|
||||
VectorType::Float32Dense => vector_f32_distance_cos(v1, v2),
|
||||
VectorType::Float64Dense => vector_f64_distance_cos(v1, v2),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn vector_f32_distance_cos(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
if v1.dims != v2.dims {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector dimensions".to_string(),
|
||||
));
|
||||
}
|
||||
if v1.vector_type != v2.vector_type {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector type".to_string(),
|
||||
));
|
||||
}
|
||||
let (mut dot, mut norm1, mut norm2) = (0.0, 0.0, 0.0);
|
||||
let v1_data = v1.as_f32_slice();
|
||||
let v2_data = v2.as_f32_slice();
|
||||
|
||||
// Check for non-finite values
|
||||
if v1_data.iter().any(|x| !x.is_finite()) || v2_data.iter().any(|x| !x.is_finite()) {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
for i in 0..v1.dims {
|
||||
let e1 = v1_data[i];
|
||||
let e2 = v2_data[i];
|
||||
dot += e1 * e2;
|
||||
norm1 += e1 * e1;
|
||||
norm2 += e2 * e2;
|
||||
}
|
||||
|
||||
// Check for zero norms to avoid division by zero
|
||||
if norm1 == 0.0 || norm2 == 0.0 {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(1.0 - (dot / (norm1 * norm2).sqrt()) as f64)
|
||||
}
|
||||
|
||||
pub fn vector_f64_distance_cos(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
if v1.dims != v2.dims {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector dimensions".to_string(),
|
||||
));
|
||||
}
|
||||
if v1.vector_type != v2.vector_type {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector type".to_string(),
|
||||
));
|
||||
}
|
||||
let (mut dot, mut norm1, mut norm2) = (0.0, 0.0, 0.0);
|
||||
let v1_data = v1.as_f64_slice();
|
||||
let v2_data = v2.as_f64_slice();
|
||||
|
||||
// Check for non-finite values
|
||||
if v1_data.iter().any(|x| !x.is_finite()) || v2_data.iter().any(|x| !x.is_finite()) {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
for i in 0..v1.dims {
|
||||
let e1 = v1_data[i];
|
||||
let e2 = v2_data[i];
|
||||
dot += e1 * e2;
|
||||
norm1 += e1 * e1;
|
||||
norm2 += e2 * e2;
|
||||
}
|
||||
|
||||
// Check for zero norms
|
||||
if norm1 == 0.0 || norm2 == 0.0 {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(1.0 - (dot / (norm1 * norm2).sqrt()))
|
||||
}
|
||||
|
||||
pub fn vector_type(blob: &[u8]) -> Result<VectorType> {
|
||||
// Even-sized blobs are always float32.
|
||||
if blob.len() % 2 == 0 {
|
||||
@@ -359,73 +272,10 @@ pub fn vector_type(blob: &[u8]) -> Result<VectorType> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn vector_concat(v1: &Vector, v2: &Vector) -> Result<Vector> {
|
||||
if v1.vector_type != v2.vector_type {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Mismatched vector types".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut data = Vec::with_capacity(v1.data.len() + v2.data.len());
|
||||
data.extend_from_slice(&v1.data);
|
||||
data.extend_from_slice(&v2.data);
|
||||
|
||||
Ok(Vector {
|
||||
vector_type: v1.vector_type,
|
||||
dims: v1.dims + v2.dims,
|
||||
data,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn vector_slice(vector: &Vector, start_idx: usize, end_idx: usize) -> Result<Vector> {
|
||||
fn extract_bytes<T, const N: usize>(
|
||||
slice: &[T],
|
||||
start: usize,
|
||||
end: usize,
|
||||
to_bytes: impl Fn(&T) -> [u8; N],
|
||||
) -> Result<Vec<u8>> {
|
||||
if start > end {
|
||||
return Err(LimboError::InvalidArgument(
|
||||
"start index must not be greater than end index".into(),
|
||||
));
|
||||
}
|
||||
if end > slice.len() || end < start {
|
||||
return Err(LimboError::ConversionError(
|
||||
"vector_slice range out of bounds".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut buf = Vec::with_capacity((end - start) * N);
|
||||
for item in &slice[start..end] {
|
||||
buf.extend_from_slice(&to_bytes(item));
|
||||
}
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
let (vector_type, data) = match vector.vector_type {
|
||||
VectorType::Float32Dense => (
|
||||
VectorType::Float32Dense,
|
||||
extract_bytes::<f32, 4>(vector.as_f32_slice(), start_idx, end_idx, |v| {
|
||||
v.to_le_bytes()
|
||||
})?,
|
||||
),
|
||||
VectorType::Float64Dense => (
|
||||
VectorType::Float64Dense,
|
||||
extract_bytes::<f64, 8>(vector.as_f64_slice(), start_idx, end_idx, |v| {
|
||||
v.to_le_bytes()
|
||||
})?,
|
||||
),
|
||||
};
|
||||
|
||||
Ok(Vector {
|
||||
vector_type,
|
||||
dims: end_idx - start_idx,
|
||||
data,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::vector::operations;
|
||||
|
||||
use super::*;
|
||||
use quickcheck::{Arbitrary, Gen};
|
||||
use quickcheck_macros::quickcheck;
|
||||
@@ -659,7 +509,7 @@ mod tests {
|
||||
/// - Assumes vectors are well-formed (same type and dimension)
|
||||
/// - The distance must be between 0 and 2
|
||||
fn test_vector_distance<const DIMS: usize>(v1: &Vector, v2: &Vector) -> bool {
|
||||
match do_vector_distance_cos(v1, v2) {
|
||||
match operations::distance_cos::vector_distance_cos(v1, v2) {
|
||||
Ok(distance) => (0.0..=2.0).contains(&distance),
|
||||
Err(_) => true,
|
||||
}
|
||||
@@ -689,143 +539,6 @@ mod tests {
|
||||
assert_eq!(vector.vector_type, VectorType::Float32Dense);
|
||||
}
|
||||
|
||||
fn float32_vec_from(slice: &[f32]) -> Vector {
|
||||
let mut data = Vec::new();
|
||||
for &v in slice {
|
||||
data.extend_from_slice(&v.to_le_bytes());
|
||||
}
|
||||
|
||||
Vector {
|
||||
vector_type: VectorType::Float32Dense,
|
||||
dims: slice.len(),
|
||||
data,
|
||||
}
|
||||
}
|
||||
|
||||
fn f32_slice_from_vector(vector: &Vector) -> Vec<f32> {
|
||||
vector.as_f32_slice().to_vec()
|
||||
}
|
||||
|
||||
// Two equally sized vectors: elements of the left operand come first.
#[test]
fn test_vector_concat_normal_case() {
    let left = float32_vec_from(&[1.0, 2.0, 3.0]);
    let right = float32_vec_from(&[4.0, 5.0, 6.0]);

    let joined = vector_concat(&left, &right).unwrap();

    assert_eq!(joined.dims, 6);
    assert_eq!(joined.vector_type, VectorType::Float32Dense);
    assert_eq!(
        f32_slice_from_vector(&joined),
        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    );
}
|
||||
|
||||
// An empty left operand leaves the right operand's contents unchanged.
#[test]
fn test_vector_concat_empty_left() {
    let left = float32_vec_from(&[]);
    let right = float32_vec_from(&[4.0, 5.0]);

    let joined = vector_concat(&left, &right).unwrap();

    assert_eq!(joined.dims, 2);
    assert_eq!(f32_slice_from_vector(&joined), [4.0, 5.0]);
}
|
||||
|
||||
// An empty right operand leaves the left operand's contents unchanged.
#[test]
fn test_vector_concat_empty_right() {
    let left = float32_vec_from(&[1.0, 2.0]);
    let right = float32_vec_from(&[]);

    let joined = vector_concat(&left, &right).unwrap();

    assert_eq!(joined.dims, 2);
    assert_eq!(f32_slice_from_vector(&joined), [1.0, 2.0]);
}
|
||||
|
||||
// Concatenating two empty vectors yields an empty vector.
#[test]
fn test_vector_concat_both_empty() {
    let left = float32_vec_from(&[]);
    let right = float32_vec_from(&[]);

    let joined = vector_concat(&left, &right).unwrap();

    assert_eq!(joined.dims, 0);
    assert!(f32_slice_from_vector(&joined).is_empty());
}
|
||||
|
||||
// Operands of different lengths are joined in order.
#[test]
fn test_vector_concat_different_lengths() {
    let left = float32_vec_from(&[1.0]);
    let right = float32_vec_from(&[2.0, 3.0, 4.0]);

    let joined = vector_concat(&left, &right).unwrap();

    assert_eq!(joined.dims, 4);
    assert_eq!(f32_slice_from_vector(&joined), [1.0, 2.0, 3.0, 4.0]);
}
|
||||
|
||||
// Interior range: start inclusive, end exclusive.
#[test]
fn test_vector_slice_normal_case() {
    let source = float32_vec_from(&[1.0, 2.0, 3.0, 4.0, 5.0]);

    let sliced = vector_slice(&source, 1, 4).unwrap();

    assert_eq!(sliced.dims, 3);
    assert_eq!(f32_slice_from_vector(&sliced), [2.0, 3.0, 4.0]);
}
|
||||
|
||||
// Slicing the whole range reproduces the input elements.
#[test]
fn test_vector_slice_full_range() {
    let source = float32_vec_from(&[10.0, 20.0, 30.0]);

    let sliced = vector_slice(&source, 0, 3).unwrap();

    assert_eq!(sliced.dims, 3);
    assert_eq!(f32_slice_from_vector(&sliced), [10.0, 20.0, 30.0]);
}
|
||||
|
||||
// A one-element window at the end of the vector.
#[test]
fn test_vector_slice_single_element() {
    let source = float32_vec_from(&[4.40, 2.71]);

    let sliced = vector_slice(&source, 1, 2).unwrap();

    assert_eq!(sliced.dims, 1);
    assert_eq!(f32_slice_from_vector(&sliced), [2.71]);
}
|
||||
|
||||
// An empty range positioned at the end of the vector is accepted.
#[test]
fn test_vector_slice_empty_list() {
    let source = float32_vec_from(&[1.0, 2.0]);

    let sliced = vector_slice(&source, 2, 2).unwrap();

    assert_eq!(sliced.dims, 0);
}
|
||||
|
||||
// Despite its historical name, this test exercises an inverted range
// (start > end), which must be rejected; a genuinely zero-length range
// such as `2..2` is valid.
#[test]
fn test_vector_slice_zero_length() {
    let input_vec = float32_vec_from(&[1.0, 2.0, 3.0]);
    let err = vector_slice(&input_vec, 2, 1);
    assert!(
        err.is_err(),
        "Expected error when start index is greater than end index"
    );
}
|
||||
|
||||
// The end index lies far past the last element.
#[test]
fn test_vector_slice_out_of_bounds() {
    let source = float32_vec_from(&[1.0, 2.0]);
    assert!(vector_slice(&source, 0, 5).is_err());
}
|
||||
|
||||
// Both indices lie past the last element, even though the range is empty.
#[test]
fn test_vector_slice_start_out_of_bounds() {
    let source = float32_vec_from(&[1.0, 2.0]);
    assert!(vector_slice(&source, 5, 5).is_err());
}
|
||||
|
||||
// The start index is valid but the end index overshoots by one.
#[test]
fn test_vector_slice_end_out_of_bounds() {
    let source = float32_vec_from(&[1.0, 2.0]);
    assert!(vector_slice(&source, 1, 3).is_err());
}
|
||||
|
||||
#[quickcheck]
|
||||
fn prop_vector_text_roundtrip_2d(v: ArbitraryVector<2>) -> bool {
|
||||
test_vector_text_roundtrip(v.into())
|
||||
|
||||
Reference in New Issue
Block a user