mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-31 13:54:27 +01:00
rsoundex
This commit is contained in:
@@ -5,10 +5,11 @@ mod caver;
|
||||
mod common;
|
||||
mod editdist;
|
||||
mod phonetic;
|
||||
mod rsoundex;
|
||||
mod soundex;
|
||||
|
||||
register_extension! {
|
||||
scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic, fuzzy_caver},
|
||||
scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic, fuzzy_caver, fuzzy_rsoundex},
|
||||
}
|
||||
|
||||
/// Calculates and returns the Levenshtein distance of two non NULL strings.
|
||||
@@ -419,6 +420,16 @@ pub fn fuzzy_caver(args: &[Value]) {
|
||||
}
|
||||
}
|
||||
|
||||
#[scalar(name = "fuzzy_rsoundex")]
|
||||
pub fn fuzzy_rsoundex(args: &[Value]) {
|
||||
let arg1 = args[0].to_text();
|
||||
if let Some(txt) = rsoundex::rsoundex(arg1) {
|
||||
Value::from_text(txt)
|
||||
} else {
|
||||
Value::null()
|
||||
}
|
||||
}
|
||||
|
||||
// Tests adapted from sqlean's fuzzy extension.
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
@@ -599,4 +610,22 @@ mod tests {
|
||||
);
|
||||
}
|
||||
}
|
||||
/// Checks `rsoundex::rsoundex` against a table of known inputs: NULL, the
/// empty string and a few plain ASCII words.
#[test]
fn test_rsoundex() {
    // (input, expected code) pairs; `None` stands for SQL NULL.
    let cases = [
        (None, None),
        (Some(""), Some(String::new())),
        (Some("phonetics"), Some(String::from("P1080603"))),
        (Some("is"), Some(String::from("I03"))),
        (Some("awesome"), Some(String::from("A03080"))),
    ];

    for (input, expected) in cases {
        let result = rsoundex::rsoundex(input);
        assert_eq!(
            result, expected,
            "fuzzy_rsoundex({input:?}) failed: expected {expected:?}, got {result:?}"
        );
    }
}
|
||||
}
|
||||
|
||||
49
extensions/fuzzy/src/rsoundex.rs
Normal file
49
extensions/fuzzy/src/rsoundex.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
/// Computes the refined soundex code of a string.
///
/// The code is the first character of the input (ASCII-uppercased) followed
/// by the refined soundex digit of every character, the first one included,
/// with runs of identical consecutive digits collapsed into a single digit.
///
/// Returns `None` when `input` is `None`, and an empty string when the input
/// is empty.
///
/// More information about the algorithm can be found here:
/// http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html
pub fn rsoundex(input: Option<&str>) -> Option<String> {
    let s = input?;

    // Take the leading *character*, not the leading byte: casting the first
    // UTF-8 byte of a multi-byte character to `char` would emit mojibake.
    let first = match s.chars().next() {
        Some(c) => c,
        None => return Some(String::new()),
    };

    // Upper bound: the leading character plus at most one digit per character.
    let mut code = String::with_capacity(s.len() + 1);
    code.push(first.to_ascii_uppercase());

    // Append each digit, skipping it when it repeats the previous one.
    let mut prev: Option<char> = None;
    for digit in s.chars().map(refined_soundex_encode) {
        if prev != Some(digit) {
            code.push(digit);
            prev = Some(digit);
        }
    }

    Some(code)
}

/// Maps a character to its refined soundex digit (case-insensitive for ASCII).
///
/// Every character that is not one of the listed ASCII consonants maps to `'0'`.
fn refined_soundex_encode(c: char) -> char {
    match c.to_ascii_lowercase() {
        'b' | 'p' => '1',
        'f' | 'v' => '2',
        'c' | 'k' | 's' => '3',
        'g' | 'j' => '4',
        'q' | 'x' | 'z' => '5',
        'd' | 't' => '6',
        'l' => '7',
        'm' | 'n' => '8',
        'r' => '9',
        _ => '0',
    }
}
|
||||
@@ -590,6 +590,9 @@ def validate_fuzzy_phonetic(a):
|
||||
def validate_fuzzy_caver(a):
|
||||
return a == "AWSM111111"
|
||||
|
||||
def validate_fuzzy_rsoundex(a):
    """Return True when `a` equals the code expected for the 'awesome' query."""
    expected = "A03080"
    return a == expected
|
||||
|
||||
def test_fuzzy():
|
||||
limbo = TestTursoShell()
|
||||
ext_path = "./target/debug/liblimbo_fuzzy"
|
||||
@@ -644,6 +647,11 @@ def test_fuzzy():
|
||||
validate_fuzzy_caver,
|
||||
"fuzzy caver function works",
|
||||
)
|
||||
limbo.run_test_fn(
|
||||
"SELECT fuzzy_rsoundex('awesome');",
|
||||
validate_fuzzy_rsoundex,
|
||||
"fuzzy rsoundex function works",
|
||||
)
|
||||
|
||||
def test_vfs():
|
||||
limbo = TestTursoShell()
|
||||
|
||||
Reference in New Issue
Block a user