This commit is contained in:
danawan
2025-09-25 16:10:34 +07:00
parent 54a95a0b55
commit 189caa5d5d
3 changed files with 87 additions and 1 deletions

View File

@@ -5,10 +5,11 @@ mod caver;
mod common;
mod editdist;
mod phonetic;
mod rsoundex;
mod soundex;
register_extension! {
scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic, fuzzy_caver},
scalars: {levenshtein, damerau_levenshtein, edit_distance, hamming, jaronwin, osadist, fuzzy_soundex, fuzzy_phonetic, fuzzy_caver, fuzzy_rsoundex},
}
/// Calculates and returns the Levenshtein distance of two non NULL strings.
@@ -419,6 +420,16 @@ pub fn fuzzy_caver(args: &[Value]) {
}
}
/// SQL scalar `fuzzy_rsoundex`: encodes the text of the first argument with
/// [`rsoundex::rsoundex`].
///
/// Produces a text value holding the code, or NULL when `rsoundex::rsoundex`
/// yields `None`. Only `args[0]` is read; indexing panics if `args` is empty.
#[scalar(name = "fuzzy_rsoundex")]
pub fn fuzzy_rsoundex(args: &[Value]) {
    let text = args[0].to_text();
    match rsoundex::rsoundex(text) {
        Some(code) => Value::from_text(code),
        None => Value::null(),
    }
}
//tests adapted from sqlean fuzzy
#[cfg(test)]
mod tests {
@@ -599,4 +610,22 @@ mod tests {
);
}
}
#[test]
fn test_rsoundex() {
    // Each row pairs an input with the refined-soundex code it must produce;
    // `None` stands for a NULL argument.
    let table: [(Option<&str>, Option<&str>); 5] = [
        (None, None),
        (Some(""), Some("")),
        (Some("phonetics"), Some("P1080603")),
        (Some("is"), Some("I03")),
        (Some("awesome"), Some("A03080")),
    ];

    for (input, want) in table {
        let expected: Option<String> = want.map(str::to_string);
        let result = rsoundex::rsoundex(input);
        assert_eq!(
            result, expected,
            "fuzzy_rsoundex({input:?}) failed: expected {expected:?}, got {result:?}"
        );
    }
}
}

View File

@@ -0,0 +1,49 @@
/// Computes the Refined Soundex code of a string.
///
/// The code consists of the first character of the input (ASCII-uppercased)
/// followed by the refined-soundex digit of every character, the first one
/// included, with runs of identical consecutive digits collapsed into one.
///
/// Returns `None` when `input` is `None` (SQL NULL) and an empty string when
/// the input is empty.
///
/// More information about the algorithm can be found here:
/// http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html
pub fn rsoundex(input: Option<&str>) -> Option<String> {
    let s = input?;

    // Work on chars, not bytes: taking the first *byte* of a multi-byte UTF-8
    // character and casting it to `char` would emit an unrelated Latin-1
    // code point instead of the character the caller passed in.
    let first = match s.chars().next() {
        Some(c) => c,
        None => return Some(String::new()),
    };

    // The leading character contributes at most its own UTF-8 length and every
    // character contributes at most one ASCII digit, so `len + 1` bytes suffice.
    let mut code = String::with_capacity(s.len() + 1);
    code.push(first.to_ascii_uppercase());

    // Append one digit per character, skipping a digit equal to the previous one.
    let mut prev: Option<char> = None;
    for digit in s.chars().map(refined_soundex_encode) {
        if prev != Some(digit) {
            code.push(digit);
            prev = Some(digit);
        }
    }

    Some(code)
}

/// Maps a character to its refined-soundex digit (case-insensitive for ASCII).
///
/// Any character that is not one of the listed consonants (vowels, `h`, `w`,
/// `y`, digits, punctuation, non-ASCII characters) is encoded as `'0'`.
fn refined_soundex_encode(c: char) -> char {
    match c.to_ascii_lowercase() {
        'b' | 'p' => '1',
        'f' | 'v' => '2',
        'c' | 'k' | 's' => '3',
        'g' | 'j' => '4',
        'q' | 'x' | 'z' => '5',
        'd' | 't' => '6',
        'l' => '7',
        'm' | 'n' => '8',
        'r' => '9',
        _ => '0',
    }
}

View File

@@ -590,6 +590,9 @@ def validate_fuzzy_phonetic(a):
def validate_fuzzy_caver(a):
    """Return whether `a` equals the code expected by the fuzzy caver check."""
    expected = "AWSM111111"
    return a == expected
def validate_fuzzy_rsoundex(a):
    """Return whether `a` equals the refined-soundex code expected for 'awesome'."""
    expected = "A03080"
    return a == expected
def test_fuzzy():
limbo = TestTursoShell()
ext_path = "./target/debug/liblimbo_fuzzy"
@@ -644,6 +647,11 @@ def test_fuzzy():
validate_fuzzy_caver,
"fuzzy caver function works",
)
limbo.run_test_fn(
"SELECT fuzzy_rsoundex('awesome');",
validate_fuzzy_rsoundex,
"fuzzy rsoundex function works",
)
def test_vfs():
limbo = TestTursoShell()