commit | 3dfff3d4339e205651e99cab4ad8e193c07eaf83 | [log] [tgz] |
---|---|---|
author | Danny Guo <dguo@users.noreply.github.com> | Wed Aug 24 02:53:08 2016 |
committer | GitHub <noreply@github.com> | Wed Aug 24 02:53:08 2016 |
tree | 8890ceb9cac224e5f85e74b3a6b08ee9cf131eb2 | |
parent | 819e3e2fd51bc14386161444f6fbb727e764becb [diff] |
Fix Jaro and Jaro-Winkler panic (#6)

When given two strings that both have a length of one, Jaro and Jaro-Winkler would panic with an arithmetic operation overflow.
Rust implementations of string similarity metrics:
# Cargo.toml
[dependencies]
strsim = "0.5.0"
extern crate strsim; use strsim::{hamming, levenshtein, damerau_levenshtein, jaro, jaro_winkler, levenshtein_against_vec, damerau_levenshtein_against_vec, jaro_against_vec, jaro_winkler_against_vec}; fn main() { match hamming("hamming", "hammers") { Ok(distance) => assert_eq!(3, distance), Err(why) => panic!("{:?}", why) } assert_eq!(3, levenshtein("kitten", "sitting")); assert_eq!(1, damerau_levenshtein("specter", "spectre")); assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() < 0.001); assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() < 0.001); // get vectors of values back let v = vec!["test", "test1", "test12", "test123", "", "tset"]; assert_eq!(levenshtein_against_vec("test", &v), vec![0, 1, 2, 3, 4, 2]); assert_eq!(damerau_levenshtein_against_vec("test", &v), vec![0, 1, 2, 3, 4, 1]); let jaro_distances = jaro_against_vec("test", &v); let jaro_expected = vec![1.0, 0.933333, 0.888889, 0.857143, 0.0, 0.916667]; let jaro_delta: f64 = jaro_distances.iter() .zip(jaro_expected.iter()) .map(|(x, y)| (x - y).abs() as f64) .fold(0.0, |x, y| x + y as f64); assert!(jaro_delta < 0.0001); let jaro_winkler_distances = jaro_winkler_against_vec("test", &v); let jaro_winkler_expected = vec![1.0, 0.96, 0.933333, 0.914286, 0.0, 0.925]; let jaro_winkler_delta = jaro_winkler_distances.iter() .zip(jaro_winkler_expected.iter()) .map(|(x, y)| (x - y).abs() as f64) .fold(0.0, |x, y| x + y as f64); assert!(jaro_winkler_delta < 0.0001); }
Install Vagrant, and run `vagrant up`.