Use ndarray
diff --git a/Cargo.toml b/Cargo.toml
index 74921cd..75ac0a1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,9 @@
documentation = "https://docs.rs/strsim/"
exclude = ["/.travis.yml", "/appveyor.yml", "/dev"]
+[dependencies]
+ndarray = "0.12.1"
+
[badges]
travis-ci = { repository = "dguo/strsim-rs" }
appveyor = { repository = "dguo/strsim-rs" }
diff --git a/src/lib.rs b/src/lib.rs
index 41c5789..59a1bd8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,7 @@
//! This library implements string similarity metrics.
+extern crate ndarray;
+
use std::char;
use std::cmp::{max, min};
use std::collections::HashMap;
@@ -8,6 +10,8 @@
use std::hash::Hash;
use std::str::Chars;
+use ndarray::Array2;
+
#[derive(Debug, PartialEq)]
pub enum StrSimError {
DifferentLengthArgs,
@@ -326,18 +330,18 @@
if a_len == 0 { return b_len; }
if b_len == 0 { return a_len; }
- let mut distances = vec![vec![0; b_len + 2]; a_len + 2];
+ let mut distances = Array2::<usize>::zeros((a_len + 2, b_len + 2));
let max_distance = a_len + b_len;
- distances[0][0] = max_distance;
+ distances[[0, 0]] = max_distance;
for i in 0..(a_len + 1) {
- distances[i + 1][0] = max_distance;
- distances[i + 1][1] = i;
+ distances[[i + 1, 0]] = max_distance;
+ distances[[i + 1, 1]] = i;
}
for j in 0..(b_len + 1) {
- distances[0][j + 1] = max_distance;
- distances[1][j + 1] = j;
+ distances[[0, j + 1]] = max_distance;
+ distances[[1, j + 1]] = j;
}
let mut elems: HashMap<Elem, usize> = HashMap::new();
@@ -347,25 +351,22 @@
for j in 1..(b_len + 1) {
let k = match elems.get(&b_elems[j - 1]) {
- Some(value) => value.clone(),
+ Some(&value) => value,
None => 0
};
- let l = db;
+ let insertion_cost = distances[[i, j + 1]] + 1;
+ let deletion_cost = distances[[i + 1, j]] + 1;
+ let transposition_cost = distances[[k, db]] + (i - k - 1) + 1 +
+ (j - db - 1);
+ let mut substitution_cost = distances[[i, j]] + 1;
- let mut cost = 1;
if a_elems[i - 1] == b_elems[j - 1] {
- cost = 0;
db = j;
+ substitution_cost -= 1;
}
- let substitution_cost = distances[i][j] + cost;
- let insertion_cost = distances[i][j + 1] + 1;
- let deletion_cost = distances[i + 1][j] + 1;
- let transposition_cost = distances[k][l] + (i - k - 1) + 1 +
- (j - l - 1);
-
- distances[i + 1][j + 1] = min(substitution_cost,
+ distances[[i + 1, j + 1]] = min(substitution_cost,
min(insertion_cost,
min(deletion_cost,
transposition_cost)));
@@ -374,7 +375,7 @@
elems.insert(a_elems[i - 1].clone(), i);
}
- distances[a_len + 1][b_len + 1]
+ distances[[a_len + 1, b_len + 1]]
}
/// Like optimal string alignment, but substrings can be edited an unlimited