ROSE  0.11.98.0
Levenshtein.h
1 #ifndef ROSE_EditDistance_Levenshtein_H
2 #define ROSE_EditDistance_Levenshtein_H
3 
4 namespace Rose {
5 namespace EditDistance {
6 
7 // Stack for Levenshtein distance. Used internally
8 template<typename T>
10  typedef std::pair<T/*key*/, size_t/*value*/> KeyVal;
11  typedef std::list<KeyVal> KeyValList;
12  KeyValList pairs;
13 
14  void unique_push_zero(const T& key) {
15  for (typename KeyValList::iterator pi=pairs.begin(); pi!=pairs.end(); ++pi) {
16  if (pi->first==key)
17  return;
18  }
19  pairs.push_front(KeyVal(key, 0));
20  }
21 
22  size_t& operator[](const T& key) {
23  for (typename KeyValList::iterator pi=pairs.begin(); pi!=pairs.end(); ++pi) {
24  if (pi->first==key)
25  return pi->second;
26  }
27  assert(!"not found");
28  abort();
29  }
30 };
31 
36 template<typename T>
37 size_t
38 levenshteinDistance(const std::vector<T> &src, const std::vector<T> &tgt)
39 {
40  // Implementation is cut-n-pasted from above, but removed the line for swaps and associated variables
41  if (src.empty() || tgt.empty())
42  return std::max(src.size(), tgt.size());
43 
44  const size_t x = src.size();
45  const size_t y = tgt.size();
46  std::vector<std::vector<size_t> > score(x+2, std::vector<size_t>(y+2, 0));
47  size_t score_ceil = x + y;
48  score[0][0] = score_ceil;
49  for (size_t i=0; i<=x; ++i) {
50  score[i+1][1] = i;
51  score[i+1][0] = score_ceil;
52  }
53  for (size_t j=0; j<=y; ++j) {
54  score[1][j+1] = j;
55  score[0][j+1] = score_ceil;
56  }
57 
59  for (size_t i=0; i<x; ++i)
60  dict.unique_push_zero(src[i]);
61  for (size_t j=0; j<y; ++j)
62  dict.unique_push_zero(tgt[j]);
63 
64  for (size_t i=1; i<=x; ++i) {
65  for (size_t j=1; j<=y; ++j) {
66  if (src[i-1]==tgt[j-1]) {
67  score[i+1][j+1] = score[i][j];
68  } else {
69  score[i+1][j+1] = std::min(score[i][j], std::min(score[i+1][j], score[i][j+1])) + 1;
70  }
71  }
72  dict[src[i-1]] = i;
73  }
74 
75  return score[x+1][y+1];
76 }
77 
78 
79 } // namespace
80 } // namespace
81 
82 #endif
size_t levenshteinDistance(const std::vector< T > &src, const std::vector< T > &tgt)
Levenshtein edit distance.
Definition: Levenshtein.h:38
Main namespace for the ROSE library.