-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTrie_Levenshtein.cpp
More file actions
155 lines (132 loc) · 4.46 KB
/
Trie_Levenshtein.cpp
File metadata and controls
155 lines (132 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// - - - - - - - - - - - - - - - - - - - -
// Name : Jeremy Choo, Siyuan Liu
// ID : 1602380, 1589879
// CMPUT 275, Winter 2019
//
// project: Text Editor
// - - - - - - - - - - - - - - - - - - - -
#include "Trie_Levenshtein.h"
using namespace std;
TrieNode::TrieNode(char letter) {
this->isWord = false;
this->letter = letter;
this->word = word;
}
TrieNode* TrieNode::getChild(char letter) {
/*
This function will return the child node of the given letter if it exists.
Null otherwise.
*/
if (!children.empty()) {
if (children.find(letter) != children.end()) {
return children[letter];
}
}
return NULL;
}
Trie::Trie() {
this->root = new TrieNode(' ');
}
Trie::~Trie() {
if (this->root != NULL) {
delete this->root;
this->root = NULL;
result.clear();
}
}
void Trie::insert(string word) {
/*
This function will insert the new word letter by letter into the Trie
by traversing from the root to the leaf, and keep adding new TrieNode
if there is no child for the current node.
*/
int length = word.length();
TrieNode* current = this->root;
if (length == 0) {
current->isWord = true;
}
for (int index = 0; index < length; index++) {
char letter = word[index];
TrieNode* child = current->getChild(letter);
if (child != NULL) {
current = child;
} else {
current->children[letter] = new TrieNode(letter);
current = current->getChild(letter);
}
if (index == length - 1) {
current->isWord = true;
current->word = word;
}
}
}
void Trie::traverseTrie(TrieNode* node, char letter, vector<char>& word,
vector<int>& previousRow) {
/*
This function is the recursive helper function that traverses the Trie in search of the minimum
Levenshtein distance and store the word that has a levenshtein distance of smaler than or equal
3 which is the maxCost into the vector named result with a pair (the word and the levenshtein
distance).
Argument:
node - the current TrieNode
letter - the current character of the current word we're working with
word - an array representation of the current word
previousRow - a row in the Levenshtein Distance matrix
*/
int size = previousRow.size();
vector<int> currentRow(size);
currentRow[0] = previousRow[0] + 1;
int minimumElement = currentRow[0];
int insertCost, deleteCost, replaceCost;
// calculate the levenshtein distance
for (int i = 1; i < size; i++) {
insertCost = currentRow[i - 1] + 1;
deleteCost = previousRow[i] + 1;
if (word[i - 1] == letter) {
replaceCost = previousRow[i - 1];
} else {
replaceCost = previousRow[i - 1] + 1;
}
currentRow[i] = min({insertCost, deleteCost, replaceCost});
if (currentRow[i] < minimumElement) {
minimumElement = currentRow[i];
}
}
// get the minimum distance and the word
if (currentRow[size - 1] <= maxCost && node->isWord) {
if (currentRow[size - 1] <= minLevDist) {
// keep track of the smallest levenshtein distance
minLevDist = currentRow[size - 1];
}
// the the word and levenshtein distance calculated
result.push_back({node->word, currentRow[size - 1]});
}
if (minimumElement <= maxCost) {
for (const auto& c : node->children) {
traverseTrie(c.second, c.first, word, currentRow);
}
}
}
void Trie::computeMinimumLevenshteinDistance(vector<char>& word) {
/*
This function is the helper function of calculating the Levenshtein
distance by calling the function traverseTrie and start the recursion.
Arguement:
word: the string that to be used to compare with all the words in Trie
*/
// always clear the result before calculating so that the previous result
// will not remain in the vector.
if (!result.empty())
result.clear();
// get a large value from the random library
minLevDist = RAND_MAX;
int iWordLength = word.size();
vector<int> currentRow(iWordLength + 1);
// initialize the first row
for (int i = 0; i <= iWordLength; i++) {
currentRow[i] = i;
}
// start traversing the Trie
for (const auto& c : root->children)
traverseTrie(c.second, c.first, word, currentRow);
}