You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
This algorithm is based on the concept of hashing, so if you are not familiar with string hashing, refer to the string hashing article on Internet. This algorithm was authored by Rabin and Karp in 1987. The Rabin-Karp string matching algorithm calculates a hash value for the pattern, as well as for each M-character subsequences of text to be compared. If the hash values are unequal, the algorithm will determine the hash value for next M-character sequence. If the hash values are equal, the algorithm will analyze the pattern and the M-character sequence. In this way, there is only one comparison per text subsequence, and character matching is only required when the hash values match.
Problem:
Given two strings - a pattern 's' and a text 't', determine if the pattern appears in the text and if it does, enumerate all its occurrences in O(|s| + |t|) time.
Algorithm:
Calculate the hash for the pattern 's'. Calculate hash values for all the prefixes of the text 't'. Now, we can compare a substring of length |s| with s in constant time using the calculated hashes. So, compare each substring of length |s| with the pattern. This will take a total of O(|t|) time. Hence the final complexity of the algorithm is O(|t| + |s|) : is required for calculating the hash of the pattern and for comparing each substring of length |s| with the pattern.
Implementation in C++:
vector<int> rabin_karp(string const& s, string const& t) {
const int p = 31;
const int m = 1e9 + 9;
int S = s.size(), T = t.size();
vector<long long> p_pow(max(S, T));
p_pow[0] = 1;
for (int i = 1; i < (int)p_pow.size(); i++)
p_pow[i] = (p_pow[i-1] * p) % m;
vector<long long> h(T + 1, 0);
for (int i = 0; i < T; i++)
h[i+1] = (h[i] + (t[i] - 'a' + 1) * p_pow[i]) % m;
long long h_s = 0;
for (int i = 0; i < S; i++)
h_s = (h_s + (s[i] - 'a' + 1) * p_pow[i]) % m;
vector<int> occurences;
for (int i = 0; i + S - 1 < T; i++) {
long long cur_h = (h[i+S] + m - h[i]) % m;
if (cur_h == h_s * p_pow[i] % m)
occurences.push_back(i);
}
return occurences;
}
Implementation in Java:
public class RabinKarp {
public final static int d = 10;
static void search(String pattern, String txt, int q) {
int m = pattern.length();
int n = txt.length();
int i, j;
int p = 0;
int t = 0;
int h = 1;
for (i = 0; i < m - 1; i++)
h = (h * d) % q;
// Calculate hash value for pattern and text
for (i = 0; i < m; i++) {
p = (d * p + pattern.charAt(i)) % q;
t = (d * t + txt.charAt(i)) % q;
}
// Find the match
for (i = 0; i <= n - m; i++) {
if (p == t) {
for (j = 0; j < m; j++) {
if (txt.charAt(i + j) != pattern.charAt(j))
break;
}
if (j == m)
System.out.println("Pattern is found at position: " + (i + 1));
}
if (i < n - m) {
t = (d * (t - txt.charAt(i) * h) + txt.charAt(i + m)) % q;
if (t < 0)
t = (t + q);
}
}
}
public static void main(String[] args) {
String txt = "ABCCDDAEFG";
String pattern = "CDD";
int q = 13;
search(pattern, txt, q);
}
}
Implementation in Python:
d = 10
def search(pattern, text, q):
m = len(pattern)
n = len(text)
p = 0
t = 0
h = 1
i = 0
j = 0
for i in range(m-1):
h = (h*d) % q
for i in range(m):
p = (d*p + ord(pattern[i])) % q
t = (d*t + ord(text[i])) % q
for i in range(n-m+1):
if p == t:
for j in range(m):
if text[i+j] != pattern[j]:
break
j += 1
if j == m:
print("Pattern is found at position: " + str(i+1))
if i < n-m:
t = (d*(t-ord(text[i])*h) + ord(text[i+m])) % q
if t < 0:
t = t+q
text = "ABCCDDAEFG"
pattern = "CDD"
q = 13
search(pattern, text, q)