Convert strings to numbers for fast comparison.The Problem: Comparing two strings of length n takes O(n). For many comparisons, this is too slow.The Insight: Map each string to a number (hash). If hashes differ, strings differ. If hashes match, strings are probably equal.How it works: Treat the string as a number in base B:
H(s)=s[0]⋅Bn−1+s[1]⋅Bn−2+...+s[n−1]⋅B0(modMOD)Example: “abc” with B=31:
H=1⋅312+2⋅311+3⋅310=961+62+3=1026Substring Hash in O(1): Using prefix hashes:
H(s[l..r])=H(s[0..r])−H(s[0..l−1])⋅Br−l+1Collision Risk: Two different strings can have the same hash. Use:
Large prime MOD (10^9 + 7)
Double hashing: Two different (BASE, MOD) pairs—collision probability becomes ~1/10^18
Copy
class StringHash {public: const long long MOD = 1e9 + 7; const long long BASE = 31; int n; vector<long long> hash_, power; StringHash(const string& s) : n(s.size()), hash_(n + 1), power(n + 1) { power[0] = 1; for (int i = 0; i < n; i++) { hash_[i + 1] = (hash_[i] * BASE + (s[i] - 'a' + 1)) % MOD; power[i + 1] = (power[i] * BASE) % MOD; } } // Get hash of s[l..r] (0-indexed, inclusive) long long getHash(int l, int r) { long long h = (hash_[r + 1] - hash_[l] * power[r - l + 1] % MOD + MOD) % MOD; return h; }};// Usagestring s = "abcabc";StringHash sh(s);cout << (sh.getHash(0, 2) == sh.getHash(3, 5)); // true: "abc" == "abc"
Find all occurrences of pattern in text in O(n + m).The Problem: Naive search is O(n·m)—for each position in text, compare entire pattern.KMP’s Insight: When a mismatch occurs, we’ve already matched some characters. Use this information to skip ahead instead of starting over.
LPS[i] = length of the Longest Proper Prefix of pattern[0..i] which is also a Suffix.Example: pattern = “ABABAC”
Copy
i=0: "A" → No proper prefix → LPS[0] = 0i=1: "AB" → No match → LPS[1] = 0 i=2: "ABA" → "A" = "A" → LPS[2] = 1i=3: "ABAB" → "AB" = "AB" → LPS[3] = 2i=4: "ABABA" → "ABA" = "ABA" → LPS[4] = 3i=5: "ABABAC" → No suffix matches prefix ending with C → LPS[5] = 0LPS = [0, 0, 1, 2, 3, 0]
Why this helps: When mismatch at position j, we know pattern[0..j-1] matched. The LPS tells us the longest prefix we don’t need to re-check.
Copy
vector<int> computeLPS(const string& pattern) { int m = pattern.size(); vector<int> lps(m, 0); // Longest Proper Prefix which is also Suffix int len = 0; int i = 1; while (i < m) { if (pattern[i] == pattern[len]) { len++; lps[i] = len; i++; } else { if (len != 0) { len = lps[len - 1]; } else { lps[i] = 0; i++; } } } return lps;}
vector<int> kmpSearch(const string& text, const string& pattern) { int n = text.size(), m = pattern.size(); vector<int> lps = computeLPS(pattern); vector<int> matches; int i = 0, j = 0; while (i < n) { if (pattern[j] == text[i]) { i++; j++; } if (j == m) { matches.push_back(i - j); // Match found at index i-j j = lps[j - 1]; } else if (i < n && pattern[j] != text[i]) { if (j != 0) { j = lps[j - 1]; } else { i++; } } } return matches;}
z[i] = length of longest substring starting from i that matches a prefix of s.Example: s = “aabxaab”
Copy
i=0: (undefined, by convention z[0]=0 or n)i=1: s[1..] = "abxaab" vs s = "aabxaab" → "a" matches → z[1] = 1i=2: s[2..] = "bxaab" vs s → No match → z[2] = 0i=3: s[3..] = "xaab" vs s → No match → z[3] = 0i=4: s[4..] = "aab" vs s → "aab" matches → z[4] = 3i=5: s[5..] = "ab" vs s → "a" matches → z[5] = 1i=6: s[6..] = "b" vs s → No match → z[6] = 0Z = [0, 1, 0, 0, 3, 1, 0]
The Optimization (Z-box technique): Maintain [l, r)—the rightmost segment that matches a prefix. If i < r, we can reuse previously computed values!Pattern Matching Trick: To find pattern P in text T:
Create combined = P + ”"+T( is a separator not in P or T)
Compute Z-function
Wherever z[i] == len(P), we found a match at position i - len(P) - 1 in T
Copy
vector<int> zFunction(const string& s) { int n = s.size(); vector<int> z(n, 0); int l = 0, r = 0; for (int i = 1; i < n; i++) { if (i < r) { z[i] = min(r - i, z[i - l]); } while (i + z[i] < n && s[z[i]] == s[i + z[i]]) { z[i]++; } if (i + z[i] > r) { l = i; r = i + z[i]; } } return z;}// Pattern matching using Z-functionvector<int> findPattern(const string& text, const string& pattern) { string combined = pattern + "$" + text; vector<int> z = zFunction(combined); vector<int> matches; int m = pattern.size(); for (int i = m + 1; i < combined.size(); i++) { if (z[i] == m) { matches.push_back(i - m - 1); } } return matches;}
Efficient for prefix-based queries on a dictionary.Structure: A tree where:
Each edge is labeled with a character
Each node represents a prefix (path from root)
Words sharing a prefix share that path
Visual Example with words [“cat”, “car”, “card”, “dog”]:
Copy
(root) / \ c d | | a o / \ | t r g* * * | d** = word ends here
Operations:
Insert/Search: O(length of word)
Count words with prefix: O(length of prefix)
Autocomplete: O(prefix length + number of suggestions)
When to use:
Dictionary with prefix queries
XOR maximization (binary trie)
Counting distinct prefixes
Copy
class Trie {public: struct Node { int children[26] = {}; int count = 0; // Words ending here int prefixCount = 0; // Words with this prefix }; vector<Node> nodes; Trie() { nodes.push_back(Node()); // Root } void insert(const string& s) { int cur = 0; for (char c : s) { int idx = c - 'a'; if (!nodes[cur].children[idx]) { nodes[cur].children[idx] = nodes.size(); nodes.push_back(Node()); } cur = nodes[cur].children[idx]; nodes[cur].prefixCount++; } nodes[cur].count++; } bool search(const string& s) { int cur = 0; for (char c : s) { int idx = c - 'a'; if (!nodes[cur].children[idx]) return false; cur = nodes[cur].children[idx]; } return nodes[cur].count > 0; } int countPrefix(const string& prefix) { int cur = 0; for (char c : prefix) { int idx = c - 'a'; if (!nodes[cur].children[idx]) return 0; cur = nodes[cur].children[idx]; } return nodes[cur].prefixCount; }};
int longestPalindrome(const string& s) { int n = s.size(); string rev = s; reverse(rev.begin(), rev.end()); StringHash sh(s); StringHash rh(rev); auto isPalindrome = [&](int l, int r) { // Compare s[l..r] with reverse return sh.getHash(l, r) == rh.getHash(n - 1 - r, n - 1 - l); }; int ans = 1; // Odd length palindromes for (int i = 0; i < n; i++) { int lo = 0, hi = min(i, n - 1 - i); while (lo < hi) { int mid = (lo + hi + 1) / 2; if (isPalindrome(i - mid, i + mid)) { lo = mid; } else { hi = mid - 1; } } ans = max(ans, 2 * lo + 1); } // Even length palindromes (similar) return ans;}
The Problem: Find all palindromes in O(n) time.The Trick: Insert ’#’ between characters to handle even-length palindromes uniformly.
“abba” becomes “#a#b#b#a#”
Now all palindromes have odd length with a center
The Optimization: Similar to Z-function, maintain the rightmost palindrome [c-r, c+r]. If i is inside this palindrome, we can use the mirror position 2c-i to get a starting value for p[i].Visual:
Copy
String: #a#b#a#p[i]: 0 1 0 3 0 1 0 ^ center (full palindrome "#a#b#a#" has radius 3)Radius 3 in transformed string = palindrome length 3 in original "aba"
Key Insight: p[i] in the transformed string equals the length of the palindrome centered at position i/2 in the original string.
Copy
vector<int> manacher(const string& s) { string t = "#"; for (char c : s) { t += c; t += '#'; } int n = t.size(); vector<int> p(n, 0); // p[i] = radius of palindrome centered at i int c = 0, r = 0; // Center and right boundary of rightmost palindrome for (int i = 0; i < n; i++) { if (i < r) { p[i] = min(r - i, p[2 * c - i]); } while (i - p[i] - 1 >= 0 && i + p[i] + 1 < n && t[i - p[i] - 1] == t[i + p[i] + 1]) { p[i]++; } if (i + p[i] > r) { c = i; r = i + p[i]; } } return p; // p[i] is the length of palindrome in original string}
long long countDistinct(const string& s) { int n = s.size(); set<pair<long long, long long>> seen; // Use double hash DoubleHash dh(s); for (int i = 0; i < n; i++) { for (int j = i; j < n; j++) { seen.insert(dh.getHash(i, j)); } } return seen.size();}// More efficient: Use suffix array// Number of distinct substrings = n*(n+1)/2 - sum of LCP values
int longestCommonSubstring(const string& s, const string& t) { DoubleHash hs(s), ht(t); auto check = [&](int len) -> bool { set<pair<long long, long long>> hashes; for (int i = 0; i + len <= s.size(); i++) { hashes.insert(hs.getHash(i, i + len - 1)); } for (int i = 0; i + len <= t.size(); i++) { if (hashes.count(ht.getHash(i, i + len - 1))) { return true; } } return false; }; int lo = 0, hi = min(s.size(), t.size()); while (lo < hi) { int mid = (lo + hi + 1) / 2; if (check(mid)) { lo = mid; } else { hi = mid - 1; } } return lo;}
class AhoCorasick {public: struct Node { int children[26] = {}; int fail = 0; int output = -1; // Index of pattern ending here }; vector<Node> nodes; AhoCorasick() { nodes.push_back(Node()); } void insert(const string& s, int idx) { int cur = 0; for (char c : s) { int i = c - 'a'; if (!nodes[cur].children[i]) { nodes[cur].children[i] = nodes.size(); nodes.push_back(Node()); } cur = nodes[cur].children[i]; } nodes[cur].output = idx; } void build() { queue<int> q; for (int i = 0; i < 26; i++) { if (nodes[0].children[i]) { q.push(nodes[0].children[i]); } } while (!q.empty()) { int u = q.front(); q.pop(); for (int i = 0; i < 26; i++) { if (nodes[u].children[i]) { int v = nodes[u].children[i]; int f = nodes[u].fail; while (f && !nodes[f].children[i]) { f = nodes[f].fail; } nodes[v].fail = nodes[f].children[i]; if (nodes[v].fail == v) nodes[v].fail = 0; q.push(v); } } } } vector<pair<int, int>> search(const string& text) { vector<pair<int, int>> matches; // {position, pattern_idx} int cur = 0; for (int i = 0; i < text.size(); i++) { int c = text[i] - 'a'; while (cur && !nodes[cur].children[c]) { cur = nodes[cur].fail; } cur = nodes[cur].children[c]; int temp = cur; while (temp) { if (nodes[temp].output != -1) { matches.push_back({i, nodes[temp].output}); } temp = nodes[temp].fail; } } return matches; }};