Add Trie and KMP algorithms. Optimize DSU (union by size), rename its interface, and update the DSU calls in graph.cpp accordingly.

haomingbai · haomingbai · commit 7e540818edb0 · 2025-08-03T06:19:59.000+08:00
diff --git a/concepts.cpp b/concepts.cpp
@@ -97,3 +97,9 @@ concept FullyComparable = requires(T a, T b) {
   { a == b } -> std::convertible_to<bool>;
   { a > b } -> std::convertible_to<bool>;
 };
+// A RandomStdContainer<Container, E> that additionally supports `arr.resize(x)` for a size_t `x`.
+template <typename Container, typename E>
+concept RandomResizableContainer =
+    RandomStdContainer<Container, E> && requires(Container arr, size_t x) {
+      { arr.resize(x) };
+    };
diff --git a/dp/knapsack.cpp b/dp/knapsack.cpp
@@ -1,46 +1,21 @@
+/**
+ * @file knapsack.cpp
+ * @brief
+ * @author Haoming Bai <haomingbai@hotmail.com>
+ * @date   2025-07-28
+ *
+ * Copyright © 2025 Haoming Bai
+ * SPDX-License-Identifier: MIT
+ *
+ * @details
+ */
+
 #include <utility>
 #include <vector>
-#include <map>
-#include <algorithm>
-
-using namespace std;
 
-typedef unsigned long long int ull;
-
-auto knapsack(vector<pair<ull, ull>> &value_weight, ull max_weight) -> pair<ull, ull> &&
-{
-    if (value_weight.empty())
-    {
-        return make_pair(0, 0); // With a cost of zero, we can get the value of zero
-    }
-    vector<map<ull, ull>> sto;
-    sto[0][0] = 0, sto[0][value_weight.front().second] = value_weight.front().first;
-    size_t obj_num = value_weight.size();
-    for (size_t i = 1; i < obj_num; i++)
-    {
-        // range from 0th to (obj_num - 1)th obj.
-        for (auto &it : sto[i - 1])
-        {
-            // it.first indicates weight, second indicates value
-            // "it" means the case with objs from 0 to i - 1
-            sto[i][it.first] = max(sto[i][it.first], it.second); // No take the ith object, compare with another scheme, in which the ith obj was taken.
-            if (it.first <= max_weight - value_weight[i].second) // no exceed the max weight
-            {
-                sto[i][it.first + value_weight[i].second] = it.second + value_weight[i].first; // take the ith obj
-            }
-        }
-    }
-    ull weight{0}, value{0};
-    for (auto &&i : sto[obj_num - 1])
-    {
-        if (i.second > value)
-        {
-            weight = i.first, value = i.second;
-        }
-    }
-    return make_pair(weight, value);
-}
+using W = int;
+using V = int;
 
-int main()
-{
+std::pair<W, V> simple_knapsack(const std::vector<W, V> &wei_val, W max_weight) {
+  std::vector<V> buf(max_weight);  // NOTE(review): unfinished stub - no return statement yet; std::vector<W, V> passes V as the allocator (std::pair<W, V> elements intended?).
 }
diff --git a/dsu.cpp b/dsu.cpp
@@ -1,5 +1,5 @@
 /**
- * @file dsu.cpp
+ * @file dsu.cpp
  * @brief
  * @author Haoming Bai <haomingbai@hotmail.com>
  * @date   2025-06-26
@@ -13,43 +13,53 @@
 #pragma once
 
 #include <cstddef>
+#include <utility>
 #include <vector>
 
-class dsu {
-  std::vector<std::size_t> parent;
+class DSU {  // Disjoint-set union over indices [0, size): path compression in findRoot, union by size in unite.
+  std::vector<std::size_t> parent_, size_;  // parent link per index; tree size, kept current only at roots
 
  public:
-  dsu(std::size_t size) : parent(size) {
-    for (auto i = 0uz; i < parent.size(); i++) {
-      parent[i] = i;
+  DSU(std::size_t size) : parent_(size), size_(size, 1) {
+    for (auto i = 0ul; i < parent_.size(); i++) {
+      parent_[i] = i;  // each index starts as the root of its own one-element set
     }
   }
 
   std::size_t findRoot(std::size_t idx) {
-    if (parent[idx] == idx) {
-      return idx;
+    // Recursion base case: idx is a root, or its parent is a root (depth 1, the root being depth 0).
+    if (parent_[idx] == idx || parent_[parent_[idx]] == parent_[idx]) {
+      return parent_[idx];
     }
 
-    std::size_t res = idx;
-    while (res != parent[res]) {
-      res = parent[res];
-    }
+    parent_[idx] = findRoot(parent_[idx]);  // path compression: re-point idx directly at the root
+    return parent_[idx];
+  }
+  // Merges the sets containing idx1 and idx2; a no-op when they already share a root.
+  void unite(std::size_t idx1, std::size_t idx2) {
+    // Resolve both roots first: only a root's size_ entry is kept up to date.
+    idx1 = findRoot(idx1);
+    idx2 = findRoot(idx2);
 
-    std::size_t curr = idx;
-    while (parent[curr] != res) {
-      auto to_modify = curr;
-      curr = parent[to_modify];
-      parent[to_modify] = res;
+    // Identical roots mean both indices are already in the same set.
+    if (idx1 == idx2) {
+      return;
     }
 
-    return res;
-  }
+    // Tree 2 gets attached under tree 1, so make idx1 the root of the larger tree.
+    if (size_[idx1] < size_[idx2]) {
+      std::swap(idx1, idx2);
+    }
 
-  void unite(std::size_t idx1, std::size_t idx2) {
-    parent[findRoot(idx2)] = findRoot(idx1);
+    // Attach root 2 under root 1.
+    parent_[idx2] = idx1;
+    // Root 1 is now the root of the merged tree, so only its size entry is updated:
+    // the merged size is the sum of the two former tree sizes.
+    size_[idx1] += size_[idx2];
+    return;
   }
 
-  bool inSameTree(std::size_t idx1, std::size_t idx2) {
+  bool inSameSet(std::size_t idx1, std::size_t idx2) {  // true when both indices resolve to the same root
     return findRoot(idx1) == findRoot(idx2);
   }
 };
diff --git a/graph.cpp b/graph.cpp
@@ -351,11 +351,11 @@ template <typename T>
 std::vector<Edge<T>> kruskal(std::vector<Edge<T>> edges) {
   std::sort(edges.begin(), edges.end(),
             [](const auto &a, const auto &b) { return a.weight < b.weight; });
-  dsu visited(edges.size());
+  DSU visited(edges.size());
 
   std::vector<Edge<T>> res;
   for (auto &it : res) {
-    if (visited.inSameTree(it.p1, it.p2)) {
+    if (visited.inSameSet(it.p1, it.p2)) {
       res.emplace_back(it);
       visited.unite(it.p1, it.p2);
     }
diff --git a/main.tex b/main.tex
@@ -80,10 +80,10 @@
   \centering
   \vspace*{2cm}
 
-  % 封面主图占位符，可替换为实际图片文件
+  % 封面主图占位符, 可替换为实际图片文件
   % \includegraphics[width=0.5\textwidth]{cover_image1_placeholder}\\[1.5cm]
 
-  % 书名与副标题，中间使用 en-dash
+  % 书名与副标题, 中间使用 en-dash
   {\Huge\bfseries 基础算法示例 -- C++实现\par}
   \vspace{2cm}
   % 作者、机构等信息（可修改）
@@ -236,4 +236,44 @@ \chapter{计算几何}
 
 \lstinputlisting[language=C++, caption=geometry.cpp, style=MyCStyle]{./geometry.cpp}
 
+\chapter{字符串}
+
+在计算机系统中, 字符串作为信息的基本载体, 承载着从数据存储到逻辑控制的核心功能. 尤其在类Unix生态中, 诸如\texttt{grep}的文本搜索、\texttt{sed}的流编辑及\texttt{awk}的模式处理等工具, 均构建于高效字符串操作之上, 印证了Knuth「字符串处理是程序设计技术的试金石」的论断. 
+
+本章将实现字符串处理中的部分基础算法模板, 涵盖：
+\begin{itemize}
+    \item 字符串匹配（单模式/多模式）
+    \item 字典树与自动机
+\end{itemize}
+
+后续代码模板均以工业级效率为标准设计, 可直接应用于竞赛及工程场景. 
+
+\section{模式匹配}
+
+文本模式匹配是信息检索与文本处理的基石, 其效率直接影响搜索引擎响应速度、基因序列分析等关键场景的性能. 以Unix工具链为例, 当\texttt{grep}在GB级日志中检索模式时, 朴素匹配$O(nm)$的时间复杂度将导致灾难性延迟. 
+
+本节实现Knuth-Morris-Pratt(KMP)算法模板, 其核心在于：
+\begin{itemize}
+    \item 通过\textbf{失配函数}预处理模式串($O(m)$)
+    \item 实现$O(n)$时间复杂度匹配
+    \item 避免回溯的\textbf{状态机跳转}机制
+\end{itemize}
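+代码提供前缀函数模板\texttt{PrefixFunction}以及基于\texttt{std::string\_view}的单模式查找函数\texttt{PatternMatching}, 后者返回首次匹配的起始下标, 未匹配时返回\texttt{SIZE\_MAX}.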
+
+\lstinputlisting[language=C++, caption=kmp.cpp, style=MyCStyle]{./str/kmp.cpp}
+
+\section{字典树}
+
+
+字典树 (Trie) 作为高效处理字符串集合的树形数据结构, 在搜索引擎自动补全、拼写检查及路由协议中具有不可替代性. 其核心优势在于：
+\begin{itemize}
+    \item \textbf{前缀共享}：具有公共前缀的字符串共享存储路径
+    \item \textbf{检索加速}：$O(L)$时间完成键查询（$L$为键长）
+    \item \textbf{字典序遍历}：天然支持按字典序访问所有键
+\end{itemize}
+
+本节实现基于数组的双版本Trie模板, 部分支持字符串的删除.
+
+\lstinputlisting[language=C++, caption=trie.cpp, style=MyCStyle]{./str/trie.cpp}
+
 \end{document}
diff --git a/str/kmp.cpp b/str/kmp.cpp
@@ -0,0 +1,65 @@
+/**
+ * @file kmp.cpp
+ * @brief Prefix function (KMP failure table) and single-pattern substring search.
+ * @author Haoming Bai <haomingbai@hotmail.com>
+ * @date   2025-08-03
+ *
+ * Copyright © 2025 Haoming Bai
+ * SPDX-License-Identifier: MIT
+ *
+ * @details
+ */
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "../concepts.cpp"
+
+size_t PatternMatching(const std::string_view src,
+                       const std::string_view pattern);
+// Fills `prefixes` so that prefixes[i] is the length of the longest proper prefix of str[0..i] that is also its suffix.
+template <typename E, RandomResizableContainer<size_t> SizeContainer,
+          RandomStdContainer<E> CharContainer>
+void PrefixFunction(SizeContainer &prefixes, const CharContainer &str) {
+  if (str.empty()) {
+    return;  // `prefixes` is left untouched for empty input
+  }
+  // NOTE(review): prefixes[0] is never assigned below; with std::vector-like resize() only new slots get 0, so a non-empty `prefixes` keeps stale data - confirm callers pass it empty.
+  prefixes.resize(str.size(), 0);
+  // NOTE(review): the two-argument resize above is not covered by RandomResizableContainer, which only requires resize(x).
+  for (size_t i = 1; i < str.size(); i++) {
+    auto curr = prefixes[i - 1];  // border length carried over from the previous position
+    while (curr != 0 && str[curr] != str[i]) {
+      curr = prefixes[curr - 1];  // mismatch: fall back to the next shorter border
+    }
+    if (str[curr] == str[i]) {
+      curr++;
+    }
+    prefixes[i] = curr;
+  }
+}
+// Intended to return the index in `src` of the first occurrence of `pattern`, or SIZE_MAX when there is none (see the NOTE below).
+size_t PatternMatching(const std::string_view src,
+                       const std::string_view pattern) {
+  if (pattern.empty()) {
+    // An empty pattern is reported as "not found" (SIZE_MAX, i.e. size_t(-1)).
+    return SIZE_MAX;
+  }
+  std::string str(pattern);
+  str += src;
+  // NOTE(review): pattern and src are joined without a separator, so a "match" can straddle the join: PatternMatching("aaa", "aa") hits at i == 0 and returns 0 - 1, which wraps to SIZE_MAX instead of 0.
+  std::vector<size_t> prefixes;
+  PrefixFunction<char>(prefixes, str);
+  for (size_t i = 0, offset = pattern.size(); i < src.size(); i++) {
+    if (prefixes[i + offset] >= pattern.size()) {
+      // i is where the match ends in src; step back offset - 1 to reach its first character.
+      return i - (offset - 1);
+    }
+  }
+
+  // No match was found.
+  return SIZE_MAX;
+}
diff --git a/str/trie.cpp b/str/trie.cpp