Skip to content

Commit 7e54081

Browse files
committed
Add Trie and KMP algorithm. Optimized DSU and update the interface while updating the call of DSU in graph.cpp.
1 parent a6c88d9 commit 7e54081

File tree

7 files changed

+301
-67
lines changed

7 files changed

+301
-67
lines changed

concepts.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,9 @@ concept FullyComparable = requires(T a, T b) {
9797
{ a == b } -> std::convertible_to<bool>;
9898
{ a > b } -> std::convertible_to<bool>;
9999
};
100+
101+
template <typename Container, typename E>
102+
concept RandomResizableContainer =
103+
RandomStdContainer<Container, E> && requires(Container arr, size_t x) {
104+
{ arr.resize(x) };
105+
};

dp/knapsack.cpp

100755100644
Lines changed: 16 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,21 @@
1+
/**
2+
* @file knapsack.cpp
3+
* @brief
4+
* @author Haoming Bai <haomingbai@hotmail.com>
5+
* @date 2025-07-28
6+
*
7+
* Copyright © 2025 Haoming Bai
8+
* SPDX-License-Identifier: MIT
9+
*
10+
* @details
11+
*/
12+
113
#include <utility>
214
#include <vector>
3-
#include <map>
4-
#include <algorithm>
5-
6-
using namespace std;
715

8-
typedef unsigned long long int ull;
9-
10-
auto knapsack(vector<pair<ull, ull>> &value_weight, ull max_weight) -> pair<ull, ull> &&
11-
{
12-
if (value_weight.empty())
13-
{
14-
return make_pair(0, 0); // With a cost of zero, we can get the value of zero
15-
}
16-
vector<map<ull, ull>> sto;
17-
sto[0][0] = 0, sto[0][value_weight.front().second] = value_weight.front().first;
18-
size_t obj_num = value_weight.size();
19-
for (size_t i = 1; i < obj_num; i++)
20-
{
21-
// range from 0th to (obj_num - 1)th obj.
22-
for (auto &it : sto[i - 1])
23-
{
24-
// it.first indicates weight, second indicates value
25-
// "it" means the case with objs from 0 to i - 1
26-
sto[i][it.first] = max(sto[i][it.first], it.second); // No take the ith object, compare with another scheme, in which the ith obj was taken.
27-
if (it.first <= max_weight - value_weight[i].second) // no exceed the max weight
28-
{
29-
sto[i][it.first + value_weight[i].second] = it.second + value_weight[i].first; // take the ith obj
30-
}
31-
}
32-
}
33-
ull weight{0}, value{0};
34-
for (auto &&i : sto[obj_num - 1])
35-
{
36-
if (i.second > value)
37-
{
38-
weight = i.first, value = i.second;
39-
}
40-
}
41-
return make_pair(weight, value);
42-
}
16+
using W = int;
using V = int;

/**
 * @brief Solve the 0/1 knapsack problem with a one-dimensional DP table.
 *
 * Every item may be taken at most once. Items whose weight is negative or
 * larger than @p max_weight can never be part of a valid selection handled
 * by this table and are ignored. Sums of values are computed in V without
 * overflow checks.
 *
 * @param wei_val    Items as (weight, value) pairs.
 * @param max_weight Capacity of the knapsack (inclusive upper bound on the
 *                   total weight).
 * @return (weight, value): the best total value that fits into the knapsack
 *         and the smallest total weight that achieves it. Returns (0, 0)
 *         when nothing can be taken or when @p max_weight is negative.
 */
std::pair<W, V> simple_knapsack(const std::vector<std::pair<W, V>> &wei_val,
                                W max_weight) {
  if (max_weight < 0) {
    return {0, 0};
  }

  using Index = std::vector<V>::size_type;

  // buf[c] is the best value reachable with total weight <= c. Capacities
  // range over 0..max_weight inclusive, hence max_weight + 1 slots.
  std::vector<V> buf(static_cast<Index>(max_weight) + 1, 0);

  for (const auto &[weight, value] : wei_val) {
    if (weight < 0 || weight > max_weight) {
      continue;
    }
    // Walk capacities downwards so that buf[c - weight] still describes the
    // state before this item was considered (each item is used at most once).
    for (W c = max_weight; c >= weight; --c) {
      const V candidate = buf[static_cast<Index>(c - weight)] + value;
      if (candidate > buf[static_cast<Index>(c)]) {
        buf[static_cast<Index>(c)] = candidate;
      }
    }
  }

  // buf is non-decreasing in c, so the first capacity that already reaches
  // the optimum is the minimal weight needed for it.
  const V best = buf[static_cast<Index>(max_weight)];
  W used = 0;
  while (buf[static_cast<Index>(used)] != best) {
    ++used;
  }
  return {used, best};
}

dsu.cpp

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* @file dsu.cpp
2+
* @file DSU.cpp
33
* @brief
44
* @author Haoming Bai <haomingbai@hotmail.com>
55
* @date 2025-06-26
@@ -13,43 +13,53 @@
1313
#pragma once
1414

1515
#include <cstddef>
16+
#include <utility>
1617
#include <vector>
1718

18-
/**
 * @brief Disjoint-set union (union-find) over the indices 0 .. size - 1.
 *
 * parent_[i] is the parent of element i (a root is its own parent).
 * size_[r] is the number of elements in the tree rooted at r; it is only
 * kept up to date for roots.
 */
class DSU {
  std::vector<std::size_t> parent_, size_;

 public:
  /**
   * @brief Create @p size singleton sets, one per index.
   * @param size Number of elements managed by the structure.
   */
  DSU(std::size_t size) : parent_(size), size_(size, 1) {
    std::size_t next = 0;
    for (auto &p : parent_) {
      p = next++;
    }
  }

  /**
   * @brief Find the representative (root) of the set containing @p idx.
   *
   * Every element visited on the way up is re-attached directly to the
   * root, so later queries on the same path are short.
   *
   * @param idx Element index; must be smaller than the constructed size.
   * @return Index of the root of the set containing @p idx.
   */
  std::size_t findRoot(std::size_t idx) {
    // First pass: climb to the root.
    std::size_t root = idx;
    while (parent_[root] != root) {
      root = parent_[root];
    }

    // Second pass: hang every node of the path directly below the root.
    // Stops as soon as the current node is the root or one of its children.
    while (parent_[idx] != root) {
      const std::size_t up = parent_[idx];
      parent_[idx] = root;
      idx = up;
    }
    return root;
  }

  /**
   * @brief Merge the sets containing @p idx1 and @p idx2.
   *
   * The root of the smaller tree is attached below the root of the larger
   * one; on a tie the root of @p idx2 is attached below the root of
   * @p idx1. Does nothing when both already share a root.
   *
   * @param idx1 Element of the first set.
   * @param idx2 Element of the second set.
   */
  void unite(std::size_t idx1, std::size_t idx2) {
    // Work on the roots: only roots carry a valid size.
    std::size_t keep = findRoot(idx1);
    std::size_t absorb = findRoot(idx2);

    // Same root means the two elements are already in one set.
    if (keep == absorb) {
      return;
    }

    // `absorb` is attached below `keep`, so `keep` must be the larger tree.
    if (size_[keep] < size_[absorb]) {
      std::swap(keep, absorb);
    }

    parent_[absorb] = keep;
    // `keep` is now the common root; its tree holds both former trees.
    size_[keep] += size_[absorb];
  }

  /**
   * @brief Check whether two elements belong to the same set.
   * @return true when @p idx1 and @p idx2 have the same root.
   */
  bool inSameSet(std::size_t idx1, std::size_t idx2) {
    const std::size_t root1 = findRoot(idx1);
    const std::size_t root2 = findRoot(idx2);
    return root1 == root2;
  }
};

graph.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,11 +351,11 @@ template <typename T>
351351
std::vector<Edge<T>> kruskal(std::vector<Edge<T>> edges) {
352352
std::sort(edges.begin(), edges.end(),
353353
[](const auto &a, const auto &b) { return a.weight < b.weight; });
354-
dsu visited(edges.size());
354+
DSU visited(edges.size());
355355

356356
std::vector<Edge<T>> res;
357357
for (auto &it : res) {
358-
if (visited.inSameTree(it.p1, it.p2)) {
358+
if (visited.inSameSet(it.p1, it.p2)) {
359359
res.emplace_back(it);
360360
visited.unite(it.p1, it.p2);
361361
}

main.tex

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,10 @@
8080
\centering
8181
\vspace*{2cm}
8282

83-
% 封面主图占位符可替换为实际图片文件
83+
% 封面主图占位符, 可替换为实际图片文件
8484
% \includegraphics[width=0.5\textwidth]{cover_image1_placeholder}\\[1.5cm]
8585

86-
% 书名与副标题中间使用 en-dash
86+
% 书名与副标题, 中间使用 en-dash
8787
{\Huge\bfseries 基础算法示例 -- C++实现\par}
8888
\vspace{2cm}
8989
% 作者、机构等信息(可修改)
@@ -236,4 +236,44 @@ \chapter{计算几何}
236236

237237
\lstinputlisting[language=C++, caption=geometry.cpp, style=MyCStyle]{./geometry.cpp}
238238

239+
\chapter{字符串}
240+
241+
在计算机系统中, 字符串作为信息的基本载体, 承载着从数据存储到逻辑控制的核心功能. 尤其在类Unix生态中, 诸如\texttt{grep}的文本搜索、\texttt{sed}的流编辑及\texttt{awk}的模式处理等工具, 均构建于高效字符串操作之上, 印证了Knuth「字符串处理是程序设计技术的试金石」的论断.
242+
243+
本章将实现字符串处理中的部分基础算法模板, 涵盖:
244+
\begin{itemize}
245+
\item 字符串匹配(单模式/多模式)
246+
\item 字典树与自动机
247+
\end{itemize}
248+
249+
后续代码模板均以工业级效率为标准设计, 可直接应用于竞赛及工程场景.
250+
251+
\section{模式匹配}
252+
253+
文本模式匹配是信息检索与文本处理的基石, 其效率直接影响搜索引擎响应速度、基因序列分析等关键场景的性能. 以Unix工具链为例, 当\texttt{grep}在GB级日志中检索模式时, 朴素匹配$O(nm)$的时间复杂度将导致灾难性延迟.
254+
255+
本节实现Knuth-Morris-Pratt(KMP)算法模板, 其核心在于:
256+
\begin{itemize}
257+
\item 通过\textbf{失配函数}预处理模式串($O(m)$)
258+
\item 实现$O(n)$时间复杂度匹配
259+
\item 避免回溯的\textbf{状态机跳转}机制
260+
\end{itemize}
261+
代码设计支持动态模式更新与流式数据匹配, 可直接集成至文本处理系统.
262+
263+
\lstinputlisting[language=C++, caption=kmp.cpp, style=MyCStyle]{./str/kmp.cpp}
264+
265+
\section{字典树}
266+
267+
268+
字典树 (Trie) 作为高效处理字符串集合的树形数据结构, 在搜索引擎自动补全、拼写检查及路由协议中具有不可替代性. 其核心优势在于:
269+
\begin{itemize}
270+
\item \textbf{前缀共享}:具有公共前缀的字符串共享存储路径
271+
\item \textbf{检索加速}:$O(L)$时间完成键查询($L$为键长)
272+
\item \textbf{字典序遍历}:天然支持按字典序访问所有键
273+
\end{itemize}
274+
275+
本节实现基于数组的双版本Trie模板, 部分支持字符串的删除.
276+
277+
\lstinputlisting[language=C++, caption=trie.cpp, style=MyCStyle]{./str/trie.cpp}
278+
239279
\end{document}

str/kmp.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/**
2+
* @file kmp.cpp
3+
* @brief
4+
* @author Haoming Bai <haomingbai@hotmail.com>
5+
* @date 2025-08-03
6+
*
7+
* Copyright © 2025 Haoming Bai
8+
* SPDX-License-Identifier: MIT
9+
*
10+
* @details
11+
*/
12+
13+
#include <cstddef>
14+
#include <cstdint>
15+
#include <string>
16+
#include <string_view>
17+
#include <vector>
18+
19+
#include "../concepts.cpp"
20+
21+
size_t PatternMatching(const std::string_view src,
22+
const std::string_view pattern);
23+
24+
template <typename E, RandomResizableContainer<size_t> SizeContainer,
25+
RandomStdContainer<E> CharContainer>
26+
void PrefixFunction(SizeContainer &prefixes, const CharContainer &str) {
27+
if (str.empty()) {
28+
return;
29+
}
30+
31+
prefixes.resize(str.size(), 0);
32+
33+
for (size_t i = 1; i < str.size(); i++) {
34+
auto curr = prefixes[i - 1];
35+
while (curr != 0 && str[curr] != str[i]) {
36+
curr = prefixes[curr - 1];
37+
}
38+
if (str[curr] == str[i]) {
39+
curr++;
40+
}
41+
prefixes[i] = curr;
42+
}
43+
}
44+
45+
/**
 * @brief Find the first occurrence of @p pattern in @p src (KMP).
 *
 * The failure table is built for the pattern alone and the text is then
 * scanned once. The pattern is deliberately not concatenated with the text:
 * without a separator, borders of the concatenation can overlap the pattern
 * copy and report matches that are not inside @p src (e.g. pattern "aa" in
 * "aab", or pattern "aaa" in "ab").
 *
 * @param src     Text to search in.
 * @param pattern Pattern to search for.
 * @return Index in @p src of the first character of the first match, or
 *         SIZE_MAX when there is no match or when @p pattern is empty.
 */
size_t PatternMatching(const std::string_view src,
                       const std::string_view pattern) {
  if (pattern.empty() || pattern.size() > src.size()) {
    // SIZE_MAX plays the role of the conventional "-1" for "not found".
    return SIZE_MAX;
  }

  // PrefixFunction is instantiated with std::string here, as before; whether
  // std::string_view satisfies its container concept is not visible from
  // this file, so the pattern is copied once.
  const std::string pat(pattern);
  std::vector<size_t> prefixes;
  PrefixFunction<char>(prefixes, pat);

  // `matched` is the number of pattern characters currently matched against
  // the text ending at position i; it is always smaller than pat.size() at
  // the top of the loop because a full match returns immediately.
  size_t matched = 0;
  for (size_t i = 0; i < src.size(); i++) {
    while (matched != 0 && pat[matched] != src[i]) {
      matched = prefixes[matched - 1];
    }
    if (pat[matched] == src[i]) {
      matched++;
    }
    if (matched == pat.size()) {
      // The match ends at i; report the index of its first character.
      return i + 1 - pat.size();
    }
  }

  // No occurrence found.
  return SIZE_MAX;
}

0 commit comments

Comments
 (0)