位于分支 master

haomingbai · haomingbai · commit f23aec15e026 · 2025-11-04T15:24:28.000+08:00
您的分支与上游分支 'origin/master' 一致。

 要提交的变更：
	修改：     main.tex
	修改：     nearest_point_pair.cpp
	修改：     str/suffix_array.cpp
diff --git a/main.tex b/main.tex
@@ -246,7 +246,7 @@ \chapter{计算几何}
 
 \lstinputlisting[language=C++, caption=geometry.cpp, style=MyCStyle]{./geometry.cpp}
 
-\section{最近点对}
+\section*{最近点对}
 
 在平面中, 我们经常需要找到两个最近的点. 相比最传统的暴力方法, 即找到所有点对, 并计算出任意两者之间的距离, 一些方法可以大幅度简化计算, 进而有效降低我们的程序的时间复杂度. 本题中, 我们采用了一种分治与合并的方法, 从概率上, 让算法的复杂度降低到 $O(nlog(n))$.
 
diff --git a/nearest_point_pair.cpp b/nearest_point_pair.cpp
@@ -10,71 +10,126 @@
  * @details
  */
 
+/**
+ * @file nearest_point_pair.cpp
+ * @brief
+ * @author Haoming Bai <haomingbai@hotmail.com>
+ * @date   2025-09-16
+ *
+ * Copyright © 2025 Haoming Bai
+ * SPDX-License-Identifier: MIT
+ *
+ * @details
+ */
+
 #include <algorithm>
-#include <cmath>
 #include <cstddef>
-#include <cstdint>
-#include <cstdlib>
+#include <functional>
+#include <limits>
 #include <span>
+#include <vector>
+
+#include "./concepts.cpp"
 
+template <Multiplyable T>
 struct Point2D {
-  double x, y;
+  double x, y;  // 保持你原来的设计不变（模板参数存在但成员为 double）
 };
 
-double DistanceWith(const Point2D &a, const Point2D &b) {
-  return std::sqrt((a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y));
+// 计算两点间距离的平方：先把坐标转换为 T，再全部用 T 做运算（不开方）。
+// 前提：输入坐标为整数值，且平方和不会使 T 溢出。
+template <Multiplyable T>
+T DistanceSquareWith(const Point2D<T>& a, const Point2D<T>& b) {
+  T dx = static_cast<T>(a.x) - static_cast<T>(b.x);
+  T dy = static_cast<T>(a.y) - static_cast<T>(b.y);
+  return dx * dx + dy * dy;
 }
 
-double FindNearestDistance(std::span<Point2D> point_list) {
-  if (point_list.size() == 2) {
-    return DistanceWith(point_list.front(), point_list.back());
-  } else if (point_list.size() < 2) {
-    return MAXFLOAT;
-  }
-
-  auto left_span = point_list.subspan(0, point_list.size() / 2);
-  auto right_span = point_list.subspan(point_list.size() / 2);
-
-  double distance_left = FindNearestDistance(left_span);
-  double distance_right = FindNearestDistance(right_span);
-
-  auto dist_to_cmp = std::min(distance_left, distance_right);
-
-  auto mid_x = (point_list[point_list.size() / 2 - 1].x +
-                point_list[point_list.size() / 2].x) /
-               2;
-  size_t left_edge = SIZE_MAX, right_edge = SIZE_MAX;
-  size_t left_mid_edge = left_span.size() - 1;
-  size_t right_mid_edge = point_list.size() / 2;
-
-  {
-    for (ptrdiff_t i = left_span.size() - 1; i >= 0; i--) {
-      if (std::abs(left_span[i].x - mid_x) < dist_to_cmp) {
-        left_edge = i;
-      } else {
-        break;
+// 前提：point_list 已按 x 坐标排好序（由调用方负责排序）；返回最近点对距离的平方。
+template <Multiplyable T>
+T FindNearestDistanceSquare(std::span<Point2D<T>> point_list) {
+  const size_t n = point_list.size();
+  if (n < 2) return std::numeric_limits<T>::max();
+  if (n == 2) return DistanceSquareWith(point_list.front(), point_list.back());
+
+  // 为递归使用，直接操作底层数据指针，避免频繁拷贝 span 对象
+  Point2D<T>* base = point_list.data();
+
+  // 递归 lambda，半开区间 [l, r)
+  std::function<T(size_t, size_t)> rec;
+  rec = [&](size_t l, size_t r) -> T {
+    size_t len = r - l;
+    if (len < 2) {
+      return std::numeric_limits<T>::max();
+    }
+    if (len == 2) {
+      // 两点时按原样比较（不改变顺序）
+      return DistanceSquareWith(base[l], base[l + 1]);
+    }
+
+    size_t m = l + (len >> 1);
+    // 分割线的 x 坐标取右半部分首元素 base[m] 的 x（常见做法，避免浮点平均）。
+    // 注意：这依赖于 base 在 [l, r) 内按 x 有序；递归过程只读取 base、不会重排它，
+    // 因此 mid_x 在递归之前读取是安全的。
+    T mid_x = static_cast<T>(base[m].x);
+
+    // 递归求左右最短
+    T dl = rec(l, m);
+    T dr = rec(m, r);
+    T d = dl < dr ? dl : dr;
+
+    // 找到横向候选区间：从中间向两边线性扩展直到 dx^2 >= d
+    // （比起二分查找，这里更简单、分支更少，且通常很快——因为带宽一般较小）
+    size_t left_edge = m;  // inclusive
+    if (m > l) {
+      for (ptrdiff_t i = static_cast<ptrdiff_t>(m) - 1;
+           i >= static_cast<ptrdiff_t>(l); --i) {
+        T dx = static_cast<T>(base[i].x) - mid_x;
+        if (dx * dx < d)
+          left_edge = static_cast<size_t>(i);
+        else
+          break;
       }
     }
 
-    for (long i = 0; i < right_span.size(); i++) {
-      if (std::abs(right_span[i].x - mid_x) < dist_to_cmp) {
-        right_edge = i + right_mid_edge;
-      } else {
+    size_t right_edge = m;  // exclusive
+    for (size_t j = m; j < r; ++j) {
+      T dx = static_cast<T>(base[j].x) - mid_x;
+      if (dx * dx < d)
+        right_edge = j + 1;  // j included
+      else
         break;
-      }
     }
-  }
 
-  if (left_edge == SIZE_MAX || right_edge == SIZE_MAX) {
-    return dist_to_cmp;
-  }
+    // 候选区间 [left_edge, right_edge) 为空时，直接返回
+    if (left_edge >= right_edge) return d;
+
+    // 把候选点放到临时数组并按 y 排序（这样在 strip 内可以早停）
+    std::vector<Point2D<T>*> strip;
+    strip.reserve(right_edge - left_edge);
+    for (size_t idx = left_edge; idx < right_edge; ++idx)
+      strip.push_back(&base[idx]);
 
-  for (auto i = left_edge; i <= left_mid_edge; i++) {
-    for (auto j = right_mid_edge; j <= right_edge; j++) {
-      dist_to_cmp =
-          std::min(dist_to_cmp, DistanceWith(point_list[i], point_list[j]));
+    std::sort(strip.begin(), strip.end(),
+              [](const Point2D<T>* a, const Point2D<T>* b) {
+                if (a->y != b->y) return a->y < b->y;
+                return a->x < b->x;
+              });
+
+    // 经典 strip 比较：对每个点只检查 y 差绝对值小于 sqrt(d) 的后续点
+    // 这里比较使用 dy^2 >= d 的早停条件（避免 sqrt）
+    for (size_t i = 0; i < strip.size(); ++i) {
+      for (size_t j = i + 1; j < strip.size(); ++j) {
+        T dy = static_cast<T>(strip[j]->y) - static_cast<T>(strip[i]->y);
+        if (dy * dy >= d) break;  // y 差已足够大，后面不用再看
+        // 计算完整平方距离
+        T cur = DistanceSquareWith(*strip[i], *strip[j]);
+        if (cur < d) d = cur;
+      }
     }
-  }
 
-  return dist_to_cmp;
+    return d;
+  };
+
+  return rec(0, n);
 }
diff --git a/str/suffix_array.cpp b/str/suffix_array.cpp
@@ -42,7 +42,6 @@ inline void _InducedSort(const std::vector<long> &str,
   // 为了方便抄板子, 我这里先把一些变量提取出来.
   // 开销应该不大, 因为估计编译器一下就给消掉了.
   const long n = str.size(), max_val = buckets.size() - 1;
-
   // 从左向右扫描SA数组,
   // 这里的目标是从LMS进行L诱导.
   // 然后把L放到每个桶的头部.
@@ -59,7 +58,6 @@ inline void _InducedSort(const std::vector<long> &str,
       prev_bucket.left++;
     }
   }
-
   // 将桶的底部重置, 这里的意思是删除那些LMS.
   // 删除掉LMS之后, 再根据之前放入的L的字符,
   // 诱导出所有的S的字符的位置.
@@ -71,7 +69,6 @@ inline void _InducedSort(const std::vector<long> &str,
     // 也就意味着这个桶满了.
     buckets[i].right = prefix_sums[i] - 1;
   }
-
   // 从右往左扫描.
   // 这次扫描要把S类型的字符放进桶.
   for (long i = n - 1; i >= 0; i--) {
@@ -88,7 +85,6 @@ inline void _InducedSort(const std::vector<long> &str,
     }
   }
 }
-
 // str必须是已经被处理好的, 确认了最后的数字是全局唯一最小的哨兵的串.
 // max_val可以给的稍微大一点也没关系.
 inline std::vector<long> _SAIS(const std::vector<long> &str,
@@ -103,21 +99,17 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
   // str[i] < str[i + 1], 记作S,
   // str[i] == str[i + 1], type[i] = type[i + 1].
   std::vector<Type> type(str.size(), S_TYPE);
-
   // 这里因为建立桶的需求,
   // 所以需要统计每个字符在这里都出现了几次.
   // 因为我们两次诱导排序,
   // 使用的是同一个串, 所以我们就不再排序的过程中扫描这个了哈.
   std::vector<long> cnt_occurance(max_val + 1, 0);
-
   // 尾部字符单独统计,
   // 因为下面扫描全字符串是从倒数第二个字符开始的.
   cnt_occurance[str.back()]++;
-
   // 收集所有LMS的下标.
   std::vector<long> lms_incidies;
   lms_incidies.reserve(str.size() / 2);
-
   // 逆序遍历字符串, 获取类型.
   // 这里逆序遍历的原因是, 如果
   // str[i] == str[i + 1], 那么则有:
@@ -140,11 +132,9 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
     } else {
       assert(false);
     }
-
     // 记录出现次数.
     cnt_occurance[str[i]]++;
   }
-
   // 创建前缀和数组, 为建立桶和诱导排序做准备.
   std::vector<long> prefix_sums(max_val + 2);
   std::partial_sum(cnt_occurance.begin(), cnt_occurance.end(),
@@ -158,7 +148,6 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
     buckets[i].left = prefix_sums[i - 1];
     buckets[i].right = prefix_sums[i] - 1;
   }
-
   // 放入LMS.
   // 这里对于同一个字母, 入桶的顺序应该是倒序的.
   // 这个似乎和诱导排序的实现有关系.
@@ -169,19 +158,15 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
     auto curr_idx = *it;
     auto curr_char = str[curr_idx];
     auto &curr_bucket = buckets[curr_char];
-
     // 将对应的下标放入桶中.
     SA[curr_bucket.right] = curr_idx;
     curr_bucket.right--;
   }
-
   // 进行第一次诱导排序.
   _InducedSort(str, type, SA, prefix_sums, buckets);
-
   // 创建名字和下标的对应关系.
   // 这里用names数组表达对应位置的名字.
   std::vector<long> names(str.size(), -1);
-
   // 这两个变量分别记录了下发的名字的数量,
   // 和上一个被探测到的LMS的坐标.
   long name_cnt = 0;
@@ -203,7 +188,6 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
         return false;
       }
     };
-
     // 这里的这个函数是用来比较两个LMS子串是否相等的.
     const auto is_lms_eq = [&](const unsigned long idx1,
                                const unsigned long idx2) -> bool {
@@ -217,7 +201,6 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
       if (idx1 == str.size() - 1 || idx2 == str.size() - 1) {
         return false;
       }
-
       // 从偏移量为0开始比较
       long offset = 0;
       do {
@@ -229,18 +212,15 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
         if (type[idx1 + offset] != type[idx2 + offset]) {
           return false;
         }
-
         // 手动更新偏移量
         offset++;
         // 循环条件: 两个待比较位置都没有来到下一个LMS
       } while (!is_lms(idx1 + offset) && !is_lms(idx2 + offset));
-
       // 如果有一个没有到达下一个LMS但是另外一个到达,
       // 那么二者一定不相等.
       if (!is_lms(idx1 + offset) || !is_lms(idx2 + offset)) {
         return false;
       }
-
       // 否则还是比较这两个LMS对应的字符.
       if (str[idx1 + offset] != str[idx2 + offset]) {
         return false;
@@ -250,7 +230,6 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
       }
       return true;
     };
-
     // 这个时候, it是当前正在处理的的lms下标
     if (is_lms(it)) {
       // 如果上一个LMS存在,
@@ -271,13 +250,11 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
         // 那么肯定要分配一个新名字.
         name_cnt++;
       }
-
       // 将名字下发下去.
       names[it] = name_cnt - 1;
       last_lms_idx = it;
     }
   }
-
   // 命名唯一, 无需递归,
   // 直接返回.
   if (static_cast<unsigned long>(name_cnt) == lms_incidies.size()) {
@@ -296,10 +273,8 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
       auto curr_lms_idx = lms_incidies[i];
       lms_str[i] = names[curr_lms_idx];
     }
-
     // 最大的一个名字是最大的name_cnt - 1.
     lms_SA = _SAIS(lms_str, name_cnt - 1);
-
     // 生成一个新的桶并清空SA数组,
     // 进行第二次诱导排序.
     std::fill(SA.begin(), SA.end(), -1);
@@ -308,7 +283,6 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
       buckets[i].left = prefix_sums[i - 1];
       buckets[i].right = prefix_sums[i] - 1;
     }
-
     // 这里倒序遍历, 同时将遍历到的位置放在桶对应字母的右侧.
     // 因此可以保证lms_SA中靠右的下标会优先被处理后放入桶的右侧.
     // 因此对于同一个字母, lms_SA中靠右的在桶中也靠右.
@@ -321,19 +295,15 @@ inline std::vector<long> _SAIS(const std::vector<long> &str,
       // 存放的那个lms的名字对应的下标,
       // 就是lms_incidies[lms_SA[i]]中存放的那个下标.
       auto curr_lms_idx = lms_incidies[lms_SA[i]];
-
       // 同样地获取桶
       auto curr_char = str[curr_lms_idx];
       auto &curr_bucket = buckets[curr_char];
-
       // 将下标放在桶的右侧.
       SA[curr_bucket.right] = curr_lms_idx;
       curr_bucket.right--;
     }
-
     // 第二次诱导排序.
     _InducedSort(str, type, SA, prefix_sums, buckets);
-
     return SA;
   }
 }
@@ -350,7 +320,6 @@ std::vector<unsigned long> BuildSuffixArray(const Container &str) {
     if (processed[i] > max_val) max_val = processed[i];
   }
   processed.back() = 0;  // 哨兵，唯一且最小
-
   auto res = _SAIS(processed, max_val);
   // res[0] 对应哨兵的位置 (通常是 processed.size() - 1)
   std::vector<unsigned long> processed_res(std::next(res.begin()), res.end());
@@ -368,7 +337,6 @@ std::vector<unsigned long> suffix_array(Container &&str) {
     if (processed[i] > max_val) max_val = processed[i];
   }
   processed.back() = 0;  // 哨兵，唯一且最小
-
   auto res = _SAIS(processed, max_val);
   // res[0] 对应哨兵的位置 (通常是 processed.size() - 1)
   std::vector<unsigned long> processed_res(std::next(res.begin()), res.end());