From a7c9671a093c8df5ef361d8964fc82c0462cc2c6 Mon Sep 17 00:00:00 2001 From: Missing-Hex <1304266750@qq.com> Date: Fri, 5 Jun 2026 20:52:25 +0800 Subject: [PATCH] refactor: optimize subspace diag freq in bpcg --- source/source_hsolver/diago_bpcg.cpp | 6 ++- source/source_hsolver/diago_bpcg.h | 2 + .../source_hsolver/kernels/bpcg_kernel_op.cpp | 44 +++++++++++++++++++ .../source_hsolver/kernels/bpcg_kernel_op.h | 16 ++++++- 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/source/source_hsolver/diago_bpcg.cpp b/source/source_hsolver/diago_bpcg.cpp index d4db3d790bc..980c9156802 100644 --- a/source/source_hsolver/diago_bpcg.cpp +++ b/source/source_hsolver/diago_bpcg.cpp @@ -282,6 +282,10 @@ void DiagoBPCG::diag(const HPsiFunc& hpsi_func, int max_iter = current_scf_iter > 1 ? this->nline : this->nline * 6; + + // Compute optimal frequency for subspace diagonalization based on problem size + this->optimal_freq = compute_optimal_freq(this->n_band, this->n_basis, this->nline); + do { ++ntry; @@ -314,7 +318,7 @@ void DiagoBPCG::diag(const HPsiFunc& hpsi_func, // orthogonal psi by cholesky method this->orth_cholesky(this->work, this->psi, this->hpsi, this->hsub); - if (current_scf_iter == 1 && ntry % this->nline == 0) { + if (current_scf_iter == 1 && ntry % this->optimal_freq == 0) { this->calc_hsub_with_block(hpsi_func, psi_in, this->psi, this->hpsi, this->hsub, this->work, this->eigen); } } while (ntry < max_iter && this->test_error(this->err_st, ethr_band)); diff --git a/source/source_hsolver/diago_bpcg.h b/source/source_hsolver/diago_bpcg.h index 27f528024ba..e89a59a5b10 100644 --- a/source/source_hsolver/diago_bpcg.h +++ b/source/source_hsolver/diago_bpcg.h @@ -84,6 +84,8 @@ class DiagoBPCG int n_dim = 0; /// max iter steps for all-band cg loop int nline = 4; + /// optimal frequency for subspace diagonalization (computed dynamically) + int optimal_freq = 4; /// parallel matrix multiplication ModuleBase::PGemmCN pmmcn; PLinearTransform plintrans; diff 
--git a/source/source_hsolver/kernels/bpcg_kernel_op.cpp b/source/source_hsolver/kernels/bpcg_kernel_op.cpp
index 88f94e288c6..e3af30c98af 100644
--- a/source/source_hsolver/kernels/bpcg_kernel_op.cpp
+++ b/source/source_hsolver/kernels/bpcg_kernel_op.cpp
@@ -207,6 +207,65 @@ struct refresh_hcc_scc_vcc_op
     }
 };
 
+/**
+ * @brief Choose how many CG steps to run between subspace diagonalization
+ *        calls, based on problem size.
+ *
+ * The returned value is an interval (a period), not a rate: a smaller value
+ * means subspace diagonalization is performed more often. DiagoBPCG::diag
+ * uses it as a modulus (ntry % freq == 0), so it is never less than 1.
+ *
+ * The problem size is n_band * n_basis, evaluated in size_t rather than int
+ * so that the product of two large ints does not overflow. Non-positive
+ * dimensions are treated as 0.
+ *
+ * Tier thresholds (problem_size = n_band * n_basis):
+ *   size <      10,000 -> nline          (small: fewest calls, least overhead)
+ *   size <     100,000 -> min(6, nline)
+ *   size <     500,000 -> min(5, nline)
+ *   size <   2,000,000 -> min(4, nline)
+ *   size <  10,000,000 -> min(3, nline)
+ *   size >= 10,000,000 -> min(2, nline)  (large: most frequent calls)
+ *
+ * When nline < 1 the result is 1.
+ *
+ * @param n_band  Number of bands (eigenvectors).
+ * @param n_basis Number of basis functions.
+ * @param nline   Upper bound for the returned interval.
+ * @return Number of CG steps between subspace diagonalization calls; always >= 1.
+ */
+int compute_optimal_freq(const int n_band, const int n_basis, const int nline)
+{
+    // A negative int converted to size_t wraps to a huge value and would pick
+    // the "largest problem" tier, so non-positive dimensions are mapped to 0.
+    const size_t bands = n_band > 0 ? static_cast<size_t>(n_band) : 0;
+    const size_t basis = n_basis > 0 ? static_cast<size_t>(n_basis) : 0;
+    const size_t problem_size = bands * basis;
+
+    int freq = nline;
+    if (problem_size >= 10000000) {
+        freq = 2;
+    } else if (problem_size >= 2000000) {
+        freq = 3;
+    } else if (problem_size >= 500000) {
+        freq = 4;
+    } else if (problem_size >= 100000) {
+        freq = 5;
+    } else if (problem_size >= 10000) {
+        freq = 6;
+    }
+
+    // Apply the upper bound first and the lower bound last: the result is used
+    // as a modulus, so it must stay >= 1 even when nline <= 0.
+    if (freq > nline) {
+        freq = nline;
+    }
+    if (freq < 1) {
+        freq = 1;
+    }
+    return freq;
+}
+
 template struct calc_grad_with_block_op, base_device::DEVICE_CPU>;
 template struct line_minimize_with_block_op, base_device::DEVICE_CPU>;
 template struct calc_grad_with_block_op, base_device::DEVICE_CPU>;
diff --git a/source/source_hsolver/kernels/bpcg_kernel_op.h b/source/source_hsolver/kernels/bpcg_kernel_op.h
index 9ac7c5e2cee..a29819b7877 100644
--- a/source/source_hsolver/kernels/bpcg_kernel_op.h
+++ b/source/source_hsolver/kernels/bpcg_kernel_op.h
@@ -3,7 +3,22 @@
 #include "source_base/macros.h"
 #include "source_base/module_device/types.h"
 namespace hsolver
 {
+/**
+ * @brief Choose how many CG steps to run between subspace diagonalization
+ *        calls, based on problem size (n_band * n_basis).
+ *
+ * Larger problems get a smaller interval (more frequent subspace
+ * diagonalization); smaller problems get a larger one to reduce overhead.
+ * The result is always in [1, max(1, nline)], so it is safe as a modulus.
+ *
+ * @param n_band  Number of bands (eigenvectors).
+ * @param n_basis Number of basis functions.
+ * @param nline   Upper bound for the returned interval.
+ * @return Number of CG steps between calc_hsub_with_block calls; always >= 1.
+ */
+int compute_optimal_freq(const int n_band, const int n_basis, const int nline);
+
 template struct line_minimize_with_block_op