From 0dd7bd4c5538d26c002e581d215c4bbbb951bb62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=AE=B6=E9=BD=90?= <1544375273@qq.com> Date: Tue, 7 Apr 2026 16:59:48 +0800 Subject: [PATCH 1/2] Add DiagoPPCG class for diagonal preconditioned conjugate gradient Add ppcg diago_hsolver.cpp to the Hsolver, yet not completed. --- source/source_hsolver/diago_ppcg.cpp | 761 +++++++++++++++++++++++++++ 1 file changed, 761 insertions(+) create mode 100644 source/source_hsolver/diago_ppcg.cpp diff --git a/source/source_hsolver/diago_ppcg.cpp b/source/source_hsolver/diago_ppcg.cpp new file mode 100644 index 00000000000..9ae7907165d --- /dev/null +++ b/source/source_hsolver/diago_ppcg.cpp @@ -0,0 +1,761 @@ +#include "source_hsolver/diago_ppcg.h" + +#include +#include +#include + +extern "C" +{ +void dsyevd_(const char* jobz, const char* uplo, + const int* n, double* a, const int* lda, double* w, + double* work, const int* lwork, int* iwork, + const int* liwork, int* info); + +void ssyevd_(const char* jobz, const char* uplo, + const int* n, float* a, const int* lda, float* w, + float* work, const int* lwork, int* iwork, + const int* liwork, int* info); + +void dsygvd_(const int* itype, const char* jobz, const char* uplo, + const int* n, double* a, const int* lda, double* b, + const int* ldb, double* w, double* work, const int* lwork, + int* iwork, const int* liwork, int* info); + +void ssygvd_(const int* itype, const char* jobz, const char* uplo, + const int* n, float* a, const int* lda, float* b, + const int* ldb, float* w, float* work, const int* lwork, + int* iwork, const int* liwork, int* info); + +void dpotrf_(const char* uplo, const int* n, double* a, const int* lda, int* info); +void spotrf_(const char* uplo, const int* n, float* a, const int* lda, int* info); +} + +namespace hsolver +{ +namespace +{ + +template +struct LapackReal; + +template <> +struct LapackReal +{ + static void syevd(const int n, double* a, double* w) + { + const char jobz = 'V'; + const char uplo = 'U'; + const int lda = n; + int info = 0; + const int lwork = std::max(1, 1 + 6 * n + 2 * n * n); + const int liwork = std::max(1, 3 + 5 * n); + std::vector work(lwork, 0.0); + std::vector iwork(liwork, 0); + dsyevd_(&jobz, &uplo, &n, a, &lda, w, + work.data(), &lwork, iwork.data(), &liwork, &info); + if (info != 0) + { + throw std::runtime_error("DiagoPPCG: dsyevd failed."); + } + } + + static void sygvd(const int n, double* a, double* b, double* w) + { + const int itype = 1; + const char jobz = 'V'; + const char uplo = 'U'; + const int lda = n; + const int ldb = n; + int info = 0; + const int lwork = std::max(1, 1 + 18 * n + 10 * n * n); + const int liwork = std::max(1, 3 + 10 * n); + std::vector work(lwork, 0.0); + std::vector iwork(liwork, 0); + dsygvd_(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + work.data(), &lwork, iwork.data(), &liwork, &info); + if (info != 0) + { + throw std::runtime_error("DiagoPPCG: dsygvd failed."); + } + } + + static void potrf(const int n, double* a) + { + const char uplo = 'U'; + const int lda = n; + int info = 0; + dpotrf_(&uplo, &n, a, &lda, &info); + if (info != 0) + { + throw std::runtime_error("DiagoPPCG: dpotrf failed."); + } + } +}; + +template <> +struct LapackReal +{ + static void syevd(const int n, float* a, float* w) + { + const char jobz = 'V'; + const char uplo = 'U'; + const int lda = n; + int info = 0; + const int lwork = std::max(1, 1 + 6 * n + 2 * n * n); + const int liwork = std::max(1, 3 + 5 * n); + std::vector work(lwork, 0.0f); + std::vector iwork(liwork, 0); + ssyevd_(&jobz, &uplo, &n, a, &lda, w, + work.data(), &lwork, iwork.data(), &liwork, &info); + if (info != 0) + { + throw std::runtime_error("DiagoPPCG: ssyevd failed."); + } + } + + static void sygvd(const int n, float* a, float* b, float* w) + { + const int itype = 1; + const char jobz = 'V'; + const char uplo = 'U'; + const int lda = n; + const int ldb = n; + int info = 0; + const int lwork = std::max(1, 1 + 18 * n + 10 * n * n); + const int liwork = std::max(1, 3 + 10 * n); + std::vector work(lwork, 0.0f); + std::vector iwork(liwork, 0); + ssygvd_(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + work.data(), &lwork, iwork.data(), &liwork, &info); + if (info != 0) + { + throw std::runtime_error("DiagoPPCG: ssygvd failed."); + } + } + + static void potrf(const int n, float* a) + { + const char uplo = 'U'; + const int lda = n; + int info = 0; + spotrf_(&uplo, &n, a, &lda, &info); + if (info != 0) + { + throw std::runtime_error("DiagoPPCG: spotrf failed."); + } + } +}; + +template +inline T zero_value() +{ + return T(0); +} + +template +inline void set_zero(std::vector& x) +{ + std::fill(x.begin(), x.end(), zero_value()); +} + +} // namespace + +template +DiagoPPCG::DiagoPPCG(const Real& diag_thr, + const int& diag_iter_max, + const int& sbsize, + const int& rr_step, + const bool gamma_g0_real) + : maxter_(diag_iter_max), + sbsize_(std::max(1, sbsize)), + rr_step_(std::max(1, rr_step)), + diag_thr_(std::max(diag_thr, static_cast(1.0e-14))), + gamma_g0_real_(gamma_g0_real) +{ +} + +template +void DiagoPPCG::validate_input(T* psi_in, Real* eigenvalue_in, const Real* prec) const +{ + if (psi_in == nullptr || eigenvalue_in == nullptr) + { + throw std::invalid_argument("DiagoPPCG: psi/eigenvalue pointer is null."); + } + if (prec == nullptr) + { + throw std::invalid_argument("DiagoPPCG: precondition pointer is null."); + } + if (ld_psi_ <= 0 || n_band_ <= 0 || n_dim_ <= 0) + { + throw std::invalid_argument("DiagoPPCG: invalid dimensions."); + } + if (n_dim_ > ld_psi_) + { + throw std::invalid_argument("DiagoPPCG: dim must not exceed ld_psi."); + } +} + +template +void DiagoPPCG::force_g0_real(T* x, const int ncol) const +{ + if (!gamma_g0_real_ || n_dim_ <= 0) + { + return; + } + for (int j = 0; j < ncol; ++j) + { + x[idx(0, j, ld_psi_)] = T(std::real(x[idx(0, j, ld_psi_)]), 0.0); + } +} + +template +void DiagoPPCG::apply_h(const HPsiFunc& hpsi_func, T* psi_in, T* hpsi_out, const int ncol) const +{ + hpsi_func(psi_in, hpsi_out, ld_psi_, ncol); +} + +template +void DiagoPPCG::apply_s(const SPsiFunc& spsi_func, T* psi_in, T* spsi_out, const int ncol) const +{ + if (spsi_func) + { + spsi_func(psi_in, spsi_out, ld_psi_, ncol); + } + else + { + for (int j = 0; j < ncol; ++j) + { + std::copy(psi_in + j * ld_psi_, psi_in + (j + 1) * ld_psi_, spsi_out + j * ld_psi_); + } + } +} + +template +typename DiagoPPCG::Real DiagoPPCG::gamma_dot(const T* x, const T* y) const +{ + Real acc = 0; + for (int i = 0; i < n_dim_; ++i) + { + acc += static_cast(std::real(std::conj(x[i]) * y[i])); + } + return acc; +} + +template +void DiagoPPCG::gram(const T* a, + const T* b, + const int ncol_a, + const int ncol_b, + std::vector& out, + const int ld_out) const +{ + out.assign(ld_out * ncol_b, static_cast(0)); + for (int jb = 0; jb < ncol_b; ++jb) + { + for (int ia = 0; ia < ncol_a; ++ia) + { + out[ia + jb * ld_out] = gamma_dot(a + ia * ld_psi_, b + jb * ld_psi_); + } + } +} + +template +void DiagoPPCG::copy_cols(const T* src, const std::vector& cols, std::vector& dst) const +{ + dst.assign(ld_psi_ * cols.size(), zero_value()); + for (int j = 0; j < static_cast(cols.size()); ++j) + { + const int c = cols[j]; + std::copy(src + c * ld_psi_, src + c * ld_psi_ + ld_psi_, dst.begin() + j * ld_psi_); + } +} + +template +void DiagoPPCG::scatter_cols(T* dst, const std::vector& cols, const std::vector& src) const +{ + for (int j = 0; j < static_cast(cols.size()); ++j) + { + const int c = cols[j]; + std::copy(src.begin() + j * ld_psi_, src.begin() + (j + 1) * ld_psi_, dst + c * ld_psi_); + } +} + +template +void DiagoPPCG::project_against(const T* basis, + const std::vector& basis_cols, + std::vector& x, + const std::vector& x_cols) const +{ + if (basis_cols.empty() || x_cols.empty()) + { + return; + } + + std::vector basis_block; + copy_cols(basis, basis_cols, basis_block); + + std::vector x_block; + copy_cols(x.data(), x_cols, x_block); + + std::vector g; + gram(basis_block.data(), + x_block.data(), + static_cast(basis_cols.size()), + static_cast(x_cols.size()), + g, + static_cast(basis_cols.size())); + + for (int j = 0; j < static_cast(x_cols.size()); ++j) + { + const int c = x_cols[j]; + for (int i = 0; i < static_cast(basis_cols.size()); ++i) + { + const int bc = basis_cols[i]; + const Real coeff = g[i + j * static_cast(basis_cols.size())]; + for (int ig = 0; ig < n_dim_; ++ig) + { + x[idx(ig, c, ld_psi_)] -= basis[idx(ig, bc, ld_psi_)] * coeff; + } + } + } +} + +template +void DiagoPPCG::divide_by_preconditioner(const std::vector& active_cols, + const Real* prec, + std::vector& x) const +{ + for (const int c : active_cols) + { + for (int ig = 0; ig < n_dim_; ++ig) + { + x[idx(ig, c, ld_psi_)] /= std::max(prec[ig], static_cast(1.0e-12)); + } + } +} + +template +void DiagoPPCG::lock_epairs(const std::vector& residual, + const std::vector& ethr_band, + std::vector& active_cols) const +{ + active_cols.clear(); + for (int j = 0; j < n_band_; ++j) + { + Real nrm2 = 0; + for (int ig = 0; ig < n_dim_; ++ig) + { + nrm2 += static_cast(std::norm(residual[idx(ig, j, ld_psi_)])); + } + const Real rnrm = std::sqrt(std::max(nrm2, static_cast(0))); + const Real thr = std::sqrt(std::max(static_cast(ethr_band[j]), diag_thr_)); + if (rnrm > thr) + { + active_cols.push_back(j); + } + } +} + +template +void DiagoPPCG::build_small_subspace(const T* psi, + const std::vector& cols, + const bool use_p, + SmallSubspace& subspace) const +{ + const int l = static_cast(cols.size()); + const int nblk = use_p ? 3 : 2; + const int dim = nblk * l; + subspace.k.assign(dim * dim, static_cast(0)); + subspace.m.assign(dim * dim, static_cast(0)); + subspace.eval.assign(dim, static_cast(0)); + + std::vector psi_l, hpsi_l, w_l, hw_l, p_l, hp_l; + copy_cols(psi, cols, psi_l); + copy_cols(hpsi_.data(), cols, hpsi_l); + copy_cols(w_.data(), cols, w_l); + copy_cols(hw_.data(), cols, hw_l); + if (use_p) + { + copy_cols(p_.data(), cols, p_l); + copy_cols(hp_.data(), cols, hp_l); + } + + auto fill_sym_block = [&](const std::vector& a, + const std::vector& b, + const int r0, + const int c0, + std::vector& mat) + { + std::vector g; + gram(a.data(), b.data(), l, l, g, l); + for (int j = 0; j < l; ++j) + { + for (int i = 0; i < l; ++i) + { + mat[(r0 + i) + (c0 + j) * dim] = g[i + j * l]; + mat[(c0 + j) + (r0 + i) * dim] = g[i + j * l]; + } + } + }; + + fill_sym_block(psi_l, hpsi_l, 0, 0, subspace.k); + fill_sym_block(psi_l, psi_l, 0, 0, subspace.m); + + fill_sym_block(w_l, hw_l, l, l, subspace.k); + fill_sym_block(w_l, w_l, l, l, subspace.m); + + fill_sym_block(psi_l, hw_l, 0, l, subspace.k); + fill_sym_block(psi_l, w_l, 0, l, subspace.m); + + if (use_p) + { + fill_sym_block(p_l, hp_l, 2 * l, 2 * l, subspace.k); + fill_sym_block(p_l, p_l, 2 * l, 2 * l, subspace.m); + + fill_sym_block(psi_l, hp_l, 0, 2 * l, subspace.k); + fill_sym_block(psi_l, p_l, 0, 2 * l, subspace.m); + + fill_sym_block(w_l, hp_l, l, 2 * l, subspace.k); + fill_sym_block(w_l, p_l, l, 2 * l, subspace.m); + } +} + +template +void DiagoPPCG::solve_small_generalized(const int dim, SmallSubspace& subspace) const +{ + LapackReal::sygvd(dim, subspace.k.data(), subspace.m.data(), subspace.eval.data()); +} + +template +void DiagoPPCG::update_one_block(T* psi, + const std::vector& cols, + const int l, + const bool use_p, + const SmallSubspace& subspace) const +{ + const int dim = (use_p ? 3 : 2) * l; + const Real* eigvec = subspace.k.data(); + + std::vector psi_l, hpsi_l, w_l, hw_l, p_l, hp_l; + copy_cols(psi, cols, psi_l); + copy_cols(hpsi_.data(), cols, hpsi_l); + copy_cols(w_.data(), cols, w_l); + copy_cols(hw_.data(), cols, hw_l); + if (use_p) + { + copy_cols(p_.data(), cols, p_l); + copy_cols(hp_.data(), cols, hp_l); + } + + std::vector psi_new(ld_psi_ * l, zero_value()); + std::vector hpsi_new(ld_psi_ * l, zero_value()); + std::vector p_new(ld_psi_ * l, zero_value()); + std::vector hp_new(ld_psi_ * l, zero_value()); + + for (int j = 0; j < l; ++j) + { + for (int i = 0; i < l; ++i) + { + const Real cpsi = eigvec[i + j * dim]; + const Real cw = eigvec[(l + i) + j * dim]; + + for (int ig = 0; ig < n_dim_; ++ig) + { + psi_new[idx(ig, j, ld_psi_)] += psi_l[idx(ig, i, ld_psi_)] * cpsi + + w_l[idx(ig, i, ld_psi_)] * cw; + hpsi_new[idx(ig, j, ld_psi_)] += hpsi_l[idx(ig, i, ld_psi_)] * cpsi + + hw_l[idx(ig, i, ld_psi_)] * cw; + p_new[idx(ig, j, ld_psi_)] += w_l[idx(ig, i, ld_psi_)] * cw; + hp_new[idx(ig, j, ld_psi_)] += hw_l[idx(ig, i, ld_psi_)] * cw; + } + + if (use_p) + { + const Real cp = eigvec[(2 * l + i) + j * dim]; + for (int ig = 0; ig < n_dim_; ++ig) + { + psi_new[idx(ig, j, ld_psi_)] += p_l[idx(ig, i, ld_psi_)] * cp; + hpsi_new[idx(ig, j, ld_psi_)] += hp_l[idx(ig, i, ld_psi_)] * cp; + p_new[idx(ig, j, ld_psi_)] += p_l[idx(ig, i, ld_psi_)] * cp; + hp_new[idx(ig, j, ld_psi_)] += hp_l[idx(ig, i, ld_psi_)] * cp; + } + } + } + } + + scatter_cols(psi, cols, psi_new); + scatter_cols(hpsi_.data(), cols, hpsi_new); + scatter_cols(p_.data(), cols, p_new); + scatter_cols(hp_.data(), cols, hp_new); +} + +template +void DiagoPPCG::right_solve_upper_real(const std::vector& r, + const int n, + std::vector& x) const +{ + std::vector b = x; + for (int row = 0; row < n_dim_; ++row) + { + for (int j = 0; j < n; ++j) + { + T v = b[idx(row, j, ld_psi_)]; + for (int k = 0; k < j; ++k) + { + v -= x[idx(row, k, ld_psi_)] * r[k + j * n]; + } + x[idx(row, j, ld_psi_)] = v / r[j + j * n]; + } + } +} + +template +void DiagoPPCG::chol_qr_active(T* psi, const std::vector& active_cols) const +{ + if (active_cols.empty()) + { + return; + } + + const int nact = static_cast(active_cols.size()); + std::vector psi_a, hpsi_a; + copy_cols(psi, active_cols, psi_a); + copy_cols(hpsi_.data(), active_cols, hpsi_a); + + std::vector s(nact * nact, static_cast(0)); + gram(psi_a.data(), psi_a.data(), nact, nact, s, nact); + + LapackReal::potrf(nact, s.data()); + + right_solve_upper_real(s, nact, psi_a); + right_solve_upper_real(s, nact, hpsi_a); + + scatter_cols(psi, active_cols, psi_a); + scatter_cols(hpsi_.data(), active_cols, hpsi_a); +} + +template +void DiagoPPCG::rayleigh_ritz(T* psi, + Real* eigenvalue, + std::vector& active_cols, + const std::vector& ethr_band) +{ + std::vector hsub(n_band_ * n_band_, static_cast(0)); + gram(psi, hpsi_.data(), n_band_, n_band_, hsub, n_band_); + + std::vector eval(n_band_, static_cast(0)); + LapackReal::syevd(n_band_, hsub.data(), eval.data()); + + std::vector psi_old(psi, psi + ld_psi_ * n_band_); + std::vector hpsi_old = hpsi_; + + std::fill(psi, psi + ld_psi_ * n_band_, zero_value()); + set_zero(hpsi_); + + for (int j = 0; j < n_band_; ++j) + { + for (int i = 0; i < n_band_; ++i) + { + const Real c = hsub[i + j * n_band_]; + for (int ig = 0; ig < n_dim_; ++ig) + { + psi[idx(ig, j, ld_psi_)] += psi_old[idx(ig, i, ld_psi_)] * c; + hpsi_[idx(ig, j, ld_psi_)] += hpsi_old[idx(ig, i, ld_psi_)] * c; + } + } + eigenvalue[j] = eval[j]; + } + + set_zero(w_); + for (int j = 0; j < n_band_; ++j) + { + for (int ig = 0; ig < n_dim_; ++ig) + { + w_[idx(ig, j, ld_psi_)] = hpsi_[idx(ig, j, ld_psi_)] - psi[idx(ig, j, ld_psi_)] * eigenvalue[j]; + } + } + + lock_epairs(w_, ethr_band, active_cols); +} + +template +typename DiagoPPCG::Real +DiagoPPCG::trace_of_active_projected(const T* psi, const std::vector& active_cols) const +{ + if (active_cols.empty()) + { + return static_cast(0); + } + + std::vector psi_a, hpsi_a; + copy_cols(psi, active_cols, psi_a); + copy_cols(hpsi_.data(), active_cols, hpsi_a); + + const int nact = static_cast(active_cols.size()); + std::vector g(nact * nact, static_cast(0)); + gram(psi_a.data(), hpsi_a.data(), nact, nact, g, nact); + + Real tr = 0; + for (int i = 0; i < nact; ++i) + { + tr += g[i + i * nact]; + } + return tr; +} + +template +double DiagoPPCG::diag(const HPsiFunc& hpsi_func, + const SPsiFunc&, + const int ld_psi, + const int nband, + const int dim, + T* psi_in, + Real* eigenvalue_in, + const std::vector& ethr_band, + const Real* prec) +{ + ld_psi_ = ld_psi; + n_band_ = nband; + n_dim_ = dim; + + validate_input(psi_in, eigenvalue_in, prec); + + hpsi_.assign(ld_psi_ * n_band_, zero_value()); + w_.assign(ld_psi_ * n_band_, zero_value()); + hw_.assign(ld_psi_ * n_band_, zero_value()); + p_.assign(ld_psi_ * n_band_, zero_value()); + hp_.assign(ld_psi_ * n_band_, zero_value()); + + std::vector all_cols(n_band_); + std::iota(all_cols.begin(), all_cols.end(), 0); + + force_g0_real(psi_in, n_band_); + apply_h(hpsi_func, psi_in, hpsi_.data(), n_band_); + + std::vector g(n_band_ * n_band_, static_cast(0)); + gram(psi_in, hpsi_.data(), n_band_, n_band_, g, n_band_); + + for (int j = 0; j < n_band_; ++j) + { + eigenvalue_in[j] = g[j + j * n_band_]; + for (int ig = 0; ig < n_dim_; ++ig) + { + T sum = zero_value(); + for (int i = 0; i < n_band_; ++i) + { + sum += psi_in[idx(ig, i, ld_psi_)] * g[i + j * n_band_]; + } + w_[idx(ig, j, ld_psi_)] = hpsi_[idx(ig, j, ld_psi_)] - sum; + } + } + + std::vector active_cols; + lock_epairs(w_, ethr_band, active_cols); + + Real trG = trace_of_active_projected(psi_in, active_cols); + Real trdif = static_cast(-1); + double avg_iter = 1.0; + int iter = 1; + + while (!active_cols.empty() && iter <= maxter_) + { + const int nact = static_cast(active_cols.size()); + const int nsb = std::max(1, (nact + sbsize_ - 1) / sbsize_); + const Real trtol = diag_thr_ * std::sqrt(static_cast(nact)); + + divide_by_preconditioner(active_cols, prec, w_); + project_against(psi_in, all_cols, w_, active_cols); + + std::vector w_active; + copy_cols(w_.data(), active_cols, w_active); + force_g0_real(w_active.data(), nact); + std::vector hw_active(ld_psi_ * nact, zero_value()); + apply_h(hpsi_func, w_active.data(), hw_active.data(), nact); + scatter_cols(hw_.data(), active_cols, hw_active); + + avg_iter += static_cast(nact) / static_cast(n_band_); + + const bool use_p = (iter != 1); + if (use_p) + { + project_against(psi_in, active_cols, p_, active_cols); + } + + for (int isb = 0; isb < nsb; ++isb) + { + const int i0 = isb * sbsize_; + const int l = std::min(sbsize_, nact - i0); + std::vector cols(active_cols.begin() + i0, active_cols.begin() + i0 + l); + + SmallSubspace subspace; + build_small_subspace(psi_in, cols, use_p, subspace); + solve_small_generalized((use_p ? 3 : 2) * l, subspace); + update_one_block(psi_in, cols, l, use_p, subspace); + } + + if (iter % rr_step_ == 0) + { + rayleigh_ritz(psi_in, eigenvalue_in, active_cols, ethr_band); + trdif = static_cast(-1); + trG = 0; + for (const int c : active_cols) + { + trG += eigenvalue_in[c]; + } + } + else + { + chol_qr_active(psi_in, active_cols); + + std::vector psi_a, hpsi_a; + copy_cols(psi_in, active_cols, psi_a); + copy_cols(hpsi_.data(), active_cols, hpsi_a); + + const int na = static_cast(active_cols.size()); + std::vector ga(na * na, static_cast(0)); + gram(psi_a.data(), hpsi_a.data(), na, na, ga, na); + + set_zero(w_); + for (int ja = 0; ja < na; ++ja) + { + for (int ig = 0; ig < n_dim_; ++ig) + { + T sum = zero_value(); + for (int ia = 0; ia < na; ++ia) + { + sum += psi_a[idx(ig, ia, ld_psi_)] * ga[ia + ja * na]; + } + w_[idx(ig, active_cols[ja], ld_psi_)] = hpsi_a[idx(ig, ja, ld_psi_)] - sum; + } + } + + Real trG1 = 0; + for (int i = 0; i < na; ++i) + { + trG1 += ga[i + i * na]; + } + + trdif = std::abs(trG1 - trG); + trG = trG1; + + if (trdif >= 0 && trdif <= trtol) + { + break; + } + } + + ++iter; + } + + if ((iter - 1) % rr_step_ != 0) + { + rayleigh_ritz(psi_in, eigenvalue_in, active_cols, ethr_band); + } + + return avg_iter; +} + +template class DiagoPPCG, base_device::DEVICE_CPU>; +template class DiagoPPCG, base_device::DEVICE_CPU>; + +} // namespace hsolver From da09339cf913f42aa413462640987fcba4af5d00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=AE=B6=E9=BD=90?= <1544375273@qq.com> Date: Tue, 7 Apr 2026 17:07:47 +0800 Subject: [PATCH 2/2] Add DiagoPPCG header file for gamma-point PPCG Add diago_ppcg,h to the branch, yet some changes in CMake needed. --- source/source_hsolver/diago_ppcg.h | 142 +++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 source/source_hsolver/diago_ppcg.h diff --git a/source/source_hsolver/diago_ppcg.h b/source/source_hsolver/diago_ppcg.h new file mode 100644 index 00000000000..7dbca12db54 --- /dev/null +++ b/source/source_hsolver/diago_ppcg.h @@ -0,0 +1,142 @@ +#ifndef DIAGO_PPCG_H_ +#define DIAGO_PPCG_H_ + +#include "source_base/module_device/types.h" +#include "source_base/macros.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace hsolver +{ + +/** + * @brief Gamma-point PPCG draft for source_hsolver. + * + * Notes: + * 1. This file is intentionally aligned with the source_hsolver naming/interface style. + * 2. The implementation below is a CPU-first draft and follows the uploaded ppcg_gamma.f90 logic. + * 3. Projected matrices are treated as real-symmetric by using gamma-point real inner products. + * 4. This is not a full MPI / band-group / GPU port of the original QE Fortran implementation. + */ +template , typename Device = base_device::DEVICE_CPU> +class DiagoPPCG final +{ + private: + using Real = typename GetTypeReal::type; + + public: + using HPsiFunc = std::function; + using SPsiFunc = std::function; + + DiagoPPCG(const Real& diag_thr, + const int& diag_iter_max, + const int& sbsize = 4, + const int& rr_step = 2, + const bool gamma_g0_real = true); + + double diag(const HPsiFunc& hpsi_func, + const SPsiFunc& spsi_func, + const int ld_psi, + const int nband, + const int dim, + T* psi_in, + Real* eigenvalue_in, + const std::vector& ethr_band, + const Real* prec = nullptr); + + private: + struct SmallSubspace + { + std::vector k; + std::vector m; + std::vector eval; + }; + + int ld_psi_ = 0; + int n_band_ = 0; + int n_dim_ = 0; + int maxter_ = 0; + int sbsize_ = 1; + int rr_step_ = 1; + + Real diag_thr_ = static_cast(1.0e-8); + bool gamma_g0_real_ = true; + + std::vector hpsi_; + std::vector w_; + std::vector hw_; + std::vector p_; + std::vector hp_; + + static int idx(const int i, const int j, const int ld) + { + return i + j * ld; + } + + void validate_input(T* psi_in, Real* eigenvalue_in, const Real* prec) const; + void force_g0_real(T* x, const int ncol) const; + + void apply_h(const HPsiFunc& hpsi_func, T* psi_in, T* hpsi_out, const int ncol) const; + void apply_s(const SPsiFunc& spsi_func, T* psi_in, T* spsi_out, const int ncol) const; + + Real gamma_dot(const T* x, const T* y) const; + + void gram(const T* a, + const T* b, + const int ncol_a, + const int ncol_b, + std::vector& out, + const int ld_out) const; + + void copy_cols(const T* src, const std::vector& cols, std::vector& dst) const; + void scatter_cols(T* dst, const std::vector& cols, const std::vector& src) const; + + void project_against(const T* basis, + const std::vector& basis_cols, + std::vector& x, + const std::vector& x_cols) const; + + void divide_by_preconditioner(const std::vector& active_cols, + const Real* prec, + std::vector& x) const; + + void lock_epairs(const std::vector& residual, + const std::vector& ethr_band, + std::vector& active_cols) const; + + void build_small_subspace(const T* psi, + const std::vector& cols, + const bool use_p, + SmallSubspace& subspace) const; + + void solve_small_generalized(const int dim, SmallSubspace& subspace) const; + + void update_one_block(T* psi, + const std::vector& cols, + const int l, + const bool use_p, + const SmallSubspace& subspace) const; + + void chol_qr_active(T* psi, const std::vector& active_cols) const; + void right_solve_upper_real(const std::vector& r, + const int n, + std::vector& x) const; + + void rayleigh_ritz(T* psi, + Real* eigenvalue, + std::vector& active_cols, + const std::vector& ethr_band); + + Real trace_of_active_projected(const T* psi, const std::vector& active_cols) const; +}; + +} // namespace hsolver + +#endif // DIAGO_PPCG_H_