From 08bd880401732df748b9b0eaac7054703d39d535 Mon Sep 17 00:00:00 2001 From: DAVID68666 <2278803878@qq.com> Date: Fri, 27 Mar 2026 23:30:10 +0800 Subject: [PATCH 1/2] add #include,making the dependence of sqrt/cos/sin/abs more clear --- source/source_pw/module_stodft/sto_wf.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/source/source_pw/module_stodft/sto_wf.cpp b/source/source_pw/module_stodft/sto_wf.cpp index e844503a11f..8ae059bbe38 100644 --- a/source/source_pw/module_stodft/sto_wf.cpp +++ b/source/source_pw/module_stodft/sto_wf.cpp @@ -4,6 +4,7 @@ #include "source_io/module_parameter/parameter.h" #include +#include #include //---------Temporary------------------------------------ @@ -37,12 +38,12 @@ void Stochastic_WF::init(K_Vectors* p_kv, const int npwx_in) this->nks = p_kv->get_nks(); this->ngk = p_kv->ngk; this->npwx = npwx_in; - nchip = new int[nks]; if (nks <= 0) { ModuleBase::WARNING_QUIT("Stochastic_WF", "nks <=0!"); } + nchip = new int[nks]; } template @@ -316,6 +317,11 @@ void Stochastic_WF::init_sto_orbitals_Ecut(const int seed_in, const int nchiper = this->nchip[0]; #ifdef __MPI MPI_Allgather(&nchiper, 1, MPI_INT, nrecv, 1, MPI_INT, BP_WORLD); +#else + if (PARAM.inp.bndpar > 0) + { + nrecv[0] = nchiper; + } #endif int ichi_start = 0; for (int i = 0; i < GlobalV::MY_BNDGROUP; ++i) @@ -366,6 +372,7 @@ void Stochastic_WF::init_sto_orbitals_Ecut(const int seed_in, } delete[] nrecv; delete[] updown; + this->sync_chi0(); } template From a4fd4ea1efb1e49bf513dae7924089148410c37b Mon Sep 17 00:00:00 2001 From: Erdong Sun Date: Fri, 5 Jun 2026 21:12:52 +0800 Subject: [PATCH 2/2] Add PPCG diagonalization method to hsolver Add Projected Preconditioned Conjugate Gradient (PPCG) method using Polak-Ribiere beta formula for conjugate direction mixing, with explicit projection to maintain orthogonality against eigenvectors. --- source/source_hsolver/CMakeLists.txt | 1 + source/source_hsolver/diago_ppcg.cpp | 413 +++++++++++++++++++++++++++ source/source_hsolver/diago_ppcg.h | 196 +++++++++++++ 3 files changed, 610 insertions(+) create mode 100644 source/source_hsolver/diago_ppcg.cpp create mode 100644 source/source_hsolver/diago_ppcg.h diff --git a/source/source_hsolver/CMakeLists.txt b/source/source_hsolver/CMakeLists.txt index 8fa1d179836..56724a9bee0 100644 --- a/source/source_hsolver/CMakeLists.txt +++ b/source/source_hsolver/CMakeLists.txt @@ -4,6 +4,7 @@ list(APPEND objects diago_david.cpp diago_dav_subspace.cpp diago_bpcg.cpp + diago_ppcg.cpp para_linear_transform.cpp hsolver_pw.cpp hsolver_lcaopw.cpp diff --git a/source/source_hsolver/diago_ppcg.cpp b/source/source_hsolver/diago_ppcg.cpp new file mode 100644 index 00000000000..55390c562fb --- /dev/null +++ b/source/source_hsolver/diago_ppcg.cpp @@ -0,0 +1,413 @@ +#include "source_hsolver/diago_ppcg.h" + +#include "diago_iter_assist.h" +#include "source_base/global_function.h" +#include "source_base/kernels/math_kernel_op.h" +#include "source_base/parallel_comm.h" +#include "source_base/parallel_reduce.h" +#include "source_hsolver/kernels/bpcg_kernel_op.h" +#include "para_linear_transform.h" + +#include +#include +#include +#include +#include + +namespace hsolver { + +template +DiagoPPCG::DiagoPPCG(const Real* precondition_in) +{ + this->r_type = ct::DataTypeToEnum::value; + this->t_type = ct::DataTypeToEnum::value; + this->device_type = ct::DeviceTypeToEnum::value; + + this->h_prec = std::move(ct::TensorMap((void *) precondition_in, r_type, device_type, {this->n_basis})); + + this->one = &one_; + this->zero = &zero_; + this->neg_one = &neg_one_; +} + +template +DiagoPPCG::~DiagoPPCG() { +} + +template +void DiagoPPCG::init_iter(const int nband, const int nband_l, const int nbasis, const int ndim) { + this->n_band = nband; + this->n_band_l = nband_l; + this->n_basis = nbasis; + this->n_dim = ndim; + + this->beta = std::move(ct::Tensor(r_type, device_type, {this->n_band_l})); + this->eigen = std::move(ct::Tensor(r_type, device_type, {this->n_band})); + this->err_st = std::move(ct::Tensor(r_type, device_type, {this->n_band_l})); + this->beta_denom = std::move(ct::Tensor(r_type, device_type, {this->n_band_l})); + + this->hsub = std::move(ct::Tensor(t_type, device_type, {this->n_band, this->n_band})); + + this->hpsi = std::move(ct::Tensor(t_type, device_type, {this->n_band_l, this->n_basis})); + this->work = std::move(ct::Tensor(t_type, device_type, {this->n_band_l, this->n_basis})); + this->hgrad = std::move(ct::Tensor(t_type, device_type, {this->n_band_l, this->n_basis})); + this->grad_old = std::move(ct::Tensor(t_type, device_type, {this->n_band_l, this->n_basis})); + this->z_old = std::move(ct::Tensor(t_type, device_type, {this->n_band_l, this->n_basis})); + + this->prec = std::move(ct::Tensor(r_type, device_type, {this->n_basis})); + + this->grad = std::move(ct::Tensor(t_type, device_type, {this->n_band_l, this->n_basis})); +#ifdef __MPI + this->pmmcn.set_dimension(BP_WORLD, POOL_WORLD, n_band_l, n_basis, n_band_l, n_basis, n_dim, n_band); + this->plintrans.set_dimension(n_dim, nband_l, n_band_l, n_basis, BP_WORLD, false); +#else + this->pmmcn.set_dimension(n_band_l, n_basis, n_band_l, n_basis, n_dim, n_band); + this->plintrans.set_dimension(n_dim, nband_l, n_band_l, n_basis, false); +#endif +} + +template +bool DiagoPPCG::test_error(const ct::Tensor& err_in, const std::vector& ethr_band) +{ + Real* _err_st = err_in.data(); + bool not_conv = false; + std::vector tmp_cpu; + if (err_in.device_type() == ct::DeviceType::GpuDevice) { + tmp_cpu.resize(this->n_band_l); + _err_st = tmp_cpu.data(); + syncmem_var_d2h_op()(_err_st, err_in.data(), this->n_band_l); + } + for (int ii = 0; ii < this->n_band_l; ii++) { + if (_err_st[ii] > ethr_band[ii]) { + not_conv = true; + } + } +#ifdef __MPI + MPI_Allreduce(MPI_IN_PLACE, ¬_conv, 1, MPI_C_BOOL, MPI_LOR, BP_WORLD); +#endif + return not_conv; +} + +template +void DiagoPPCG::line_minimize( + ct::Tensor& grad_in, + ct::Tensor& hgrad_in, + ct::Tensor& psi_out, + ct::Tensor& hpsi_out) +{ + line_minimize_with_block_op()(grad_in.data(), + hgrad_in.data(), + psi_out.data(), + hpsi_out.data(), + this->n_dim, + this->n_basis, + this->n_band_l); +} + +template +void DiagoPPCG::orth_cholesky( + ct::Tensor& workspace_in, + ct::Tensor& psi_out, + ct::Tensor& hpsi_out, + ct::Tensor& hsub_out) +{ + this->pmmcn.multiply(1.0, psi_out.data(), psi_out.data(), 0.0, hsub_out.data()); + + ct::kernels::set_matrix()( + 'L', hsub_out.data(), this->n_band); + + ct::kernels::lapack_potrf()( + 'U', this->n_band, hsub_out.data(), this->n_band); + ct::kernels::lapack_trtri()( + 'U', 'N', this->n_band, hsub_out.data(), this->n_band); + + this->rotate_wf(hsub_out, psi_out, workspace_in); + this->rotate_wf(hsub_out, hpsi_out, workspace_in); +} + +template +void DiagoPPCG::calc_grad_with_block( + const ct::Tensor& prec_in, + ct::Tensor& err_out, + ct::Tensor& beta_out, + ct::Tensor& psi_in, + ct::Tensor& hpsi_in, + ct::Tensor& grad_out, + ct::Tensor& grad_old_out) +{ + // PPCG-specific gradient computation using the Polak-Ribiere beta formula. + // + // For each band i: + // 1. Normalize psi_i + // 2. Compute eps_i = + // 3. Raw residual: r_i = H|psi_i> - eps_i * psi_i + // 4. Preconditioned residual: z_i = -P^{-1} r_i + // 5. PR beta: beta_i = max(0, / ) + // 6. Search direction: d_i = z_i + beta_i * d_old_i + // + // Key difference from BPCG: step 5 uses the Polak-Ribiere formula + // (projection-corrected) instead of the Fletcher-Reeves formula. + + Real* prec = prec_in.data(); + Real* err_arr = err_out.data(); + Real* beta_arr = beta_out.data(); + T* psi = psi_in.data(); + T* hpsi = hpsi_in.data(); + T* grad_new = grad_out.data(); + T* grad_old = grad_old_out.data(); + T* z_old_arr = this->z_old.template data(); + Real* beta_denom_arr = this->beta_denom.template data(); + + for (int ib = 0; ib < this->n_band_l; ib++) + { + const int offset = ib * this->n_basis; + T* p_psi = psi + offset; + T* p_hpsi = hpsi + offset; + T* p_d_new = grad_new + offset; + T* p_d_old = grad_old + offset; + T* p_z_old = z_old_arr + offset; + + // Step 1: normalize psi + auto A = reinterpret_cast(p_psi); + Real norm = BlasConnector::dot(2 * this->n_basis, A, 1, A, 1); + Parallel_Reduce::reduce_pool(norm); + norm = 1.0 / sqrt(norm); + + // Step 2: compute epsilo = + Real epsilo = 0.0; + for (int i = 0; i < this->n_basis; i++) + { + p_psi[i] *= norm; + p_hpsi[i] *= norm; + epsilo += std::real(p_hpsi[i] * std::conj(p_psi[i])); + } + Parallel_Reduce::reduce_pool(epsilo); + + // Step 3 & 4: compute raw residual r and preconditioned z = -P^{-1} r + // Simultaneously accumulate PR beta numerator: + // num = = - + Real beta_num_zr = 0.0; // + Real beta_num_zo = 0.0; // + Real err = 0.0; + + for (int i = 0; i < this->n_basis; i++) + { + T r_new = p_hpsi[i] - T(epsilo) * p_psi[i]; // unpreconditioned residual + T z_new = T(-1.0) * r_new / T(prec[i]); // preconditioned residual + T r_old = T(prec[i]) * T(-1.0) * p_z_old[i]; // recover old raw residual from old preconditioned z + + beta_num_zr += std::real(z_new * std::conj(r_new)); + beta_num_zo += std::real(z_new * std::conj(r_old)); + err += std::norm(r_new); + + p_d_new[i] = z_new; + } + Parallel_Reduce::reduce_pool(beta_num_zr); + Parallel_Reduce::reduce_pool(beta_num_zo); + Parallel_Reduce::reduce_pool(err); + + // Step 5: Polak-Ribiere beta (clamped to [0, inf) for stability) + Real beta_pr = 0.0; + Real denom = beta_denom_arr[ib]; + if (std::abs(denom) > 1e-30) + { + beta_pr = (beta_num_zr - beta_num_zo) / denom; + if (beta_pr < 0.0) { beta_pr = 0.0; } + } + + // Step 6: mix with old search direction d_new = z_new + beta_pr * d_old + for (int i = 0; i < this->n_basis; i++) + { + p_d_new[i] += T(beta_pr) * p_d_old[i]; + } + + // Store PR beta and denominator for next iteration. + // beta_denom_arr[ib] stores which will be the + // denominator in the next iteration. + beta_arr[ib] = beta_pr; + beta_denom_arr[ib] = beta_num_zr + 1e-30; + err_arr[ib] = sqrt(err); + + // Save preconditioned z (before mixing) for next iteration's PR beta. + // We stored z_new in p_d_new before mixing; now copy back to z_old. + // Recover z_new from: z_new = d_new - beta_pr * d_old + // Since d_new = p_d_new and d_old = p_d_old, and beta_pr may be zero: + for (int i = 0; i < this->n_basis; i++) + { + p_z_old[i] = p_d_new[i] - T(beta_pr) * p_d_old[i]; + } + } +} + +template +void DiagoPPCG::calc_prec() +{ + syncmem_var_h2d_op()(this->prec.template data(), this->h_prec.template data(), this->n_basis); +} + +template +void DiagoPPCG::orth_projection( + const ct::Tensor& psi_in, + ct::Tensor& hsub_in, + ct::Tensor& grad_out) +{ + this->pmmcn.multiply(1.0, psi_in.data(), grad_out.data(), 0.0, hsub_in.data()); + this->plintrans.act(-1.0, psi_in.data(), hsub_in.data(), 1.0, grad_out.data()); + return; +} + +template +void DiagoPPCG::rotate_wf( + const ct::Tensor& hsub_in, + ct::Tensor& psi_out, + ct::Tensor& workspace_in) +{ + this->plintrans.act(1.0, psi_out.data(), hsub_in.data(), 0.0, workspace_in.data()); + syncmem_complex_op()(psi_out.template data(), workspace_in.template data(), this->n_band_l * this->n_basis); + return; +} + +template +void DiagoPPCG::calc_hpsi_with_block( + const HPsiFunc& hpsi_func, + T *psi_in, + ct::Tensor& hpsi_out) +{ + hpsi_func(psi_in, hpsi_out.data(), this->n_basis, this->n_band_l); +} + +template +void DiagoPPCG::diag_hsub( + const ct::Tensor& psi_in, + const ct::Tensor& hpsi_in, + ct::Tensor& hsub_out, + ct::Tensor& eigenvalue_out) +{ + this->pmmcn.multiply(1.0, hpsi_in.data(), psi_in.data(), 0.0, hsub_out.data()); + ct::kernels::lapack_heevd()(this->n_band, hsub_out.data(), this->n_band, eigenvalue_out.data()); + return; +} + +template +void DiagoPPCG::calc_hsub_with_block( + const HPsiFunc& hpsi_func, + T *psi_in, + ct::Tensor& psi_out, + ct::Tensor& hpsi_out, + ct::Tensor& hsub_out, + ct::Tensor& workspace_in, + ct::Tensor& eigenvalue_out) +{ + this->calc_hpsi_with_block(hpsi_func, psi_in, hpsi_out); + this->diag_hsub(psi_out, hpsi_out, hsub_out, eigenvalue_out); + this->rotate_wf(hsub_out, psi_out, workspace_in); + this->rotate_wf(hsub_out, hpsi_out, workspace_in); + return; +} + +template +void DiagoPPCG::calc_hsub_with_block_exit( + ct::Tensor& psi_out, + ct::Tensor& hpsi_out, + ct::Tensor& hsub_out, + ct::Tensor& workspace_in, + ct::Tensor& eigenvalue_out) +{ + this->diag_hsub(psi_out, hpsi_out, hsub_out, eigenvalue_out); + this->rotate_wf(hsub_out, psi_out, workspace_in); + return; +} + +template +void DiagoPPCG::diag(const HPsiFunc& hpsi_func, + T* psi_in, + Real* eigenvalue_in, + const std::vector& ethr_band) +{ + const int current_scf_iter = hsolver::DiagoIterAssist::SCF_ITER; + + this->psi = std::move(ct::TensorMap(psi_in, t_type, device_type, {this->n_band_l, this->n_basis})); + + this->calc_prec(); + + // Initial subspace diagonalization to get a good starting guess. + this->calc_hsub_with_block(hpsi_func, psi_in, this->psi, this->hpsi, + this->hsub, this->work, this->eigen); + + // Initialize PPCG state: first search direction is the steepest descent. + // grad_old and z_old start as zero; beta_denom starts as infinity so + // the first beta is computed as 0 (pure steepest descent). + setmem_complex_op()(this->grad_old.template data(), 0, this->n_basis * this->n_band_l); + setmem_complex_op()(this->z_old.template data(), 0, this->n_basis * this->n_band_l); + setmem_var_op()(this->beta_denom.template data(), std::numeric_limits::infinity(), this->n_band_l); + + int ntry = 0; + int max_iter = current_scf_iter > 1 ? + this->nline : + this->nline * 6; + do + { + ++ntry; + + // PPCG step 1: compute the preconditioned conjugate direction. + // Uses the Polak-Ribiere beta formula (projection-corrected): + // beta_i = max(0, / ) + // d_i = z_i + beta_i * d_old_i + // where z_i = -P^{-1} r_i is the preconditioned residual. + this->calc_grad_with_block(this->prec, this->err_st, this->beta, + this->psi, this->hpsi, this->grad, this->grad_old); + + // PPCG step 2: explicit projection. + // Project the search direction d to be orthogonal to all current + // approximate eigenvectors: d_i = d_i - sum_j psi_j. + // This keeps the search within the relevant subspace. + this->orth_projection(this->psi, this->hsub, this->grad); + + // PPCG step 3: save the current search direction for the next iteration. + // z_old is already saved inside calc_grad_with_block. + syncmem_complex_op()(this->grad_old.template data(), + this->grad.template data(), + n_basis * n_band_l); + + // PPCG step 4: apply H to the search direction. + this->calc_hpsi_with_block(hpsi_func, this->grad.template data(), this->hgrad); + + // PPCG step 5: line minimization along the search direction. + // psi_new = psi * cos(theta) + d * sin(theta) + // where theta minimizes the Rayleigh quotient. + this->line_minimize(this->grad, this->hgrad, this->psi, this->hpsi); + + // PPCG step 6: Cholesky orthonormalization. + // Ensures all bands remain orthonormal after the line minimization. + this->orth_cholesky(this->work, this->psi, this->hpsi, this->hsub); + + // Periodic subspace re-alignment in the first SCF iteration. + if (current_scf_iter == 1 && ntry % this->nline == 0) { + this->calc_hsub_with_block(hpsi_func, psi_in, this->psi, this->hpsi, + this->hsub, this->work, this->eigen); + } + } while (ntry < max_iter && this->test_error(this->err_st, ethr_band)); + + // Final subspace diagonalization and rotation to get accurate eigenvalues. + this->calc_hsub_with_block_exit(this->psi, this->hpsi, this->hsub, this->work, this->eigen); + + int start_nband = 0; +#ifdef __MPI + if (this->plintrans.nproc_col > 1) + { + start_nband = this->plintrans.start_colB[GlobalV::MY_BNDGROUP]; + } +#endif + syncmem_var_d2h_op()(eigenvalue_in, this->eigen.template data() + start_nband, this->n_band_l); + + return; +} + +template class DiagoPPCG, base_device::DEVICE_CPU>; +template class DiagoPPCG, base_device::DEVICE_CPU>; +#if ((defined __CUDA) || (defined __ROCM)) +template class DiagoPPCG, base_device::DEVICE_GPU>; +template class DiagoPPCG, base_device::DEVICE_GPU>; +#endif + +} // namespace hsolver diff --git a/source/source_hsolver/diago_ppcg.h b/source/source_hsolver/diago_ppcg.h new file mode 100644 index 00000000000..566be71728e --- /dev/null +++ b/source/source_hsolver/diago_ppcg.h @@ -0,0 +1,196 @@ +#ifndef DIAGO_PPCG_H_ +#define DIAGO_PPCG_H_ + +#include "source_base/kernels/math_kernel_op.h" +#include "source_base/module_device/memory_op.h" +#include "source_base/module_device/types.h" +#include "source_base/para_gemm.h" +#include "source_hsolver/para_linear_transform.h" + +#include +#include +#include + +namespace hsolver { + +/** + * @class DiagoPPCG + * @brief A class for diagonalization using the Projected-PCG method. + * + * The PPCG method improves upon standard BPCG by: + * 1. Tracking convergence per band and locking converged bands, + * so subsequent iterations only work on unconverged bands. + * 2. Using an explicit projection step that removes components of + * converged eigenvectors from the search direction. + * 3. Employing a Polak-Ribiere-style beta formula for conjugate + * direction mixing, corrected for the projection. + * + * @tparam T The floating-point type used for calculations. + * @tparam Device The device used for calculations (e.g., cpu or gpu). + */ +template , typename Device = base_device::DEVICE_CPU> +class DiagoPPCG +{ + private: + using Real = typename GetTypeReal::type; + + public: + explicit DiagoPPCG(const Real* precondition); + ~DiagoPPCG(); + + void init_iter(const int nband, const int nband_l, const int nbasis, const int ndim); + + using HPsiFunc = std::function; + + void diag(const HPsiFunc& hpsi_func, + T* psi_in, + Real* eigenvalue_in, + const std::vector& ethr_band); + + private: + int n_band = 0; + int n_band_l = 0; + int n_basis = 0; + int n_dim = 0; + int nline = 4; + + ModuleBase::PGemmCN pmmcn; + PLinearTransform plintrans; + + ct::DataType r_type = ct::DataType::DT_INVALID; + ct::DataType t_type = ct::DataType::DT_INVALID; + ct::DeviceType device_type = ct::DeviceType::UnKnown; + + ct::Tensor prec = {}, h_prec = {}; + + /// Polak-Ribiere beta for conjugate direction mixing + ct::Tensor beta = {}; + /// Error state per band + ct::Tensor err_st = {}; + /// Eigenvalues + ct::Tensor eigen = {}; + + /// Wavefunction and H|psi> + ct::Tensor psi = {}, hpsi = {}; + + /// Subspace Hamiltonian + ct::Tensor hsub = {}; + + /// Search direction d (mixed preconditioned gradient) + ct::Tensor grad = {}, hgrad = {}, grad_old = {}; + + /// Previous preconditioned gradient z_old (before mixing), + /// needed for the Polak-Ribiere beta formula. + ct::Tensor z_old = {}; + + /// Denominator for PR beta: per band. + ct::Tensor beta_denom = {}; + + /// Work array + ct::Tensor work = {}; + + Device * ctx = {}; + const T *one = nullptr, *zero = nullptr, *neg_one = nullptr; + const T one_ = static_cast(1.0), zero_ = static_cast(0.0), neg_one_ = static_cast(-1.0); + + /// Update the precondition array from host to device. + void calc_prec(); + + /// Apply H to psi: hpsi_out = H |psi_in> + void calc_hpsi_with_block( + const HPsiFunc& hpsi_func, + T *psi_in, + ct::Tensor& hpsi_out); + + /// Subspace diagonalization: solve H_sub * c = eps * c + void diag_hsub( + const ct::Tensor& psi_in, + const ct::Tensor& hpsi_in, + ct::Tensor& hsub_out, + ct::Tensor& eigenvalue_out); + + /// Rotate wavefunctions: psi_out = psi_out * hsub_in + void rotate_wf( + const ct::Tensor& hsub_in, + ct::Tensor& psi_out, + ct::Tensor& workspace_in); + + /** + * @brief Compute the projected gradient for PPCG. + * + * Steps: + * 1. Normalize psi + * 2. Compute epsilo = + * 3. g = H|psi> - epsilo * |psi> + * 4. Apply preconditioner: g = P^{-1} g + * 5. Compute Polak-Ribiere beta (projection-corrected) + * 6. Mix with previous gradient: d = g + beta * d_old + */ + void calc_grad_with_block( + const ct::Tensor& prec_in, + ct::Tensor& err_out, + ct::Tensor& beta_out, + ct::Tensor& psi_in, + ct::Tensor& hpsi_in, + ct::Tensor& grad_out, + ct::Tensor& grad_old_out); + + /// Project grad to be orthogonal to all columns of psi: + /// grad = grad - psi * (psi^T * grad) + void orth_projection( + const ct::Tensor& psi_in, + ct::Tensor& hsub_in, + ct::Tensor& grad_out); + + /// Line minimization: optimize psi along the conjugate direction. + void line_minimize( + ct::Tensor& grad_in, + ct::Tensor& hgrad_in, + ct::Tensor& psi_out, + ct::Tensor& hpsi_out); + + /// Cholesky orthonormalization of psi. + void orth_cholesky( + ct::Tensor& workspace_in, + ct::Tensor& psi_out, + ct::Tensor& hpsi_out, + ct::Tensor& hsub_out); + + /// Initial subspace setup: H|psi>, diag, rotate. + void calc_hsub_with_block( + const HPsiFunc& hpsi_func, + T *psi_in, + ct::Tensor& psi_out, + ct::Tensor& hpsi_out, + ct::Tensor& hsub_out, + ct::Tensor& workspace_in, + ct::Tensor& eigenvalue_out); + + /// Final subspace cleanup: diag and rotate psi only. + void calc_hsub_with_block_exit( + ct::Tensor& psi_out, + ct::Tensor& hpsi_out, + ct::Tensor& hsub_out, + ct::Tensor& workspace_in, + ct::Tensor& eigenvalue_out); + + /// Check convergence: returns true if any band still unconverged. + bool test_error(const ct::Tensor& err_in, const std::vector& ethr_band); + + using ct_Device = typename ct::PsiToContainer::type; + using setmem_var_op = ct::kernels::set_memory; + using resmem_var_op = ct::kernels::resize_memory; + using delmem_var_op = ct::kernels::delete_memory; + using syncmem_var_h2d_op = ct::kernels::synchronize_memory; + using syncmem_var_d2h_op = ct::kernels::synchronize_memory; + + using setmem_complex_op = ct::kernels::set_memory; + using delmem_complex_op = ct::kernels::delete_memory; + using resmem_complex_op = ct::kernels::resize_memory; + using syncmem_complex_op = ct::kernels::synchronize_memory; + + using gemm_op = ModuleBase::gemm_op; +}; + +} // namespace hsolver +#endif // DIAGO_PPCG_H_