diff --git a/source/source_estate/elecstate_lcao.cpp b/source/source_estate/elecstate_lcao.cpp index 2040ee769e1..3ce61930771 100644 --- a/source/source_estate/elecstate_lcao.cpp +++ b/source/source_estate/elecstate_lcao.cpp @@ -80,7 +80,56 @@ void ElecStateLCAO>::dm2rho(std::vector*> pexsi_EDM, DensityMatrix, double>* dm) { - ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case"); + ModuleBase::timer::start("ElecStateLCAO", "dm2rho"); + + const int dmk_size = dm->get_DMK_size(); + if (static_cast(pexsi_DM.size()) < dmk_size) + { + ModuleBase::WARNING_QUIT("ElecStateLCAO", "PEXSI multi-k density matrix buffer is smaller than DMK storage"); + } + + const int local_matrix_size = dm->get_DMK_nrow() * dm->get_DMK_ncol(); + const double pexsi_spin_weight = PARAM.inp.nspin == 1 ? 0.5 : 1.0; + for (int ik = 0; ik < dmk_size; ++ik) + { + const double k_weight = ((this->klist != nullptr && ik < static_cast(this->klist->wk.size())) + ? this->klist->wk[ik] + : 1.0) + * pexsi_spin_weight; + for (int i = 0; i < local_matrix_size; ++i) + { + pexsi_DM[ik][i] *= k_weight; + pexsi_EDM[ik][i] *= k_weight; + } + dm->set_DMK_pointer(ik, pexsi_DM[ik]); + } + +#ifdef __PEXSI + dm->pexsi_EDM = pexsi_EDM; +#endif + + dm->cal_DMR(); + + for (int is = 0; is < PARAM.inp.nspin; is++) + { + ModuleBase::GlobalFunc::ZEROS(this->charge->rho[is], this->charge->nrxx); + } + + ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!"); + ModuleGint::cal_gint_rho(dm->get_DMR_vector(), PARAM.inp.nspin, this->charge->rho); + if (XC_Functional::get_ked_flag()) + { + for (int is = 0; is < PARAM.inp.nspin; is++) + { + ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[0], this->charge->nrxx); + } + ModuleGint::cal_gint_tau(dm->get_DMR_vector(), PARAM.inp.nspin, this->charge->kin_r); + } + + this->charge->renormalize_rho(); + + ModuleBase::timer::end("ElecStateLCAO", "dm2rho"); + return; } diff --git a/source/source_hsolver/diago_pexsi.cpp b/source/source_hsolver/diago_pexsi.cpp index e85f78b12b9..b9cd1cae2a8 100644 --- a/source/source_hsolver/diago_pexsi.cpp +++ b/source/source_hsolver/diago_pexsi.cpp @@ -6,10 +6,19 @@ #include "diago_pexsi.h" #include "source_base/tool_title.h" #include "source_base/global_variable.h" +#include "source_base/parallel_global.h" #include "source_base/tool_quit.h" #include "source_basis/module_ao/parallel_orbitals.h" +#include "module_pexsi/simple_pexsi.h" #include "module_pexsi/pexsi_solver.h" +#include +#include +#include +#include + +extern MPI_Comm DIAG_WORLD; + typedef hamilt::MatrixBlock matd; typedef hamilt::MatrixBlock> matcd; @@ -19,46 +28,140 @@ template std::vector DiagoPexsi::mu_buffer; template -DiagoPexsi::DiagoPexsi(const Parallel_Orbitals* ParaV_in) +DiagoPexsi::DiagoPexsi(const Parallel_Orbitals* ParaV_in, std::unique_ptr solver_in) { + this->ParaV = ParaV_in; + this->ps = std::move(solver_in); + if (this->ps == nullptr) + { + this->ps = std::make_unique(); + } + int nspin = PARAM.inp.nspin; if (PARAM.inp.nspin == 4) { nspin = 1; } - mu_buffer.resize(nspin); - for (int i = 0; i < nspin; i++) + if (static_cast(mu_buffer.size()) < nspin) { - mu_buffer[i] = this->ps->pexsi_mu; + mu_buffer.resize(nspin, pexsi::PEXSI_Solver::pexsi_mu); } + this->resize_density_buffers(nspin); - this->ParaV = ParaV_in; - this->ps = std::make_unique(); +} - this->DM.resize(nspin); - this->EDM.resize(nspin); - for (int i = 0; i < nspin; i++) +template +DiagoPexsi::~DiagoPexsi() +{ +} + +template +void DiagoPexsi::resize_density_buffers(const int count) +{ + const int local_size = this->ParaV->nrow * this->ParaV->ncol; + this->DM.resize(count); + this->EDM.resize(count); + this->dm_buffer_.resize(count); + this->edm_buffer_.resize(count); + for (int i = 0; i < count; ++i) { - this->DM[i] = new T[ParaV->nrow * ParaV->ncol]; - this->EDM[i] = new T[ParaV->nrow * ParaV->ncol]; + if (static_cast(this->dm_buffer_[i].size()) != local_size) + { + this->dm_buffer_[i].assign(local_size, T{}); + } + if (static_cast(this->edm_buffer_[i].size()) != local_size) + { + this->edm_buffer_[i].assign(local_size, T{}); + } + this->DM[i] = this->dm_buffer_[i].data(); + this->EDM[i] = this->edm_buffer_[i].data(); } + if (static_cast(mu_buffer.size()) < count) + { + mu_buffer.resize(count, pexsi::PEXSI_Solver::pexsi_mu); + } +} +template +void DiagoPexsi::begin_mu_search() +{ + this->has_mu_lower_ = false; + this->has_mu_upper_ = false; + this->mu_lower_ = 0.0; + this->mu_upper_ = 0.0; } template -DiagoPexsi::~DiagoPexsi() +void DiagoPexsi::begin_k_loop() { - int nspin = PARAM.inp.nspin; - if (PARAM.inp.nspin == 4) + this->num_electron_sum_ = 0.0; + this->num_electron_derivative_sum_ = 0.0; + this->totalEnergyH = 0.0; + this->totalEnergyS = 0.0; + this->totalFreeEnergy = 0.0; +} + +template +void DiagoPexsi::set_k_weight(const int ik, const double weight) +{ + if (ik >= static_cast(this->k_weights_.size())) { - nspin = 1; + this->k_weights_.resize(ik + 1, 1.0); } - for (int i = 0; i < nspin; i++) + this->k_weights_[ik] = weight; +} + +template +bool DiagoPexsi::finish_k_loop(const double target_nelec) +{ + return true; +} + +template <> +bool DiagoPexsi>::finish_k_loop(const double target_nelec) +{ + const double residual = target_nelec - this->num_electron_sum_; + if (std::abs(residual) <= pexsi::PEXSI_Solver::pexsi_elec_thr) + { + return true; + } + if (residual > 0.0) { - delete[] this->DM[i]; - delete[] this->EDM[i]; + this->has_mu_lower_ = true; + this->mu_lower_ = mu_buffer[0]; + } + else + { + this->has_mu_upper_ = true; + this->mu_upper_ = mu_buffer[0]; } + double delta_mu = 0.0; + if (std::abs(this->num_electron_derivative_sum_) <= DBL_MIN) + { + if (this->has_mu_lower_ && this->has_mu_upper_) + { + mu_buffer[0] = 0.5 * (this->mu_lower_ + this->mu_upper_); + return false; + } + const double fallback_step = std::max(pexsi::PEXSI_Solver::pexsi_mu_guard, 0.5); + delta_mu = residual > 0.0 ? fallback_step : -fallback_step; + } + else + { + delta_mu = residual / this->num_electron_derivative_sum_; + } + if (pexsi::PEXSI_Solver::pexsi_mu_guard > 0.0 && std::abs(this->num_electron_derivative_sum_) > DBL_MIN) + { + delta_mu = std::max(-pexsi::PEXSI_Solver::pexsi_mu_guard, + std::min(pexsi::PEXSI_Solver::pexsi_mu_guard, delta_mu)); + } + if (mu_buffer.empty()) + { + mu_buffer.push_back(pexsi::PEXSI_Solver::pexsi_mu); + } + mu_buffer[0] += delta_mu; + return false; } template <> @@ -69,6 +172,12 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& phm_in->matrix(h_mat, s_mat); std::vector eigen(PARAM.globalv.nlocal, 0.0); int ik = psi.get_current_k(); + if (ik < 0 || ik >= static_cast(this->DM.size())) + { + ModuleBase::WARNING_QUIT("DiagoPEXSI", + "PEXSI real path only has density buffers for Gamma/spin channels; multi-k requires " + "the complex expert-routine path"); + } this->ps->prepare(this->ParaV->blacs_ctxt, this->ParaV->nb, this->ParaV->nrow, @@ -90,11 +199,71 @@ void DiagoPexsi>::diag(hamilt::Hamilt> double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); - ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case"); + matcd h_mat, s_mat; + phm_in->matrix(h_mat, s_mat); + const int ik = psi.get_current_k(); + if (ik < 0) + { + ModuleBase::WARNING_QUIT("DiagoPEXSI", "invalid k-point index for complex PEXSI path"); + } + if (ik >= static_cast(this->DM.size())) + { + this->resize_density_buffers(ik + 1); + } + if (mu_buffer.empty()) + { + mu_buffer.push_back(pexsi::PEXSI_Solver::pexsi_mu); + } + + MPI_Group grid_group; + MPI_Group world_group; + int grid_np = 0; + MPI_Comm_size(DIAG_WORLD, &grid_np); + MPI_Comm_group(DIAG_WORLD, &world_group); + int grid_proc_range[3] = {0, (GlobalV::NPROC / grid_np) * grid_np - 1, GlobalV::NPROC / grid_np}; + MPI_Group_range_incl(world_group, 1, &grid_proc_range, &grid_group); + + double num_electron = 0.0; + double num_electron_derivative = 0.0; + double total_energy_h = 0.0; + double total_energy_s = 0.0; + double total_free_energy = 0.0; + pexsi::simplePEXSIComplex(DIAG_WORLD, + DIAG_WORLD, + grid_group, + this->ParaV->blacs_ctxt, + PARAM.globalv.nlocal, + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + 'c', + h_mat.p, + s_mat.p, + PARAM.inp.nelec, + "PEXSIOPTION", + this->DM[ik], + this->EDM[ik], + total_energy_h, + total_energy_s, + total_free_energy, + mu_buffer[0], + mu_buffer[0], + &num_electron, + &num_electron_derivative); + + const double k_weight = ik < static_cast(this->k_weights_.size()) ? this->k_weights_[ik] : 1.0; + this->num_electron_sum_ += k_weight * num_electron; + this->num_electron_derivative_sum_ += k_weight * num_electron_derivative; + this->totalEnergyH += k_weight * total_energy_h; + this->totalEnergyS += k_weight * total_energy_s; + this->totalFreeEnergy += k_weight * total_free_energy; + + MPI_Group_free(&grid_group); + MPI_Group_free(&world_group); } template class DiagoPexsi; template class DiagoPexsi >; } // namespace hsolver -#endif \ No newline at end of file +#endif diff --git a/source/source_hsolver/diago_pexsi.h b/source/source_hsolver/diago_pexsi.h index 9f0e0d1317b..6206d66c04c 100644 --- a/source/source_hsolver/diago_pexsi.h +++ b/source/source_hsolver/diago_pexsi.h @@ -6,7 +6,7 @@ #include "source_base/macros.h" // GetRealType #include "source_hamilt/hamilt.h" #include "source_basis/module_ao/parallel_orbitals.h" -#include "module_pexsi/pexsi_solver.h" +#include "module_pexsi/pexsi_solver_interface.h" namespace hsolver { @@ -19,16 +19,33 @@ class DiagoPexsi static std::vector mu_buffer; public: - DiagoPexsi(const Parallel_Orbitals* ParaV_in); + explicit DiagoPexsi(const Parallel_Orbitals* ParaV_in, + std::unique_ptr solver_in = nullptr); void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in); const Parallel_Orbitals* ParaV = nullptr; std::vector DM; std::vector EDM; - double totalEnergyH; - double totalEnergyS; - double totalFreeEnergy; - std::unique_ptr ps; + double totalEnergyH = 0.0; + double totalEnergyS = 0.0; + double totalFreeEnergy = 0.0; + std::unique_ptr ps; + void begin_mu_search(); + void begin_k_loop(); + void set_k_weight(const int ik, const double weight); + bool finish_k_loop(const double target_nelec); ~DiagoPexsi(); + + private: + std::vector> dm_buffer_; + std::vector> edm_buffer_; + std::vector k_weights_; + double num_electron_sum_ = 0.0; + double num_electron_derivative_sum_ = 0.0; + bool has_mu_lower_ = false; + bool has_mu_upper_ = false; + double mu_lower_ = 0.0; + double mu_upper_ = 0.0; + void resize_density_buffers(const int count); }; } // namespace hsolver diff --git a/source/source_hsolver/hsolver_lcao.cpp b/source/source_hsolver/hsolver_lcao.cpp index 2b4d6497ae6..aa71866963e 100644 --- a/source/source_hsolver/hsolver_lcao.cpp +++ b/source/source_hsolver/hsolver_lcao.cpp @@ -22,6 +22,7 @@ #ifdef __PEXSI #include "diago_pexsi.h" +#include "module_pexsi/pexsi_solver.h" #endif #include "source_base/global_variable.h" @@ -37,6 +38,9 @@ #include "source_lcao/rho_tau_lcao.h" // mohan add 20251024 +#include +#include + namespace hsolver { @@ -114,13 +118,30 @@ void HSolverLCAO::solve(hamilt::Hamilt* pHamilt, { #ifdef __PEXSI // other purification methods should follow this routine DiagoPexsi pe(ParaV); - for (int ik = 0; ik < psi.get_nk(); ++ik) + const int pexsi_mu_loops = std::is_same>::value + ? std::min(40, std::max(1, pexsi::PEXSI_Solver::pexsi_nmax)) + : 1; + pe.begin_mu_search(); + for (int imu = 0; imu < pexsi_mu_loops; ++imu) { - /// update H(k) for each k point - pHamilt->updateHk(ik); - psi.fix_k(ik); - // solve eigenvector and eigenvalue for H(k) - pe.diag(pHamilt, psi, nullptr); + pe.begin_k_loop(); + for (int ik = 0; ik < psi.get_nk(); ++ik) + { + const double k_weight = (pes->klist != nullptr && ik < static_cast(pes->klist->wk.size())) + ? pes->klist->wk[ik] + : 1.0; + const double pexsi_spin_weight = PARAM.inp.nspin == 1 ? 0.5 : 1.0; + pe.set_k_weight(ik, k_weight * pexsi_spin_weight); + /// update H(k) for each k point + pHamilt->updateHk(ik); + psi.fix_k(ik); + // solve eigenvector and eigenvalue for H(k) + pe.diag(pHamilt, psi, nullptr); + } + if (pe.finish_k_loop(PARAM.inp.nelec)) + { + break; + } } auto _pes = dynamic_cast*>(pes); pes->f_en.eband = pe.totalFreeEnergy; @@ -475,4 +496,4 @@ void HSolverLCAO::parakSolve_cusolver(hamilt::Hamilt* pHamilt, template class HSolverLCAO; template class HSolverLCAO>; -} // namespace hsolver \ No newline at end of file +} // namespace hsolver diff --git a/source/source_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/source_hsolver/module_pexsi/dist_matrix_transformer.cpp index 43f069c1238..5e36075835d 100644 --- a/source/source_hsolver/module_pexsi/dist_matrix_transformer.cpp +++ b/source/source_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -3,8 +3,10 @@ #include +#include #include #include +#include #include #include #include @@ -147,7 +149,7 @@ inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std for (int i = 0; i < N; ++i) { int key = 0; - if (fabs(A[i] < 1e-31)) + if (fabs(A[i]) < 1e-31) key = -100; else key = floor(log10(fabs(A[i]))); @@ -209,6 +211,59 @@ inline int DistMatrixTransformer::getNonZeroIndex(char layout, return 0; } +inline int DistMatrixTransformer::getNonZeroIndex(char layout, + const int nrow, + const int ncol, + std::complex* H_2d, + std::complex* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx) +{ + int idx = 0; + nnz = 0; + colidx.clear(); + rowidx.clear(); + if (layout == 'c' || layout == 'C') + { + for (int i = 0; i < ncol; ++i) + { + for (int j = 0; j < nrow; ++j) + { + idx = i * nrow + j; + if (std::abs(H_2d[idx]) > ZERO_Limit || std::abs(S_2d[idx]) > ZERO_Limit) + { + ++nnz; + colidx.push_back(i); + rowidx.push_back(j); + } + } + } + } + else if (layout == 'r' || layout == 'R') + { + for (int i = 0; i < ncol; ++i) + { + for (int j = 0; j < nrow; ++j) + { + idx = j * ncol + i; + if (std::abs(H_2d[idx]) > ZERO_Limit || std::abs(S_2d[idx]) > ZERO_Limit) + { + ++nnz; + colidx.push_back(i); + rowidx.push_back(j); + } + } + } + } + else + { + return 1; + } + return 0; +} + int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, DistCCSMatrix& DST_Matrix, const int NPROC_TRANS, @@ -292,13 +347,13 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, // transfer index to receiver std::vector receiver_index(receiver_size); - MPI_Alltoallv(&sender_index[0], - &sender_size_process[0], - &sender_displacement_process[0], + MPI_Alltoallv(sender_index.data(), + sender_size_process.data(), + sender_displacement_process.data(), MPI_INT, - &receiver_index[0], - &receiver_size_process[0], - &receiver_displacement_process[0], + receiver_index.data(), + receiver_size_process.data(), + receiver_displacement_process.data(), MPI_INT, COMM_TRANS); @@ -309,9 +364,9 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, NPROC_TRANS, receiver_size_process, receiver_displacement_process, - &receiver_index[0], + receiver_index.data(), DST_Matrix, - &buffer2ccsIndex[0]); + buffer2ccsIndex.data()); return 0; } @@ -337,10 +392,8 @@ int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm return 0; } -// transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution -// two destination matrices share the same non-zero elements positions -// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, -// even if one of them is acturely zero All matrices must have same MPI communicator +// Transform two sparse matrices from BCD to CCS with a shared sparse pattern. H and S are packed together so that the +// value redistribution needs one MPI_Alltoallv instead of one collective per matrix. int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, double* H_2d, double* S_2d, @@ -397,70 +450,236 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, receiver_size_process, receiver_displacement_process, buffer2ccsIndex); -// Do transformation - std::vector sender_buffer(sender_size); - std::vector receiver_buffer(receiver_size); - // put H to sender buffer + int myproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + int nproc_data; + std::vector proc_map_data_trans; + if (myproc == 0) + { + MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data); + MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); + proc_map_data_trans.resize(nproc_data, 0); + for (int i = 0; i < nproc_data; ++i) + { + MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); + } + MPI_Bcast(proc_map_data_trans.data(), nproc_data, MPI_INT, 0, COMM_TRANS); + } + else + { + MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); + proc_map_data_trans.resize(nproc_data, 0); + MPI_Bcast(proc_map_data_trans.data(), nproc_data, MPI_INT, 0, COMM_TRANS); + } + std::vector next_sender_position(sender_displacement_process); + std::vector sender_buffer(2 * sender_size); if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r') { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; + const int idx = rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]; + int dst_process; + DST_Matrix.localCol(SRC_Matrix.globalCol(colidx[i]), dst_process); + const int pos = next_sender_position[proc_map_data_trans[dst_process]]++; + sender_buffer[2 * pos] = H_2d[idx]; + sender_buffer[2 * pos + 1] = S_2d[idx]; } } else { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; + const int idx = colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]; + int dst_process; + DST_Matrix.localCol(SRC_Matrix.globalCol(colidx[i]), dst_process); + const int pos = next_sender_position[proc_map_data_trans[dst_process]]++; + sender_buffer[2 * pos] = H_2d[idx]; + sender_buffer[2 * pos + 1] = S_2d[idx]; } } - // do all2all transformation - MPI_Alltoallv(&sender_buffer[0], - &sender_size_process[0], - &sender_displacement_process[0], + + std::vector sender_value_size_process(sender_size_process); + std::vector sender_value_displacement_process(sender_displacement_process); + std::vector receiver_value_size_process(receiver_size_process); + std::vector receiver_value_displacement_process(receiver_displacement_process); + for (int i = 0; i < NPROC_TRANS; ++i) + { + sender_value_size_process[i] *= 2; + sender_value_displacement_process[i] *= 2; + receiver_value_size_process[i] *= 2; + receiver_value_displacement_process[i] *= 2; + } + + std::vector receiver_buffer(2 * receiver_size); + MPI_Alltoallv(sender_buffer.data(), + sender_value_size_process.data(), + sender_value_displacement_process.data(), MPI_DOUBLE, - &receiver_buffer[0], - &receiver_size_process[0], - &receiver_displacement_process[0], + receiver_buffer.data(), + receiver_value_size_process.data(), + receiver_value_displacement_process.data(), MPI_DOUBLE, COMM_TRANS); -// collect H from receiver buffer + delete[] H_ccs; H_ccs = new double[receiver_size]; - buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], H_ccs); + delete[] S_ccs; + S_ccs = new double[receiver_size]; + for (int i = 0; i < receiver_size; ++i) + { + const int buffer_index = buffer2ccsIndex[i]; + H_ccs[i] = receiver_buffer[2 * buffer_index]; + S_ccs[i] = receiver_buffer[2 * buffer_index + 1]; + } + } + // clear and return + deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS); + return 0; +} + +int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + std::complex* H_2d, + std::complex* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + std::complex*& H_ccs, + std::complex*& S_ccs) +{ + MPI_Group GROUP_TRANS; + MPI_Comm COMM_TRANS = MPI_COMM_NULL; + newGroupCommTrans(SRC_Matrix, DST_Matrix, GROUP_TRANS, COMM_TRANS); + if (COMM_TRANS != MPI_COMM_NULL) + { + int NPROC_TRANS; + MPI_Comm_size(COMM_TRANS, &NPROC_TRANS); + int sender_size; + std::vector sender_size_process(NPROC_TRANS); + std::vector sender_displacement_process(NPROC_TRANS); + int receiver_size; + std::vector receiver_size_process(NPROC_TRANS); + std::vector receiver_displacement_process(NPROC_TRANS); + + std::vector rowidx; + std::vector colidx; + int nnz = 0; + if (SRC_Matrix.get_comm() != MPI_COMM_NULL) + { + getNonZeroIndex(SRC_Matrix.get_layout(), + SRC_Matrix.get_nrow(), + SRC_Matrix.get_ncol(), + H_2d, + S_2d, + ZERO_Limit, + nnz, + rowidx, + colidx); + } + + std::vector buffer2ccsIndex; + buildTransformParameter(SRC_Matrix, + DST_Matrix, + NPROC_TRANS, + GROUP_TRANS, + COMM_TRANS, + nnz, + rowidx, + colidx, + sender_size, + sender_size_process, + sender_displacement_process, + receiver_size, + receiver_size_process, + receiver_displacement_process, + buffer2ccsIndex); - // put S to sender buffer + int myproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + int nproc_data; + std::vector proc_map_data_trans; + if (myproc == 0) + { + MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data); + MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); + proc_map_data_trans.resize(nproc_data, 0); + for (int i = 0; i < nproc_data; ++i) + { + MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); + } + MPI_Bcast(proc_map_data_trans.data(), nproc_data, MPI_INT, 0, COMM_TRANS); + } + else + { + MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); + proc_map_data_trans.resize(nproc_data, 0); + MPI_Bcast(proc_map_data_trans.data(), nproc_data, MPI_INT, 0, COMM_TRANS); + } + std::vector next_sender_position(sender_displacement_process); + std::vector sender_buffer(4 * sender_size); if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r') { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; + const int idx = rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]; + int dst_process; + DST_Matrix.localCol(SRC_Matrix.globalCol(colidx[i]), dst_process); + const int pos = next_sender_position[proc_map_data_trans[dst_process]]++; + sender_buffer[4 * pos] = H_2d[idx].real(); + sender_buffer[4 * pos + 1] = H_2d[idx].imag(); + sender_buffer[4 * pos + 2] = S_2d[idx].real(); + sender_buffer[4 * pos + 3] = S_2d[idx].imag(); } } else { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; + const int idx = colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]; + int dst_process; + DST_Matrix.localCol(SRC_Matrix.globalCol(colidx[i]), dst_process); + const int pos = next_sender_position[proc_map_data_trans[dst_process]]++; + sender_buffer[4 * pos] = H_2d[idx].real(); + sender_buffer[4 * pos + 1] = H_2d[idx].imag(); + sender_buffer[4 * pos + 2] = S_2d[idx].real(); + sender_buffer[4 * pos + 3] = S_2d[idx].imag(); } } - // do all2all transformation - MPI_Alltoallv(&sender_buffer[0], - &sender_size_process[0], - &sender_displacement_process[0], + + std::vector sender_value_size_process(sender_size_process); + std::vector sender_value_displacement_process(sender_displacement_process); + std::vector receiver_value_size_process(receiver_size_process); + std::vector receiver_value_displacement_process(receiver_displacement_process); + for (int i = 0; i < NPROC_TRANS; ++i) + { + sender_value_size_process[i] *= 4; + sender_value_displacement_process[i] *= 4; + receiver_value_size_process[i] *= 4; + receiver_value_displacement_process[i] *= 4; + } + + std::vector receiver_buffer(4 * receiver_size); + MPI_Alltoallv(sender_buffer.data(), + sender_value_size_process.data(), + sender_value_displacement_process.data(), MPI_DOUBLE, - &receiver_buffer[0], - &receiver_size_process[0], - &receiver_displacement_process[0], + receiver_buffer.data(), + receiver_value_size_process.data(), + receiver_value_displacement_process.data(), MPI_DOUBLE, COMM_TRANS); -// collect S from receiver buffer + + delete[] H_ccs; + H_ccs = new std::complex[receiver_size]; delete[] S_ccs; - S_ccs = new double[receiver_size]; - buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], S_ccs); + S_ccs = new std::complex[receiver_size]; + for (int i = 0; i < receiver_size; ++i) + { + const int buffer_index = buffer2ccsIndex[i]; + H_ccs[i] = std::complex(receiver_buffer[4 * buffer_index], + receiver_buffer[4 * buffer_index + 1]); + S_ccs[i] = std::complex(receiver_buffer[4 * buffer_index + 2], + receiver_buffer[4 * buffer_index + 3]); + } } - // clear and return deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS); return 0; } @@ -761,5 +980,199 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, return 0; } +int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + std::complex* DMnzvalLocal, + std::complex* EDMnzvalLocal, + DistBCDMatrix& DST_Matrix, + std::complex* DM, + std::complex* EDM) +{ + MPI_Group GROUP_TRANS; + MPI_Comm COMM_TRANS = MPI_COMM_NULL; + newGroupCommTrans(DST_Matrix, SRC_Matrix, GROUP_TRANS, COMM_TRANS); + if (COMM_TRANS != MPI_COMM_NULL) + { + const int dst_size = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + for (int i = 0; i < dst_size; ++i) + { + DM[i] = std::complex(0.0, 0.0); + EDM[i] = std::complex(0.0, 0.0); + } + + int NPROC_TRANS; + MPI_Comm_size(COMM_TRANS, &NPROC_TRANS); + std::vector sender_size_process(NPROC_TRANS, 0); + std::vector sender_displacement_process(NPROC_TRANS, 0); + std::vector receiver_size_process(NPROC_TRANS, 0); + std::vector receiver_displacement_process(NPROC_TRANS, 0); + + int nproc_bcd = 0; + std::vector proc_map_bcd_trans; + int myproc_trans = 0; + MPI_Comm_rank(COMM_TRANS, &myproc_trans); + if (myproc_trans == 0) + { + MPI_Group_size(DST_Matrix.get_group(), &nproc_bcd); + MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); + proc_map_bcd_trans.resize(nproc_bcd, 0); + for (int i = 0; i < nproc_bcd; ++i) + { + MPI_Group_translate_ranks(DST_Matrix.get_group(), 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]); + } + MPI_Bcast(proc_map_bcd_trans.data(), nproc_bcd, MPI_INT, 0, COMM_TRANS); + } + else + { + MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); + proc_map_bcd_trans.resize(nproc_bcd, 0); + MPI_Bcast(proc_map_bcd_trans.data(), nproc_bcd, MPI_INT, 0, COMM_TRANS); + } + + for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) + { + const int g_col = SRC_Matrix.globalCol(icol); + int recv_pcol_bcd = 0; + DST_Matrix.localCol(g_col, recv_pcol_bcd); + for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; + rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; + ++rowidx) + { + const int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1; + int recv_prow_bcd = 0; + DST_Matrix.localRow(g_row, recv_prow_bcd); + const int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd); + const int recv_proc = proc_map_bcd_trans[recv_proc_bcd]; + ++sender_size_process[recv_proc]; + } + } + + MPI_Alltoall(sender_size_process.data(), + 1, + MPI_INT, + receiver_size_process.data(), + 1, + MPI_INT, + COMM_TRANS); + + int receiver_size = receiver_size_process[0]; + for (int i = 1; i < NPROC_TRANS; ++i) + { + sender_displacement_process[i] = sender_displacement_process[i - 1] + sender_size_process[i - 1]; + receiver_displacement_process[i] = receiver_displacement_process[i - 1] + receiver_size_process[i - 1]; + receiver_size += receiver_size_process[i]; + } + + const int sender_size = SRC_Matrix.get_nnzlocal(); + std::vector sender_index(std::max(sender_size, 1), -1); + std::vector dst_index(std::max(2 * sender_size, 2), -1); + std::vector p(sender_displacement_process); + int idx = 0; + for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) + { + const int g_col = SRC_Matrix.globalCol(icol); + int recv_pcol_bcd = 0; + const int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd); + for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; + rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; + ++rowidx) + { + const int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1; + int recv_prow_bcd = 0; + const int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd); + const int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd); + const int recv_proc = proc_map_bcd_trans[recv_proc_bcd]; + sender_index[p[recv_proc]] = idx; + dst_index[2 * p[recv_proc]] = recv_row; + dst_index[2 * p[recv_proc] + 1] = recv_col; + ++p[recv_proc]; + ++idx; + } + } + + std::vector sender_index_size_process(sender_size_process); + std::vector sender_index_displacement_process(sender_displacement_process); + std::vector receiver_index_size_process(receiver_size_process); + std::vector receiver_index_displacement_process(receiver_displacement_process); + for (int i = 0; i < NPROC_TRANS; ++i) + { + sender_index_size_process[i] *= 2; + sender_index_displacement_process[i] *= 2; + receiver_index_size_process[i] *= 2; + receiver_index_displacement_process[i] *= 2; + } + + std::vector receiver_index(std::max(2 * receiver_size, 2), -1); + MPI_Alltoallv(dst_index.data(), + sender_index_size_process.data(), + sender_index_displacement_process.data(), + MPI_INT, + receiver_index.data(), + receiver_index_size_process.data(), + receiver_index_displacement_process.data(), + MPI_INT, + COMM_TRANS); + + std::vector sender_buffer(std::max(4 * sender_size, 1), 0.0); + for (int i = 0; i < sender_size; ++i) + { + const int src_index = sender_index[i]; + const std::complex dm_value = DMnzvalLocal[src_index]; + const std::complex edm_value = EDMnzvalLocal[src_index]; + sender_buffer[4 * i] = dm_value.real(); + sender_buffer[4 * i + 1] = dm_value.imag(); + sender_buffer[4 * i + 2] = edm_value.real(); + sender_buffer[4 * i + 3] = edm_value.imag(); + } + + std::vector sender_value_size_process(sender_size_process); + std::vector sender_value_displacement_process(sender_displacement_process); + std::vector receiver_value_size_process(receiver_size_process); + std::vector receiver_value_displacement_process(receiver_displacement_process); + for (int i = 0; i < NPROC_TRANS; ++i) + { + sender_value_size_process[i] *= 4; + sender_value_displacement_process[i] *= 4; + receiver_value_size_process[i] *= 4; + receiver_value_displacement_process[i] *= 4; + } + + std::vector receiver_buffer(std::max(4 * receiver_size, 1), 0.0); + MPI_Alltoallv(sender_buffer.data(), + sender_value_size_process.data(), + sender_value_displacement_process.data(), + MPI_DOUBLE, + receiver_buffer.data(), + receiver_value_size_process.data(), + receiver_value_displacement_process.data(), + MPI_DOUBLE, + COMM_TRANS); + + if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r') + { + for (int i = 0; i < receiver_size; ++i) + { + const int ix = receiver_index[2 * i]; + const int iy = receiver_index[2 * i + 1]; + const int dst_index = ix * DST_Matrix.get_ncol() + iy; + DM[dst_index] = std::complex(receiver_buffer[4 * i], -receiver_buffer[4 * i + 1]); + EDM[dst_index] = std::complex(receiver_buffer[4 * i + 2], -receiver_buffer[4 * i + 3]); + } + } + else + { + for (int i = 0; i < receiver_size; ++i) + { + const int ix = receiver_index[2 * i]; + const int iy = receiver_index[2 * i + 1]; + const int dst_index = iy * DST_Matrix.get_nrow() + ix; + DM[dst_index] = std::complex(receiver_buffer[4 * i], -receiver_buffer[4 * i + 1]); + EDM[dst_index] = std::complex(receiver_buffer[4 * i + 2], -receiver_buffer[4 * i + 3]); + } + } + } + deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS); + return 0; +} + } // namespace pexsi -#endif \ No newline at end of file +#endif diff --git a/source/source_hsolver/module_pexsi/dist_matrix_transformer.h b/source/source_hsolver/module_pexsi/dist_matrix_transformer.h index 672b22f4f32..916c8f332bb 100644 --- a/source/source_hsolver/module_pexsi/dist_matrix_transformer.h +++ b/source/source_hsolver/module_pexsi/dist_matrix_transformer.h @@ -2,6 +2,7 @@ #define DISTMATRIXTRANSFORMER_H #include +#include #include #include // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution @@ -49,6 +50,16 @@ int getNonZeroIndex(char layout, std::vector& rowidx, std::vector& colidx); +int getNonZeroIndex(char layout, + const int nrow, + const int ncol, + std::complex* H_2d, + std::complex* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx); + int buildTransformParameter(DistBCDMatrix& SRC_Matrix, DistCCSMatrix& DST_Matrix, const int NPROC_TRANS, @@ -80,12 +91,27 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, double*& H_ccs, double*& S_ccs); +int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + std::complex* H_2d, + std::complex* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + std::complex*& H_ccs, + std::complex*& S_ccs); + int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* ENDnzvalLocal, DistBCDMatrix& DST_Matrix, double* DM_2d, double* ED_2d); + +int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + std::complex* DMnzvalLocal, + std::complex* EDMnzvalLocal, + DistBCDMatrix& DST_Matrix, + std::complex* DM_2d, + std::complex* ED_2d); }; // namespace DistMatrixTransformer } // namespace pexsi -#endif // DISTMATRIXTRANSFORMER_H \ No newline at end of file +#endif // DISTMATRIXTRANSFORMER_H diff --git a/source/source_hsolver/module_pexsi/pexsi_solver.cpp b/source/source_hsolver/module_pexsi/pexsi_solver.cpp index 1019090c483..4b95bc7564a 100644 --- a/source/source_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/source_hsolver/module_pexsi/pexsi_solver.cpp @@ -98,25 +98,25 @@ int PEXSI_Solver::solve(double mu0) return 0; } -const double PEXSI_Solver::get_totalFreeEnergy() const +double PEXSI_Solver::get_totalFreeEnergy() const { return totalFreeEnergy; } -const double PEXSI_Solver::get_totalEnergyH() const +double PEXSI_Solver::get_totalEnergyH() const { return totalEnergyH; } -const double PEXSI_Solver::get_totalEnergyS() const +double PEXSI_Solver::get_totalEnergyS() const { return totalEnergyS; } -const double PEXSI_Solver::get_mu() const +double PEXSI_Solver::get_mu() const { return mu; } } // namespace pexsi -#endif \ No newline at end of file +#endif diff --git a/source/source_hsolver/module_pexsi/pexsi_solver.h b/source/source_hsolver/module_pexsi/pexsi_solver.h index 922f1b9fb3d..f47ffe99202 100644 --- a/source/source_hsolver/module_pexsi/pexsi_solver.h +++ b/source/source_hsolver/module_pexsi/pexsi_solver.h @@ -1,11 +1,13 @@ #ifndef PEXSI_Solver_H #define PEXSI_Solver_H +#include "pexsi_solver_interface.h" + #include namespace pexsi { -class PEXSI_Solver +class PEXSI_Solver : public IPexsiSolver { public: void prepare(const int blacs_text, @@ -15,12 +17,12 @@ class PEXSI_Solver const double* h, const double* s, double*& DM, - double*& EDM); - int solve(double mu0); - const double get_totalFreeEnergy() const; - const double get_totalEnergyH() const; - const double get_totalEnergyS() const; - const double get_mu() const; + double*& EDM) override; + int solve(double mu0) override; + double get_totalFreeEnergy() const override; + double get_totalEnergyH() const override; + double get_totalEnergyS() const override; + double get_mu() const override; //========================================================== // PEXSI related variables @@ -140,4 +142,4 @@ class PEXSI_Solver double mu; }; } // namespace pexsi -#endif // PEXSI_Solver_H \ No newline at end of file +#endif // PEXSI_Solver_H diff --git a/source/source_hsolver/module_pexsi/pexsi_solver_interface.h b/source/source_hsolver/module_pexsi/pexsi_solver_interface.h new file mode 100644 index 00000000000..405ea0adb0f --- /dev/null +++ b/source/source_hsolver/module_pexsi/pexsi_solver_interface.h @@ -0,0 +1,28 @@ +#ifndef PEXSI_SOLVER_INTERFACE_H +#define PEXSI_SOLVER_INTERFACE_H + +namespace pexsi +{ +class IPexsiSolver +{ + public: + virtual ~IPexsiSolver() = default; + + virtual void prepare(const int blacs_text, + const int nb, + const int nrow, + const int ncol, + const double* h, + const double* s, + double*& DM, + double*& EDM) + = 0; + virtual int solve(double mu0) = 0; + virtual double get_totalFreeEnergy() const = 0; + virtual double get_totalEnergyH() const = 0; + virtual double get_totalEnergyS() const = 0; + virtual double get_mu() const = 0; +}; +} // namespace pexsi + +#endif // PEXSI_SOLVER_INTERFACE_H diff --git a/source/source_hsolver/module_pexsi/simple_pexsi.cpp b/source/source_hsolver/module_pexsi/simple_pexsi.cpp index 6c52066e373..05b32a5d9d9 100644 --- a/source/source_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/source_hsolver/module_pexsi/simple_pexsi.cpp @@ -6,8 +6,10 @@ #ifdef __PEXSI #include +#include #include #include +#include #include #include #include @@ -21,7 +23,7 @@ #include "source_base/timer.h" #include "source_base/tool_quit.h" #include "source_base/global_variable.h" -#include "source_hsolver/diago_pexsi.h" +#include "pexsi_solver.h" namespace pexsi { @@ -115,7 +117,7 @@ int loadPEXSIOption(MPI_Comm comm, int_para[15] = 0; int_para[16] = pexsi::PEXSI_Solver::pexsi_nproc_pole; - double_para[0] = 2;//PARAM.inp.nspin; // pexsi::PEXSI_Solver::pexsi_spin; + double_para[0] = PARAM.inp.nspin == 1 ? 2.0 : 1.0; double_para[1] = pexsi::PEXSI_Solver::pexsi_temp; double_para[2] = pexsi::PEXSI_Solver::pexsi_gap; double_para[3] = pexsi::PEXSI_Solver::pexsi_delta_e; @@ -128,6 +130,21 @@ int loadPEXSIOption(MPI_Comm comm, double_para[10] = pexsi::PEXSI_Solver::pexsi_elec_thr; double_para[11] = pexsi::PEXSI_Solver::pexsi_zero_thr; + const bool default_pexsi_temperature = std::abs(double_para[1] - 0.015) < 1.0e-14; + const bool fermi_dirac_smearing = PARAM.inp.smearing_method == "fd" + || PARAM.inp.smearing_method == "fermi-dirac"; + if (default_pexsi_temperature && !fermi_dirac_smearing) + { + const double derived_temperature = PARAM.inp.smearing_method == "fixed" + ? 2.0e-5 + : std::max(2.0e-5, 0.01 * PARAM.inp.smearing_sigma); + double_para[1] = std::min(double_para[1], derived_temperature); + } + if (int_para[0] == 40 && double_para[1] <= 1.0e-4) + { + int_para[0] = 120; + } + options.numPole = int_para[0]; options.isInertiaCount = int_para[1]; options.maxPEXSIIter = int_para[2]; @@ -271,7 +288,9 @@ int simplePEXSI(MPI_Comm comm_PEXSI, double* FDMnzvalLocal = nullptr; // transform H and S from 2D block cyclic distribution to compressed column sparse matrix // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + ModuleBase::timer::start("Diago_LCAO_Matrix", "TransMAT2CCS"); DistMatrixTransformer::transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal); + ModuleBase::timer::end("Diago_LCAO_Matrix", "TransMAT2CCS"); // MPI_Barrier(MPI_COMM_WORLD); // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test if (comm_PEXSI != MPI_COMM_NULL) @@ -362,5 +381,153 @@ int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Barrier(MPI_COMM_WORLD); return 0; } + +int simplePEXSIComplex(MPI_Comm comm_PEXSI, + MPI_Comm comm_2D, + MPI_Group group_2D, + const int blacs_ctxt, + const int size, + const int nblk, + const int nrow, + const int ncol, + char layout, + std::complex* H, + std::complex* S, + const double numElectronExact, + const std::string PexsiOptionFile, + std::complex*& DM, + std::complex*& EDM, + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy, + double& mu, + double mu0, + double* numElectronPEXSI, + double* numElectronDrvMuPEXSI) +{ + if (comm_2D == MPI_COMM_NULL && comm_PEXSI == MPI_COMM_NULL) + { + return 0; + } + + int myid = 0; + if (comm_PEXSI != MPI_COMM_NULL) + { + MPI_Comm_rank(comm_PEXSI, &myid); + } + + PPEXSIOptions options; + PPEXSISetDefaultOptions(&options); + int numProcessPerPole = 0; + double ZERO_Limit = 0.0; + loadPEXSIOption(comm_PEXSI, PexsiOptionFile, options, numProcessPerPole, ZERO_Limit); + options.symmetric = 1; + options.symmetricStorage = 0; + options.rowOrdering = 0; + options.mu0 = mu0; + + ModuleBase::timer::start("Diago_LCAO_Matrix", "setup_PEXSI_plan"); + PPEXSIPlan plan; + int info = 0; + int outputFileIndex = -1; + int pexsi_prow = 0; + int pexsi_pcol = 0; + ModuleBase::timer::start("Diago_LCAO_Matrix", "splitNProc2NProwNPcol"); + splitNProc2NProwNPcol(numProcessPerPole, pexsi_prow, pexsi_pcol); + ModuleBase::timer::end("Diago_LCAO_Matrix", "splitNProc2NProwNPcol"); + + ModuleBase::timer::start("Diago_LCAO_Matrix", "PEXSIPlanInit"); + if (comm_PEXSI != MPI_COMM_NULL) + { + plan = PPEXSIPlanInitialize(comm_PEXSI, pexsi_prow, pexsi_pcol, outputFileIndex, &info); + } + ModuleBase::timer::end("Diago_LCAO_Matrix", "PEXSIPlanInit"); + ModuleBase::timer::end("Diago_LCAO_Matrix", "setup_PEXSI_plan"); + + DistCCSMatrix DST_Matrix(comm_PEXSI, numProcessPerPole, size); + DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, layout); + + std::complex* HnzvalLocal = nullptr; + std::complex* SnzvalLocal = nullptr; + std::complex* DMnzvalLocal = nullptr; + std::complex* EDMnzvalLocal = nullptr; + std::complex* FDMnzvalLocal = nullptr; + + ModuleBase::timer::start("Diago_LCAO_Matrix", "TransMAT2CCS"); + DistMatrixTransformer::transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal); + ModuleBase::timer::end("Diago_LCAO_Matrix", "TransMAT2CCS"); + + if (comm_PEXSI != MPI_COMM_NULL) + { + int isSIdentity = 0; + PPEXSILoadComplexHSMatrix(plan, + options, + size, + DST_Matrix.get_nnz(), + DST_Matrix.get_nnzlocal(), + DST_Matrix.get_numcol_local(), + DST_Matrix.get_colptr_local(), + DST_Matrix.get_rowind_local(), + reinterpret_cast(HnzvalLocal), + isSIdentity, + reinterpret_cast(SnzvalLocal), + &info); + PPEXSISymbolicFactorizeComplexUnsymmetricMatrix(plan, + options, + reinterpret_cast(HnzvalLocal), + &info); + + double nelec = 0.0; + double d_nelec_d_mu = 0.0; + mu = mu0; + ModuleBase::timer::start("Diago_LCAO_Matrix", "PEXSIDFT"); + PPEXSICalculateFermiOperatorComplex(plan, + options, + mu, + numElectronExact, + &nelec, + &d_nelec_d_mu, + &info); + if (numElectronPEXSI != nullptr) + { + *numElectronPEXSI = nelec; + } + if (numElectronDrvMuPEXSI != nullptr) + { + *numElectronDrvMuPEXSI = d_nelec_d_mu; + } + ModuleBase::timer::end("Diago_LCAO_Matrix", "PEXSIDFT"); + + DMnzvalLocal = new std::complex[DST_Matrix.get_nnzlocal()]; + EDMnzvalLocal = new std::complex[DST_Matrix.get_nnzlocal()]; + FDMnzvalLocal = new std::complex[DST_Matrix.get_nnzlocal()]; + if (myid < numProcessPerPole) + { + PPEXSIRetrieveComplexDFTMatrix(plan, + reinterpret_cast(DMnzvalLocal), + reinterpret_cast(EDMnzvalLocal), + reinterpret_cast(FDMnzvalLocal), + &totalEnergyH, + &totalEnergyS, + &totalFreeEnergy, + &info); + } + PPEXSIPlanFinalize(plan, &info); + } + + ModuleBase::timer::start("Diago_LCAO_Matrix", "TransMAT22D"); + DistMatrixTransformer::transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM); + ModuleBase::timer::end("Diago_LCAO_Matrix", "TransMAT22D"); + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(MPI_COMM_WORLD); + delete[] DMnzvalLocal; + delete[] EDMnzvalLocal; + delete[] FDMnzvalLocal; + delete[] HnzvalLocal; + delete[] SnzvalLocal; + MPI_Barrier(MPI_COMM_WORLD); + return 0; +} } // namespace pexsi -#endif \ No newline at end of file +#endif diff --git a/source/source_hsolver/module_pexsi/simple_pexsi.h b/source/source_hsolver/module_pexsi/simple_pexsi.h index db8879e5ac7..5b1c3f7d888 100644 --- a/source/source_hsolver/module_pexsi/simple_pexsi.h +++ b/source/source_hsolver/module_pexsi/simple_pexsi.h @@ -2,6 +2,8 @@ #define SIMPLE_PEXSI_H #include +#include +#include // a simple interface for calling pexsi with 2D block cyclic distributed matrix namespace pexsi { @@ -25,5 +27,28 @@ int simplePEXSI(MPI_Comm comm_PEXSI, double& totalFreeEnergy, double& mu, double mu0); + +int simplePEXSIComplex(MPI_Comm comm_PEXSI, + MPI_Comm comm_2D, + MPI_Group group_2D, + const int blacs_ctxt, + const int size, + const int nblk, + const int nrow, + const int ncol, + char layout, + std::complex* H, + std::complex* S, + const double nElectronExact, + const std::string PexsiOptionFile, + std::complex*& DM, + std::complex*& EDM, + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy, + double& mu, + double mu0, + double* numElectronPEXSI = nullptr, + double* numElectronDrvMuPEXSI = nullptr); } -#endif // SIMPLE_PEXSI_H \ No newline at end of file +#endif // SIMPLE_PEXSI_H diff --git a/source/source_hsolver/test/diago_pexsi_test.cpp b/source/source_hsolver/test/diago_pexsi_test.cpp index 0d021166e11..327da4f67d4 100644 --- a/source/source_hsolver/test/diago_pexsi_test.cpp +++ b/source/source_hsolver/test/diago_pexsi_test.cpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #define PASSTHRESHOLD 5e-4 @@ -382,6 +384,124 @@ class PexsiGammaOnlyTest : public ::testing::TestWithParam> { }; +class FakePexsiSolver : public pexsi::IPexsiSolver +{ + public: + void prepare(const int blacs_text_in, + const int nb_in, + const int nrow_in, + const int ncol_in, + const double* h_in, + const double* s_in, + double*& dm_in, + double*& edm_in) override + { + prepared = true; + blacs_text = blacs_text_in; + nb = nb_in; + nrow = nrow_in; + ncol = ncol_in; + h = h_in; + s = s_in; + dm = dm_in; + edm = edm_in; + for (int i = 0; i < nrow * ncol; ++i) + { + dm[i] = 10.0 + i; + edm[i] = 20.0 + i; + } + } + + int solve(double mu0_in) override + { + solved = true; + mu0 = mu0_in; + return 0; + } + + double get_totalFreeEnergy() const override + { + return total_free_energy; + } + + double get_totalEnergyH() const override + { + return total_energy_h; + } + + double get_totalEnergyS() const override + { + return total_energy_s; + } + + double get_mu() const override + { + return mu; + } + + bool prepared = false; + bool solved = false; + int blacs_text = -1; + int nb = -1; + int nrow = -1; + int ncol = -1; + double mu0 = 0.0; + const double* h = nullptr; + const double* s = nullptr; + double* dm = nullptr; + double* edm = nullptr; + double total_free_energy = -1.5; + double total_energy_h = -2.5; + double total_energy_s = 3.5; + double mu = 0.125; +}; + +TEST(DiagoPexsiRefactorTest, UsesInjectedSolverAndOwnedDensityBuffers) +{ + PARAM.input.nspin = 1; + PARAM.inp.nspin = 1; + pexsi::PEXSI_Solver::pexsi_mu = -0.25; + + Parallel_Orbitals po; + po.blacs_ctxt = 11; + po.nb = 2; + po.nrow = 2; + po.ncol = 2; + + HamiltTEST hmtest; + hmtest.nrow = po.nrow; + hmtest.ncol = po.ncol; + hmtest.h_local = {1.0, 0.0, 0.0, 2.0}; + hmtest.s_local = {1.0, 0.0, 0.0, 1.0}; + + psi::Psi psi; + + auto fake_solver = std::make_unique(); + FakePexsiSolver* fake_solver_ptr = fake_solver.get(); + hsolver::DiagoPexsi diago(&po, std::move(fake_solver)); + + ASSERT_NE(diago.DM[0], nullptr); + ASSERT_NE(diago.EDM[0], nullptr); + diago.diag(&hmtest, psi, nullptr); + + EXPECT_TRUE(fake_solver_ptr->prepared); + EXPECT_TRUE(fake_solver_ptr->solved); + EXPECT_EQ(fake_solver_ptr->blacs_text, po.blacs_ctxt); + EXPECT_EQ(fake_solver_ptr->nb, po.nb); + EXPECT_EQ(fake_solver_ptr->nrow, po.nrow); + EXPECT_EQ(fake_solver_ptr->ncol, po.ncol); + EXPECT_EQ(fake_solver_ptr->h, hmtest.h_local.data()); + EXPECT_EQ(fake_solver_ptr->s, hmtest.s_local.data()); + EXPECT_EQ(fake_solver_ptr->dm, diago.DM[0]); + EXPECT_EQ(fake_solver_ptr->edm, diago.EDM[0]); + EXPECT_DOUBLE_EQ(fake_solver_ptr->mu0, -0.25); + EXPECT_DOUBLE_EQ(diago.DM[0][3], 13.0); + EXPECT_DOUBLE_EQ(diago.EDM[0][3], 23.0); + EXPECT_DOUBLE_EQ(diago.totalFreeEnergy, fake_solver_ptr->total_free_energy); + EXPECT_DOUBLE_EQ(diago.totalEnergyH, fake_solver_ptr->total_energy_h); + EXPECT_DOUBLE_EQ(diago.totalEnergyS, fake_solver_ptr->total_energy_s); +} + TEST_P(PexsiGammaOnlyTest, LCAO) { std::stringstream out_info; diff --git a/tools/pexsi/check_lcao_10case_regression.py b/tools/pexsi/check_lcao_10case_regression.py new file mode 100755 index 00000000000..0fc29152118 --- /dev/null +++ b/tools/pexsi/check_lcao_10case_regression.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +"""Validate ABACUS LCAO 10-case outputs against the official energy baseline.""" + +from __future__ import annotations + +import argparse +import re +import sys +from pathlib import Path + + +REFERENCE_ENERGIES_EV = { + "001": ("001_4GaAs", -7836.15655820), + "002": ("002_C2H6O", -665.55500011), + "003": ("003_4MoS2", -9699.00659511), + "004": ("004_12Pt111", -39600.74431945), + "005": ("005_3BaTiO3", -10749.40029987), + "006": ("006_16Na", -18524.76009654), + "007": ("007_27Fe", -86945.53067515), + "008": ("008_32H2O", -14950.45084341), + "009": ("009_Battery108", -124070.65411079), + "010": ("010_216Si", -23123.69990200), +} + +FINAL_ETOT_RE = re.compile(r"!\s*FINAL_ETOT_IS\s+([-+0-9.eE]+)\s+eV") + + +def find_case_dir(run_root: Path, case_id: str) -> Path | None: + matches = sorted(path for path in run_root.iterdir() if path.is_dir() and path.name.startswith(f"{case_id}_")) + return matches[0] if matches else None + + +def find_running_log(case_dir: Path) -> Path | None: + matches = sorted(case_dir.glob("OUT.*/running_scf.log")) + return matches[0] if matches else None + + +def parse_log(log_path: Path) -> tuple[bool, float | None]: + text = log_path.read_text(errors="replace") + converged = "#SCF IS CONVERGED#" in text + matches = FINAL_ETOT_RE.findall(text) + final_energy = float(matches[-1]) if matches else None + return converged, final_energy + + +def validate(run_root: Path, tolerance_ev: float) -> int: + if not run_root.is_dir(): + print(f"ERROR: run root does not exist: {run_root}", file=sys.stderr) + return 2 + + failures: list[str] = [] + print(f"{'case':<18} {'status':<10} {'energy(eV)':>18} {'ref(eV)':>18} {'|dE|(eV)':>12}") + for case_id, (case_name, reference_energy) in REFERENCE_ENERGIES_EV.items(): + case_dir = find_case_dir(run_root, case_id) + if case_dir is None: + failures.append(f"{case_name}: missing case directory") + print(f"{case_name:<18} {'MISSING':<10}") + continue + + log_path = find_running_log(case_dir) + if log_path is None: + failures.append(f"{case_name}: missing OUT.*/running_scf.log") + print(f"{case_name:<18} {'NO_LOG':<10}") + continue + + converged, final_energy = parse_log(log_path) + if final_energy is None: + failures.append(f"{case_name}: missing FINAL_ETOT_IS") + print(f"{case_name:<18} {'NO_ETOT':<10}") + continue + + delta = abs(final_energy - reference_energy) + status = "PASS" if converged and delta <= tolerance_ev else "FAIL" + if status != "PASS": + reason = "not converged" if not converged else f"|dE|={delta:.3e} > {tolerance_ev:.3e}" + failures.append(f"{case_name}: {reason}") + print(f"{case_name:<18} {status:<10} {final_energy:18.10f} {reference_energy:18.10f} {delta:12.3e}") + + if failures: + print("\nFailures:", file=sys.stderr) + for failure in failures: + print(f" - {failure}", file=sys.stderr) + return 1 + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("run_root", type=Path, help="directory containing the 001_* ... 010_* LCAO case outputs") + parser.add_argument( + "--tolerance-ev", + type=float, + default=5.0e-7, + help="maximum allowed absolute total-energy error in eV", + ) + args = parser.parse_args() + return validate(args.run_root, args.tolerance_ev) + + +if __name__ == "__main__": + raise SystemExit(main())