diff --git a/docs/advanced/acceleration/cuda.md b/docs/advanced/acceleration/cuda.md index 0200e98e78..98a85f11b0 100644 --- a/docs/advanced/acceleration/cuda.md +++ b/docs/advanced/acceleration/cuda.md @@ -29,7 +29,9 @@ To compile and use ABACUS in CUDA mode, you currently need to have an NVIDIA GPU Check the [Advanced Installation Options](https://abacus-rtd.readthedocs.io/en/latest/advanced/install.html#build-with-cuda-support) for the installation of CUDA version support. -When the compilation parameter USE_ELPA is ON (which is the default value) and USE_CUDA is also set to ON, the ELPA library needs to [enable GPU support](https://github.com/marekandreas/elpa/blob/master/documentation/INSTALL.md) at compile time. +Setting both USE_ELPA and USE_CUDA to ON does not automatically enable ELPA to run on GPUs. ELPA support for GPUs needs to be enabled when ELPA is compiled. [enable GPU support](https://github.com/marekandreas/elpa/blob/master/documentation/INSTALL.md). + +The ABACUS program will automatically determine whether the current ELPA supports GPU based on the elpa/elpa_configured_options.h header file. Users can also check this header file to determine the GPU support of ELPA in their environment. ELPA introduced a new API elpa_setup_gpu in version 2023.11.001. So if you want to enable ELPA GPU in ABACUS, the ELPA version must be greater than or equal to 2023.11.001. ## Run with the GPU support by editing the INPUT script: diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index f6cb7639d7..7ab8c685a0 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -666,6 +666,7 @@ These variables are used to control parameters related to input files. - **Type**: String - **Description**: the name of the structure file - Containing various information about atom species, including pseudopotential files, local orbitals files, cell information, atom positions, and whether atoms should be allowed to move. + - When [calculation](#calculation) is set to `md` and [md_restart](#md_restart) is set to `true`, this keyword will NOT work. - Refer to [Doc](https://github.com/deepmodeling/abacus-develop/blob/develop/docs/advanced/input_files/stru.md) - **Default**: STRU diff --git a/source/Makefile.Objects b/source/Makefile.Objects index e723eef0bc..55ec7e7dbf 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -523,6 +523,7 @@ OBJS_IO=input_conv.o\ read_input_item_other.o\ read_input_item_output.o\ read_set_globalv.o\ + orb_io.o\ OBJS_IO_LCAO=cal_r_overlap_R.o\ write_orb_info.o\ diff --git a/source/driver_run.cpp b/source/driver_run.cpp index 5bf9eb49c5..451c4025b1 100644 --- a/source/driver_run.cpp +++ b/source/driver_run.cpp @@ -40,7 +40,7 @@ void Driver::driver_run() { // the life of ucell should begin here, mohan 2024-05-12 // delete ucell as a GlobalC in near future - GlobalC::ucell.setup_cell(PARAM.inp.stru_file, GlobalV::ofs_running); + GlobalC::ucell.setup_cell(PARAM.globalv.global_in_stru, GlobalV::ofs_running); Check_Atomic_Stru::check_atomic_stru(GlobalC::ucell, PARAM.inp.min_dist_coef); diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp index caa75cee7c..f1a39252f6 100644 --- a/source/module_base/global_variable.cpp +++ b/source/module_base/global_variable.cpp @@ -55,8 +55,6 @@ int GSIZE = DSIZE; //---------------------------------------------------------- // EXPLAIN : The input file name and directory //---------------------------------------------------------- -std::string stru_file = "STRU"; - std::ofstream ofs_running; std::ofstream ofs_warning; std::ofstream ofs_info; // output math lib info diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h index 83b6b3faf9..2e0f3e575c 100644 --- a/source/module_base/global_variable.h +++ b/source/module_base/global_variable.h @@ -80,7 +80,6 @@ extern int KPAR_LCAO; // NAME : ofs_running( contain information during runnnig) // NAME : ofs_warning( contain warning information, including error) //========================================================== -extern std::string stru_file; // extern std::string global_pseudo_type; // mohan add 2013-05-20 (xiaohui add // 2013-06-23) extern std::ofstream ofs_running; diff --git a/source/module_base/sph_bessel_recursive.h b/source/module_base/sph_bessel_recursive.h index a74b0987e3..e11af5127d 100644 --- a/source/module_base/sph_bessel_recursive.h +++ b/source/module_base/sph_bessel_recursive.h @@ -7,7 +7,7 @@ #define SPH_BESSEL_RECURSIVE_H #include -#include"stddef.h" +#include namespace ModuleBase { @@ -32,7 +32,7 @@ class Sph_Bessel_Recursive::D1 private: std::vector> jlx; // jlx[l][x] - double dx; + double dx = 0.0; bool finish_set_dx = false; void cal_jlx_0( const int l_size ); @@ -56,7 +56,7 @@ class Sph_Bessel_Recursive::D2 private: std::vector>> jlx; // jlx[l][x1][x2] - double dx; + double dx = 0.0; bool finish_set_dx = false; void cal_jlx_0( const int l_size, const size_t ix1_size, const size_t ix2_size ); diff --git a/source/module_basis/module_nao/atomic_radials.cpp b/source/module_basis/module_nao/atomic_radials.cpp index 2391d471db..7d095e70cd 100644 --- a/source/module_basis/module_nao/atomic_radials.cpp +++ b/source/module_basis/module_nao/atomic_radials.cpp @@ -3,11 +3,16 @@ #include "module_base/math_integral.h" #include "module_base/parallel_common.h" #include "module_base/tool_quit.h" + +// FIXME: should update with pyabacus +// #include "module_io/orb_io.h" + #include "projgen.h" #include #include #include +#include AtomicRadials& AtomicRadials::operator=(const AtomicRadials& rhs) { @@ -207,7 +212,6 @@ void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log, { rgrid[ir] = ir * dr; } - chi_ = new NumericalRadial[nchi_]; // record whether an orbital has been read or not @@ -269,116 +273,47 @@ void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log, delete[] rgrid; } -void AtomicRadials::read_abacus_orb(std::ifstream& ifs, - std::string& elem, - double& ecut, - int& nr, - double& dr, - std::vector& nzeta, - std::vector>& radials, - const int rank) -{ - nr = 0; // number of grid points - dr = 0; // grid spacing - int lmax, nchi = 0; // number of radial functions - std::vector> radial_map_; // build a map from [l][izeta] to 1-d array index - std::string tmp; - // first read the header - if (rank == 0) - { - if (!ifs.is_open()) - { - ModuleBase::WARNING_QUIT("AtomicRadials::read_abacus_orb", "Couldn't open orbital file."); - } - while (ifs >> tmp) - { - if (tmp == "Element") - { - ifs >> elem; - } - else if (tmp == "Cutoff(Ry)") - { - ifs >> ecut; - } - else if (tmp == "Lmax") - { - ifs >> lmax; - nzeta.resize(lmax + 1); - for (int l = 0; l <= lmax; ++l) - { - ifs >> tmp >> tmp >> tmp >> nzeta[l]; - } - } - else if (tmp == "Mesh") - { - ifs >> nr; - continue; - } - else if (tmp == "dr") - { - ifs >> dr; - break; - } - } - radial_map_.resize(lmax + 1); - for (int l = 0; l <= lmax; ++l) - { - radial_map_[l].resize(nzeta[l]); - } - int ichi = 0; - for (int l = 0; l <= lmax; ++l) - { - for (int iz = 0; iz < nzeta[l]; ++iz) - { - radial_map_[l][iz] = ichi++; // return the value of ichi, then increment - } - } - nchi = ichi; // total number of radial functions - radials.resize(nchi); - std::for_each(radials.begin(), radials.end(), [nr](std::vector& v) { v.resize(nr); }); - } - - // broadcast the header information -#ifdef __MPI - Parallel_Common::bcast_string(elem); - Parallel_Common::bcast_double(ecut); - Parallel_Common::bcast_int(lmax); - Parallel_Common::bcast_int(nchi); - Parallel_Common::bcast_int(nr); - Parallel_Common::bcast_double(dr); -#endif - - // then adjust the size of the vectors - if (rank != 0) - { - nzeta.resize(lmax + 1); - radials.resize(nchi); - std::for_each(radials.begin(), radials.end(), [nr](std::vector& v) { v.resize(nr); }); - } - // broadcast the number of zeta functions for each angular momentum -#ifdef __MPI - Parallel_Common::bcast_int(nzeta.data(), lmax + 1); -#endif - - // read the radial functions by rank0 - int ichi = 0; - for (int i = 0; i != nchi; ++i) - { - if (rank == 0) - { - int l, izeta; - ifs >> tmp >> tmp >> tmp; - ifs >> tmp >> l >> izeta; - ichi = radial_map_[l][izeta]; - for (int ir = 0; ir != nr; ++ir) - { - ifs >> radials[ichi][ir]; - } - } - // broadcast the radial functions -#ifdef __MPI - Parallel_Common::bcast_int(ichi); // let other ranks know where to store the radial function - Parallel_Common::bcast_double(radials[ichi].data(), nr); -#endif - } -} \ No newline at end of file +// FIXME: should update with pyabacus +// void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log, const int rank) +// { +// /* +// * Read the orbital file. +// * +// * For orbital file format, see +// * (new) abacus-develop/tools/SIAB/PyTorchGradient/source/IO/print_orbital.py +// * (old) abacus-develop/tools/SIAB/SimulatedAnnealing/source/src_spillage/Plot_Psi.cpp +// * */ +// int ngrid = 0; // number of grid points +// double dr = 0; // grid spacing +// std::string tmp; + +// int nr; +// std::vector nzeta; +// std::vector> radials; + +// ModuleIO::read_abacus_orb(ifs, symbol_, orb_ecut_, nr, dr, nzeta, radials, rank); + +// lmax_ = nzeta.size() - 1; +// nzeta_ = new int[lmax_ + 1]; +// std::copy(nzeta.begin(), nzeta.end(), nzeta_); + +// nchi_ = std::accumulate(nzeta.begin(), nzeta.end(), 0); +// nzeta_max_ = *std::max_element(nzeta.begin(), nzeta.end()); + +// indexing(); + +// std::vector rgrid(nr); +// std::iota(rgrid.begin(), rgrid.end(), 0); +// std::for_each(rgrid.begin(), rgrid.end(), [dr](double& r) { r *= dr; }); +// chi_ = new NumericalRadial[nchi_]; +// int ichi = 0; +// for (int l = 0; l <= lmax_; ++l) +// { +// for (int izeta = 0; izeta < nzeta[l]; ++izeta) +// { +// chi_[index(l, izeta)].build(l, true, nr, rgrid.data(), radials[ichi].data(), 0, izeta, symbol_, itype_, false); +// chi_[index(l, izeta)].normalize(); +// ++ichi; +// } +// } +// } diff --git a/source/module_basis/module_nao/atomic_radials.h b/source/module_basis/module_nao/atomic_radials.h index 874f56897f..51b17e02cc 100644 --- a/source/module_basis/module_nao/atomic_radials.h +++ b/source/module_basis/module_nao/atomic_radials.h @@ -43,28 +43,6 @@ class AtomicRadials : public RadialSet //! Get the energy cutoff as given by the orbital file double orb_ecut() const { return orb_ecut_; } - /** - * @brief static version of read_abacus_orb. A delete-new operation may cause the memory leak, - * it is better to use std::vector to replace the raw pointer. - * - * @param ifs [in] ifstream from the orbital file, via `std::ifstream ifs(forb);` - * @param elem [out] element symbol - * @param ecut [out] planewave energy cutoff - * @param nr [out] number of radial grid points - * @param dr [out] radial grid spacing - * @param nzeta [out] number of zeta functions for each angular momentum - * @param radials [out] radial orbitals - * @param rank [in] MPI rank - */ - static void read_abacus_orb(std::ifstream& ifs, - std::string& elem, - double& ecut, - int& nr, - double& dr, - std::vector& nzeta, - std::vector>& radials, - const int rank = 0); - private: double orb_ecut_; //!< energy cutoff as given by the orbital file diff --git a/source/module_basis/module_nao/radial_set.cpp b/source/module_basis/module_nao/radial_set.cpp index 259f792891..3b00ba679a 100644 --- a/source/module_basis/module_nao/radial_set.cpp +++ b/source/module_basis/module_nao/radial_set.cpp @@ -7,6 +7,9 @@ #include "module_base/spherical_bessel_transformer.h" +// FIXME: should update with pyabacus +// #include "module_io/orb_io.h" + RadialSet::~RadialSet() { delete[] nzeta_; @@ -252,3 +255,28 @@ void RadialSet::write_abacus_orb(const std::string& file_name, const int rank) c } file_to.close(); } + +// FIXME: should update with pyabacus +// void RadialSet::write_abacus_orb(const std::string& forb, const int rank) const +// { +// std::ofstream ofs; +// ofs.open(forb, std::ios::out); + +// const double dr = 0.01; +// const int nr = static_cast(rcut_max_ / dr) + 1; +// std::vector nzeta(lmax_ + 1); +// std::copy(nzeta_, nzeta_ + lmax_ + 1, nzeta.begin()); +// std::vector> radials(nchi_, std::vector(nr, 0.0)); +// int ichi = 0; +// for (int l = 0; l <= lmax_; l++) +// { +// for (int izeta = 0; izeta < nzeta[l]; izeta++) +// { +// std::copy(chi_[index(l, izeta)].rvalue(), chi_[index(l, izeta)].rvalue() + nr, radials[ichi].begin()); +// ichi++; +// } +// } + +// ModuleIO::write_abacus_orb(ofs, symbol_, 100, nr, dr, nzeta, radials, rank); +// ofs.close(); +// } diff --git a/source/module_basis/module_nao/test/CMakeLists.txt b/source/module_basis/module_nao/test/CMakeLists.txt index ac58199df4..0759f33435 100644 --- a/source/module_basis/module_nao/test/CMakeLists.txt +++ b/source/module_basis/module_nao/test/CMakeLists.txt @@ -17,6 +17,7 @@ AddTest( ../projgen.cpp ../../module_ao/ORB_atomic_lm.cpp ../../module_ao/ORB_atomic.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base ) @@ -29,6 +30,7 @@ AddTest( ../numerical_radial.cpp ../../module_ao/ORB_atomic_lm.cpp ../../module_ao/ORB_atomic.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base ) @@ -41,6 +43,7 @@ AddTest( ../numerical_radial.cpp ../../module_ao/ORB_atomic_lm.cpp ../../module_ao/ORB_atomic.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base ) @@ -53,6 +56,7 @@ AddTest( ../numerical_radial.cpp ../../module_ao/ORB_atomic_lm.cpp ../../module_ao/ORB_atomic.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base ) @@ -65,6 +69,7 @@ AddTest( ../numerical_radial.cpp ../../module_ao/ORB_atomic_lm.cpp ../../module_ao/ORB_atomic.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base ) @@ -83,6 +88,7 @@ AddTest( ../sphbes_radials.cpp ../../module_ao/ORB_atomic_lm.cpp ../../module_ao/ORB_atomic.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base ) @@ -103,6 +109,7 @@ AddTest( ../two_center_bundle.cpp ../two_center_integrator.cpp ../real_gaunt_table.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base container orb ) @@ -132,6 +139,7 @@ AddTest( ../radial_set.cpp ../numerical_radial.cpp ../two_center_bundle.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base container orb ) @@ -152,6 +160,7 @@ AddTest( ../radial_set.cpp ../projgen.cpp ../numerical_radial.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base container orb ) @@ -172,6 +181,7 @@ AddTest( ../radial_set.cpp ../projgen.cpp ../numerical_radial.cpp + ../../../module_io/orb_io.cpp LIBS parameter ${math_libs} device base container orb ) diff --git a/source/module_basis/module_nao/test/atomic_radials_test.cpp b/source/module_basis/module_nao/test/atomic_radials_test.cpp index 5b201a5f7f..f1a64c2e07 100644 --- a/source/module_basis/module_nao/test/atomic_radials_test.cpp +++ b/source/module_basis/module_nao/test/atomic_radials_test.cpp @@ -36,10 +36,6 @@ using ModuleBase::SphericalBesselTransformer; * * - to_numerical_orbital * - Overwrites the content of a Numerical_Orbital object with the current object. - * - * - read_abacus_orb - * - The static version, reads the radial functions from an abacus file. - * * */ class AtomicRadialsTest : public ::testing::Test { @@ -268,44 +264,6 @@ TEST_F(AtomicRadialsTest, ToNumericalOrbital) } } -TEST_F(AtomicRadialsTest, ReadAbacusOrb) -{ - std::ifstream ifs(file); - std::string elem; - double ecut, dr; - int nr; - std::vector nzeta; - std::vector> radials; - AtomicRadials::read_abacus_orb(ifs, elem, ecut, nr, dr, nzeta, radials); - EXPECT_EQ(elem, "Ti"); - EXPECT_DOUBLE_EQ(ecut, 100.0); - EXPECT_EQ(nr, 1001); - EXPECT_DOUBLE_EQ(dr, 0.01); - EXPECT_EQ(nzeta.size(), 4); // l from 0 to 3 - EXPECT_EQ(nzeta[0], 4); - EXPECT_EQ(nzeta[1], 2); - EXPECT_EQ(nzeta[2], 2); - EXPECT_EQ(nzeta[3], 1); - EXPECT_EQ(radials.size(), 9); // 4 + 2 + 2 + 1 - for(auto& radial: radials) - { - EXPECT_EQ(radial.size(), 1001); - } - EXPECT_EQ(radials[0][0], -1.581711853170e-01); - EXPECT_EQ(radials[0][4], -1.583907030513e-01); - EXPECT_EQ(radials[0][996], -4.183526380009e-05); - EXPECT_EQ(radials[0][1000], 0); - EXPECT_EQ(radials[3][0], -1.166292682541e+00); - EXPECT_EQ(radials[3][4], -1.164223359672e+00); - EXPECT_EQ(radials[3][996], -3.183325576529e-04); - EXPECT_EQ(radials[3][1000], 0); - EXPECT_EQ(radials[8][0], 0); - EXPECT_EQ(radials[8][4], 3.744878535962e-05); - EXPECT_EQ(radials[8][996], 7.495357740660e-05); - EXPECT_EQ(radials[8][1000], 0); - ifs.close(); -} - int main(int argc, char** argv) { diff --git a/source/module_cell/module_paw/paw_cell_libpaw.cpp b/source/module_cell/module_paw/paw_cell_libpaw.cpp index aac0b4474a..7abdafa3ca 100644 --- a/source/module_cell/module_paw/paw_cell_libpaw.cpp +++ b/source/module_cell/module_paw/paw_cell_libpaw.cpp @@ -204,9 +204,11 @@ void Paw_Cell::mix_dij(const int iat, double*dij_paw) const int size_dij = nproj * (nproj+1) / 2; for(int i = 0; i < size_dij * nspden; i ++) { - if(!first_iter) dij_paw[i] = dij_save[iat][i] * (1.0 - mixing_beta) + dij_paw[i] * mixing_beta; + if(!first_iter) { dij_paw[i] = dij_save[iat][i] * (1.0 - mixing_beta) + dij_paw[i] * mixing_beta; +} - if(count > 30) dij_paw[i] = dij_save[iat][i]; + if(count > 30) { dij_paw[i] = dij_save[iat][i]; +} dij_save[iat][i] = dij_paw[i]; } @@ -224,7 +226,7 @@ void Paw_Cell::set_libpaw_files() filename_list = new char[ntypat*264]; if(GlobalV::MY_RANK == 0) { - std::ifstream ifa(PARAM.inp.stru_file.c_str(), std::ios::in); + std::ifstream ifa(PARAM.globalv.global_in_stru.c_str(), std::ios::in); if (!ifa) { ModuleBase::WARNING_QUIT("set_libpaw_files", "can not open stru file"); @@ -234,7 +236,8 @@ void Paw_Cell::set_libpaw_files() while(!ifa.eof()) { getline(ifa,line); - if (line.find("PAW_FILES") != std::string::npos) break; + if (line.find("PAW_FILES") != std::string::npos) { break; +} } for(int i = 0; i < ntypat*264; i++) @@ -681,7 +684,8 @@ void Paw_Cell::set_sij() double* sij = new double[nproj * nproj]; #ifdef __MPI - if(GlobalV::RANK_IN_POOL == 0) extract_sij(it,size_sij,sij_libpaw); + if(GlobalV::RANK_IN_POOL == 0) { extract_sij(it,size_sij,sij_libpaw); +} Parallel_Common::bcast_double(sij_libpaw,size_sij*nspden); #else extract_sij(it,size_sij,sij_libpaw); diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp index 634c5c51ea..1e851f9651 100644 --- a/source/module_esolver/esolver_ks.cpp +++ b/source/module_esolver/esolver_ks.cpp @@ -146,7 +146,7 @@ void ESolver_KS::before_all_runners(const Input_para& inp, UnitCell& if (GlobalV::MY_RANK == 0) { - std::ifstream ifa(PARAM.inp.stru_file.c_str(), std::ios::in); + std::ifstream ifa(PARAM.globalv.global_in_stru.c_str(), std::ios::in); if (!ifa) { ModuleBase::WARNING_QUIT("set_libpaw_files", "can not open stru file"); diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index 735ef0dda1..eb8bb63c2c 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -19,8 +19,6 @@ #include "module_parameter/parameter.h" //--------------temporary---------------------------- -#include - #include "module_base/global_function.h" #include "module_cell/module_neighbor/sltk_grid_driver.h" #include "module_elecstate/module_charge/symmetry_rho.h" @@ -30,9 +28,11 @@ #include "module_hamilt_lcao/module_dftu/dftu.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_io/print_info.h" + +#include #ifdef __EXX -#include "module_ri/RPA_LRI.h" #include "module_io/restart_exx_csr.h" +#include "module_ri/RPA_LRI.h" #endif #ifdef __DEEPKS @@ -69,7 +69,7 @@ ESolver_KS_LCAO::ESolver_KS_LCAO() this->basisname = "LCAO"; #ifdef __EXX // 1. currently this initialization must be put in constructor rather than `before_all_runners()` - // because the latter is not reused by ESolver_LCAO_TDDFT, + // because the latter is not reused by ESolver_LCAO_TDDFT, // which cause the failure of the subsequent procedure reused by ESolver_LCAO_TDDFT // 2. always construct but only initialize when if(cal_exx) is true // because some members like two_level_step are used outside if(cal_exx) @@ -133,8 +133,7 @@ void ESolver_KS_LCAO::before_all_runners(const Input_para& inp, UnitCell } // 1.3) Setup k-points according to symmetry. - this->kv - .set(ucell.symm, PARAM.inp.kpoint_file, PARAM.inp.nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); + this->kv.set(ucell.symm, PARAM.inp.kpoint_file, PARAM.inp.nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT K-POINTS"); Print_Info::setup_parameters(ucell, this->kv); @@ -151,26 +150,25 @@ void ESolver_KS_LCAO::before_all_runners(const Input_para& inp, UnitCell if (this->pelec == nullptr) { // TK stands for double and complex? - this->pelec = new elecstate::ElecStateLCAO( - &(this->chr), // use which parameter? - &(this->kv), - this->kv.get_nks(), - &(this->GG), // mohan add 2024-04-01 - &(this->GK), // mohan add 2024-04-01 - this->pw_rho, - this->pw_big); + this->pelec = new elecstate::ElecStateLCAO(&(this->chr), // use which parameter? + &(this->kv), + this->kv.get_nks(), + &(this->GG), // mohan add 2024-04-01 + &(this->GK), // mohan add 2024-04-01 + this->pw_rho, + this->pw_big); } // 3) init LCAO basis // reading the localized orbitals/projectors // construct the interpolation tables. - LCAO_domain::init_basis_lcao(this->pv, - inp.onsite_radius, - inp.lcao_ecut, - inp.lcao_dk, - inp.lcao_dr, - inp.lcao_rmax, - ucell, + LCAO_domain::init_basis_lcao(this->pv, + inp.onsite_radius, + inp.lcao_ecut, + inp.lcao_dk, + inp.lcao_dr, + inp.lcao_rmax, + ucell, two_center_bundle_, orb_); //------------------init Basis_lcao---------------------- @@ -178,8 +176,7 @@ void ESolver_KS_LCAO::before_all_runners(const Input_para& inp, UnitCell // 5) initialize density matrix // DensityMatrix is allocated here, DMK is also initialized here // DMR is not initialized here, it will be constructed in each before_scf - dynamic_cast*>(this->pelec) - ->init_DM(&this->kv, &(this->pv), PARAM.inp.nspin); + dynamic_cast*>(this->pelec)->init_DM(&this->kv, &(this->pv), PARAM.inp.nspin); // this function should be removed outside of the function if (PARAM.inp.calculation == "get_S") @@ -196,22 +193,28 @@ void ESolver_KS_LCAO::before_all_runners(const Input_para& inp, UnitCell #ifdef __EXX // 7) initialize exx // PLEASE simplify the Exx_Global interface - if (PARAM.inp.calculation == "scf" || PARAM.inp.calculation == "relax" - || PARAM.inp.calculation == "cell-relax" + if (PARAM.inp.calculation == "scf" || PARAM.inp.calculation == "relax" || PARAM.inp.calculation == "cell-relax" || PARAM.inp.calculation == "md") { if (GlobalC::exx_info.info_global.cal_exx) { XC_Functional::set_xc_first_loop(ucell); // initialize 2-center radial tables for EXX-LRI - if (GlobalC::exx_info.info_ri.real_number) { this->exx_lri_double->init(MPI_COMM_WORLD, this->kv, orb_); } - else { this->exx_lri_complex->init(MPI_COMM_WORLD, this->kv, orb_); } + if (GlobalC::exx_info.info_ri.real_number) + { + this->exx_lri_double->init(MPI_COMM_WORLD, this->kv, orb_); + } + else + { + this->exx_lri_complex->init(MPI_COMM_WORLD, this->kv, orb_); + } } } #endif // 8) initialize DFT+U - if (PARAM.inp.dft_plus_u) { + if (PARAM.inp.dft_plus_u) + { GlobalC::dftu.init(ucell, &this->pv, this->kv.get_nks(), orb_); } @@ -256,19 +259,19 @@ void ESolver_KS_LCAO::before_all_runners(const Input_para& inp, UnitCell { if (this->kv.get_nks() % GlobalV::KPAR_LCAO != 0) { - ModuleBase::WARNING("ESolver_KS_LCAO::before_all_runners", - "nks is not divisible by kpar."); + ModuleBase::WARNING("ESolver_KS_LCAO::before_all_runners", "nks is not divisible by kpar."); std::cout << "\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - std::cout << " Warning: nks (" << this->kv.get_nks() << ") is not divisible by kpar (" - << GlobalV::KPAR_LCAO << ")." << std::endl; + "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%%%%%%%%%%%%%%%%%%%%" + << std::endl; + std::cout << " Warning: nks (" << this->kv.get_nks() << ") is not divisible by kpar (" << GlobalV::KPAR_LCAO + << ")." << std::endl; std::cout << " This may lead to poor load balance. It is strongly suggested to" << std::endl; std::cout << " set nks to be divisible by kpar, but if this is really what" << std::endl; std::cout << " you want, please ignore this warning." << std::endl; std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%\n"; + "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%%%%%%\n"; } } @@ -450,25 +453,26 @@ void ESolver_KS_LCAO::after_all_runners() if (PARAM.inp.out_mat_xc) { ModuleIO::write_Vxc(PARAM.inp.nspin, - GlobalV::NLOCAL, - GlobalV::DRANK, - &this->pv, - *this->psi, - GlobalC::ucell, - this->sf, - *this->pw_rho, - *this->pw_rhod, - GlobalC::ppcell.vloc, - *this->pelec->charge, - this->GG, - this->GK, - this->kv, - orb_.cutoffs(), - this->pelec->wg, - GlobalC::GridD + GlobalV::NLOCAL, + GlobalV::DRANK, + &this->pv, + *this->psi, + GlobalC::ucell, + this->sf, + *this->pw_rho, + *this->pw_rhod, + GlobalC::ppcell.vloc, + *this->pelec->charge, + this->GG, + this->GK, + this->kv, + orb_.cutoffs(), + this->pelec->wg, + GlobalC::GridD #ifdef __EXX - , this->exx_lri_double ? &this->exx_lri_double->Hexxs : nullptr - , this->exx_lri_complex ? &this->exx_lri_complex->Hexxs : nullptr + , + this->exx_lri_double ? &this->exx_lri_double->Hexxs : nullptr, + this->exx_lri_complex ? &this->exx_lri_complex->Hexxs : nullptr #endif ); } @@ -476,26 +480,27 @@ void ESolver_KS_LCAO::after_all_runners() if (PARAM.inp.out_eband_terms) { ModuleIO::write_eband_terms(PARAM.inp.nspin, - GlobalV::NLOCAL, - GlobalV::DRANK, - &this->pv, - *this->psi, - GlobalC::ucell, - this->sf, - *this->pw_rho, - *this->pw_rhod, - GlobalC::ppcell.vloc, - *this->pelec->charge, - this->GG, - this->GK, - this->kv, - this->pelec->wg, - GlobalC::GridD, - orb_.cutoffs(), - this->two_center_bundle_ + GlobalV::NLOCAL, + GlobalV::DRANK, + &this->pv, + *this->psi, + GlobalC::ucell, + this->sf, + *this->pw_rho, + *this->pw_rhod, + GlobalC::ppcell.vloc, + *this->pelec->charge, + this->GG, + this->GK, + this->kv, + this->pelec->wg, + GlobalC::GridD, + orb_.cutoffs(), + this->two_center_bundle_ #ifdef __EXX - , this->exx_lri_double ? &this->exx_lri_double->Hexxs : nullptr - , this->exx_lri_complex ? &this->exx_lri_complex->Hexxs : nullptr + , + this->exx_lri_double ? &this->exx_lri_double->Hexxs : nullptr, + this->exx_lri_complex ? &this->exx_lri_complex->Hexxs : nullptr #endif ); } @@ -503,7 +508,6 @@ void ESolver_KS_LCAO::after_all_runners() ModuleBase::timer::tick("ESolver_KS_LCAO", "after_all_runners"); } - //------------------------------------------------------------------------------ //! the 10th function of ESolver_KS_LCAO: iter_init //! mohan add 2024-05-11 @@ -619,11 +623,15 @@ void ESolver_KS_LCAO::iter_init(const int istep, const int iter) // calculate exact-exchange if (GlobalC::exx_info.info_ri.real_number) { - this->exd->exx_eachiterinit(*dynamic_cast*>(this->pelec)->get_DM(), this->kv, iter); + this->exd->exx_eachiterinit(*dynamic_cast*>(this->pelec)->get_DM(), + this->kv, + iter); } else { - this->exc->exx_eachiterinit(*dynamic_cast*>(this->pelec)->get_DM(), this->kv, iter); + this->exc->exx_eachiterinit(*dynamic_cast*>(this->pelec)->get_DM(), + this->kv, + iter); } #endif @@ -706,8 +714,7 @@ void ESolver_KS_LCAO::hamilt2density(int istep, int iter, double ethr) this->pelec->f_en.demet = 0.0; hsolver::HSolverLCAO hsolver_lcao_obj(&(this->pv), PARAM.inp.ks_solver); - hsolver_lcao_obj.solve(this->p_hamilt, this->psi[0], this->pelec, PARAM.inp.ks_solver, false); - + hsolver_lcao_obj.solve(this->p_hamilt, this->psi[0], this->pelec, false); if (PARAM.inp.out_bandgap) { @@ -818,7 +825,7 @@ void ESolver_KS_LCAO::update_pot(const int istep, const int iter) } for (int ik = 0; ik < this->kv.get_nks(); ++ik) { - if (PARAM.inp.out_mat_hs[0]|| PARAM.inp.deepks_v_delta) + if (PARAM.inp.out_mat_hs[0] || PARAM.inp.deepks_v_delta) { this->p_hamilt->updateHk(ik); } @@ -858,7 +865,7 @@ void ESolver_KS_LCAO::update_pot(const int istep, const int iter) GlobalV::DRANK); } #ifdef __DEEPKS - if(PARAM.inp.deepks_out_labels && PARAM.inp.deepks_v_delta) + if (PARAM.inp.deepks_out_labels && PARAM.inp.deepks_v_delta) { DeePKS_domain::save_h_mat(h_mat.p, this->pv.nloc); } @@ -880,7 +887,6 @@ void ESolver_KS_LCAO::update_pot(const int istep, const int iter) istep); } - if (!this->conv_elec) { if (PARAM.inp.nspin == 4) @@ -945,7 +951,7 @@ void ESolver_KS_LCAO::iter_finish(int& iter) Hexxk_save.set_zero_hk(); hamilt::OperatorEXX> opexx_save(&Hexxk_save, - nullptr, + nullptr, this->kv); opexx_save.contributeHk(ik); @@ -975,8 +981,12 @@ void ESolver_KS_LCAO::iter_finish(int& iter) { // Kerker mixing does not work for the density matrix. // In the separate loop case, it can still work in the subsequent inner loops where Hexx(DM) is fixed. - // In the non-separate loop case where Hexx(DM) is updated in every iteration of the 2nd loop, it should be closed. - if (!GlobalC::exx_info.info_global.separate_loop) { this->p_chgmix->close_kerker_gg0(); } + // In the non-separate loop case where Hexx(DM) is updated in every iteration of the 2nd loop, it should be + // closed. + if (!GlobalC::exx_info.info_global.separate_loop) + { + this->p_chgmix->close_kerker_gg0(); + } if (GlobalC::exx_info.info_ri.real_number) { this->conv_elec = this->exd->exx_after_converge( @@ -1126,12 +1136,16 @@ void ESolver_KS_LCAO::after_scf(const int istep) #ifdef __EXX // 4) write Hexx matrix for NSCF (see `out_chg` in docs/advanced/input_files/input-main.md) - if (GlobalC::exx_info.info_global.cal_exx && PARAM.inp.out_chg[0] && istep % PARAM.inp.out_interval == 0) // Peize Lin add if 2022.11.14 + if (GlobalC::exx_info.info_global.cal_exx && PARAM.inp.out_chg[0] + && istep % PARAM.inp.out_interval == 0) // Peize Lin add if 2022.11.14 { const std::string file_name_exx = PARAM.globalv.global_out_dir + "HexxR" + std::to_string(GlobalV::MY_RANK); - if (GlobalC::exx_info.info_ri.real_number) { + if (GlobalC::exx_info.info_ri.real_number) + { ModuleIO::write_Hexxs_csr(file_name_exx, GlobalC::ucell, this->exd->get_Hexxs()); - } else { + } + else + { ModuleIO::write_Hexxs_csr(file_name_exx, GlobalC::ucell, this->exc->get_Hexxs()); } } @@ -1222,9 +1236,9 @@ void ESolver_KS_LCAO::after_scf(const int istep) // 15) write spin constrian MW? // spin constrain calculations, added by Tianqi Zhao. - if (PARAM.inp.sc_mag_switch) { - SpinConstrain& sc - = SpinConstrain::getScInstance(); + if (PARAM.inp.sc_mag_switch) + { + SpinConstrain& sc = SpinConstrain::getScInstance(); sc.cal_MW(istep, true); sc.print_Mag_Force(); } @@ -1254,14 +1268,14 @@ void ESolver_KS_LCAO::after_scf(const int istep) { hamilt::HS_Matrix_K hsk(&pv, true); hamilt::HContainer hR(&pv); - hamilt::Operator* ekinetic = - new hamilt::EkineticNew>(&hsk, - this->kv.kvec_d, - &hR, - &GlobalC::ucell, - orb_.cutoffs(), - &GlobalC::GridD, - two_center_bundle_.kinetic_orb.get()); + hamilt::Operator* ekinetic + = new hamilt::EkineticNew>(&hsk, + this->kv.kvec_d, + &hR, + &GlobalC::ucell, + orb_.cutoffs(), + &GlobalC::GridD, + two_center_bundle_.kinetic_orb.get()); const int nspin_k = (PARAM.inp.nspin == 2 ? 2 : 1); for (int ik = 0; ik < this->kv.get_nks() / nspin_k; ++ik) diff --git a/source/module_esolver/esolver_ks_lcao_tddft.cpp b/source/module_esolver/esolver_ks_lcao_tddft.cpp index 17d36bc46d..c1bf644d52 100644 --- a/source/module_esolver/esolver_ks_lcao_tddft.cpp +++ b/source/module_esolver/esolver_ks_lcao_tddft.cpp @@ -11,8 +11,8 @@ //--------------temporary---------------------------- #include "module_base/blas_connector.h" #include "module_base/global_function.h" -#include "module_base/scalapack_connector.h" #include "module_base/lapack_connector.h" +#include "module_base/scalapack_connector.h" #include "module_elecstate/module_charge/symmetry_rho.h" #include "module_elecstate/occupy.h" #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" // need divide_HS_in_frag @@ -73,34 +73,34 @@ void ESolver_KS_LCAO_TDDFT::before_all_runners(const Input_para& inp, UnitCell& GlobalC::ppcell.init_vloc(GlobalC::ppcell.vloc, pw_rho); // 3) initialize the electronic states for TDDFT - if (this->pelec == nullptr) { - this->pelec = new elecstate::ElecStateLCAO_TDDFT( - &this->chr, - &kv, - kv.get_nks(), - &this->GK, // mohan add 2024-04-01 - this->pw_rho, - pw_big); + if (this->pelec == nullptr) + { + this->pelec = new elecstate::ElecStateLCAO_TDDFT(&this->chr, + &kv, + kv.get_nks(), + &this->GK, // mohan add 2024-04-01 + this->pw_rho, + pw_big); } // 4) read the local orbitals and construct the interpolation tables. // initialize the pv - LCAO_domain::init_basis_lcao(this->pv, - inp.onsite_radius, - inp.lcao_ecut, - inp.lcao_dk, - inp.lcao_dr, - inp.lcao_rmax, - ucell, + LCAO_domain::init_basis_lcao(this->pv, + inp.onsite_radius, + inp.lcao_ecut, + inp.lcao_dk, + inp.lcao_dr, + inp.lcao_rmax, + ucell, two_center_bundle_, orb_); // 5) allocate H and S matrices according to computational resources LCAO_domain::divide_HS_in_frag(PARAM.globalv.gamma_only_local, this->pv, kv.get_nks(), orb_); - // 6) initialize Density Matrix - dynamic_cast>*>(this->pelec)->init_DM(&kv, &this->pv, PARAM.inp.nspin); + dynamic_cast>*>(this->pelec) + ->init_DM(&kv, &this->pv, PARAM.inp.nspin); // 8) initialize the charge density this->pelec->charge->allocate(PARAM.inp.nspin); @@ -169,7 +169,7 @@ void ESolver_KS_LCAO_TDDFT::hamilt2density(const int istep, const int iter, cons if (this->psi != nullptr) { hsolver::HSolverLCAO> hsolver_lcao_obj(&this->pv, PARAM.inp.ks_solver); - hsolver_lcao_obj.solve(this->p_hamilt, this->psi[0], this->pelec_td, PARAM.inp.ks_solver, false); + hsolver_lcao_obj.solve(this->p_hamilt, this->psi[0], this->pelec_td, false); } } // else @@ -255,28 +255,28 @@ void ESolver_KS_LCAO_TDDFT::update_pot(const int istep, const int iter) if (PARAM.inp.out_mat_hs[0]) { ModuleIO::save_mat(istep, - h_mat.p, - GlobalV::NLOCAL, - bit, - PARAM.inp.out_mat_hs[1], - 1, - PARAM.inp.out_app_flag, - "H", - "data-" + std::to_string(ik), - this->pv, - GlobalV::DRANK); + h_mat.p, + GlobalV::NLOCAL, + bit, + PARAM.inp.out_mat_hs[1], + 1, + PARAM.inp.out_app_flag, + "H", + "data-" + std::to_string(ik), + this->pv, + GlobalV::DRANK); ModuleIO::save_mat(istep, - s_mat.p, - GlobalV::NLOCAL, - bit, - PARAM.inp.out_mat_hs[1], - 1, - PARAM.inp.out_app_flag, - "S", - "data-" + std::to_string(ik), - this->pv, - GlobalV::DRANK); + s_mat.p, + GlobalV::NLOCAL, + bit, + PARAM.inp.out_mat_hs[1], + 1, + PARAM.inp.out_app_flag, + "S", + "data-" + std::to_string(ik), + this->pv, + GlobalV::DRANK); } } } @@ -416,8 +416,8 @@ void ESolver_KS_LCAO_TDDFT::after_scf(const int istep) } if (TD_Velocity::out_current == true) { - elecstate::DensityMatrix, double>* tmp_DM = - dynamic_cast>*>(this->pelec)->get_DM(); + elecstate::DensityMatrix, double>* tmp_DM + = dynamic_cast>*>(this->pelec)->get_DM(); ModuleIO::write_current(istep, this->psi, diff --git a/source/module_esolver/lcao_before_scf.cpp b/source/module_esolver/lcao_before_scf.cpp index 64bc023bfd..3bef3a70d3 100644 --- a/source/module_esolver/lcao_before_scf.cpp +++ b/source/module_esolver/lcao_before_scf.cpp @@ -165,7 +165,7 @@ void ESolver_KS_LCAO::beforesolver(const int istep) // cal_ux should be called before init_scf because // the direction of ux is used in noncoline_rho //========================================================= - if (PARAM.inp.nspin == 4 && PARAM.globalv.domag) + if (PARAM.inp.nspin == 4) { GlobalC::ucell.cal_ux(); } diff --git a/source/module_esolver/lcao_nscf.cpp b/source/module_esolver/lcao_nscf.cpp index 7904c0d6ea..56a3414696 100644 --- a/source/module_esolver/lcao_nscf.cpp +++ b/source/module_esolver/lcao_nscf.cpp @@ -53,7 +53,7 @@ void ESolver_KS_LCAO::nscf() { // istep becomes istep-1, this should be fixed in future int istep = 0; hsolver::HSolverLCAO hsolver_lcao_obj(&(this->pv), PARAM.inp.ks_solver); - hsolver_lcao_obj.solve(this->p_hamilt, this->psi[0], this->pelec, PARAM.inp.ks_solver, true); + hsolver_lcao_obj.solve(this->p_hamilt, this->psi[0], this->pelec, true); time_t time_finish = std::time(nullptr); ModuleBase::GlobalFunc::OUT_TIME("cal_bands", time_start, time_finish); diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp index 4c5d5ab535..bda9ce7ebd 100644 --- a/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp @@ -13,7 +13,7 @@ void SpinConstrain, base_device::DEVICE_CPU>::cal_mw_from_l // diagonalization without update charge hsolver::HSolverLCAO> hsolver_lcao_obj(this->ParaV, this->KS_SOLVER); - hsolver_lcao_obj.solve(this->p_hamilt, this->psi[0], this->pelec, this->KS_SOLVER, true); + hsolver_lcao_obj.solve(this->p_hamilt, this->psi[0], this->pelec, true); elecstate::ElecStateLCAO>* pelec_lcao = dynamic_cast>*>(this->pelec); diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp index a6e482cdae..81cd5de858 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp @@ -13,7 +13,7 @@ TEST(TestSrcPWStressMultiDevice, cal_dbecp_noevc_nl_op_cpu) const double tpiba = 0.61599855952741045; std::vector gcar = {2.0000000000000000, -2.0000000000000000, -2.0000000000000000, 1.0000000000000000, -1.0000000000000000, -1.0000000000000000, 0.0000000000000000, 0.0000000000000000, 0.0000000000000000, -1.0000000000000000, 1.0000000000000000, 1.0000000000000000, -2.0000000000000000, 2.0000000000000000, 2.0000000000000000, 2.0000000000000000, -2.0000000000000000, 0.0000000000000000, 1.0000000000000000, -1.0000000000000000, 1.0000000000000000, 0.0000000000000000, 0.0000000000000000, 2.0000000000000000, -1.0000000000000000, 1.0000000000000000, 3.0000000000000000, 2.0000000000000000, -2.0000000000000000, 2.0000000000000000, 1.0000000000000000, -1.0000000000000000, 3.0000000000000000, -1.0000000000000000, 1.0000000000000000, -3.0000000000000000, -2.0000000000000000, 2.0000000000000000, -2.0000000000000000, 1.0000000000000000, -1.0000000000000000, -3.0000000000000000, 0.0000000000000000, 0.0000000000000000, -2.0000000000000000, -1.0000000000000000, 1.0000000000000000, -1.0000000000000000, -2.0000000000000000, 2.0000000000000000, 0.0000000000000000, 2.0000000000000000, 0.0000000000000000, -2.0000000000000000, 1.0000000000000000, 1.0000000000000000, -1.0000000000000000, 0.0000000000000000, 2.0000000000000000, 0.0000000000000000, -1.0000000000000000, 3.0000000000000000, 1.0000000000000000, 3.0000000000000000, -1.0000000000000000, -1.0000000000000000, 2.0000000000000000, 0.0000000000000000, 0.0000000000000000, 1.0000000000000000, 1.0000000000000000, 1.0000000000000000, 0.0000000000000000, 2.0000000000000000, 2.0000000000000000, 3.0000000000000000, -1.0000000000000000, 1.0000000000000000, 2.0000000000000000, 0.0000000000000000, 2.0000000000000000, 1.0000000000000000, 1.0000000000000000, 3.0000000000000000, 1.0000000000000000, 1.0000000000000000, -3.0000000000000000, 0.0000000000000000, 2.0000000000000000, -2.0000000000000000, -1.0000000000000000, 3.0000000000000000, -1.0000000000000000, 2.0000000000000000, 2.0000000000000000, -2.0000000000000000, 1.0000000000000000, 3.0000000000000000, -1.0000000000000000, 3.0000000000000000, 1.0000000000000000, -1.0000000000000000, 2.0000000000000000, 2.0000000000000000, 0.0000000000000000, 1.0000000000000000, 3.0000000000000000, 1.0000000000000000, 3.0000000000000000, 1.0000000000000000, 1.0000000000000000, 2.0000000000000000, 2.0000000000000000, 2.0000000000000000, -1.0000000000000000, -3.0000000000000000, 1.0000000000000000, -2.0000000000000000, -2.0000000000000000, 2.0000000000000000, -2.0000000000000000, -2.0000000000000000, -2.0000000000000000, -3.0000000000000000, -1.0000000000000000, -1.0000000000000000, -1.0000000000000000, -3.0000000000000000, -1.0000000000000000, -2.0000000000000000, -2.0000000000000000, 0.0000000000000000, -3.0000000000000000, -1.0000000000000000, 1.0000000000000000, 1.0000000000000000, -3.0000000000000000, -1.0000000000000000, 0.0000000000000000, -2.0000000000000000, 0.0000000000000000, -1.0000000000000000, -1.0000000000000000, 1.0000000000000000, -2.0000000000000000, 0.0000000000000000, 2.0000000000000000, 1.0000000000000000, -3.0000000000000000, 1.0000000000000000, 0.0000000000000000, -2.0000000000000000, 2.0000000000000000, -1.0000000000000000, -1.0000000000000000, 3.0000000000000000, -1.0000000000000000, -1.0000000000000000, -3.0000000000000000, -2.0000000000000000, 0.0000000000000000, -2.0000000000000000, -3.0000000000000000, 1.0000000000000000, -1.0000000000000000, 0.0000000000000000, -2.0000000000000000, -2.0000000000000000, -1.0000000000000000, -1.0000000000000000, -1.0000000000000000, -2.0000000000000000, 0.0000000000000000, 0.0000000000000000, -3.0000000000000000, 1.0000000000000000, 1.0000000000000000}; - std::vector kvec_c = {0.0000000000000000}; + std::vector kvec_c = {0.0000000000000000, 0.0000000000000000, 0.0000000000000000}; std::vector> vkb = {{0.1959277889216247, 0.0000000000000000}, {0.3292263195311197, 0.0000000000000000}, {0.3833320420712772, -0.0000000000000000}, {0.3292263195311197, -0.0000000000000000}, {0.1959277889216247, -0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.3292263195311197, -0.0000000000000000}, {0.3121883316125419, -0.0000000000000000}, {0.2084900583919441, -0.0000000000000000}, {0.1959277889216247, -0.0000000000000000}, {0.2084900583919441, -0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.1959277889216247, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.3121883316125419, 0.0000000000000000}, {0.3292263195311197, 0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.3292263195311197, -0.0000000000000000}, {0.3121883316125419, -0.0000000000000000}, {0.2084900583919441, -0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.3121883316125419, -0.0000000000000000}, {0.3292263195311197, -0.0000000000000000}, {0.2495383154917130, -0.0000000000000000}, {0.2084900583919441, -0.0000000000000000}, {0.2495383154917130, -0.0000000000000000}, {0.2084900583919441, -0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.1959277889216247, -0.0000000000000000}, {0.2084900583919441, -0.0000000000000000}, {0.2084900583919441, -0.0000000000000000}, {0.2495383154917130, -0.0000000000000000}, {0.2084900583919441, -0.0000000000000000}, {0.2084900583919441, -0.0000000000000000}, {0.1959277889216247, -0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.1959277889216247, 0.0000000000000000}, {0.1959277889216247, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.3121883316125419, 0.0000000000000000}, {0.3292263195311197, 0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.3292263195311197, 0.0000000000000000}, {0.3121883316125419, 0.0000000000000000}, {0.2084900583919441, 0.0000000000000000}, {0.1588244263337111, 0.0000000000000000}, {0.2318163335599036, 0.0000000000000000}, {0.2916492898304017, 0.0000000000000000}, {0.2605982137021648, 0.0000000000000000}, {0.1588244263337111, 0.0000000000000000}, {0.2605982137021648, 0.0000000000000000}, {0.3602727707771828, 0.0000000000000000}, {0.3602727707771828, 0.0000000000000000}, {0.2605982137021648, 0.0000000000000000}, {0.2768133075187308, 0.0000000000000000}, {0.2117172164531642, 0.0000000000000000}, {-0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, 0.0619166231573246}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, -0.1191161929559753}, {0.0000000000000000, -0.1319268716568682}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, -0.1319268716568682}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.1191161929559753}, {-0.0000000000000000, 0.0619166231573246}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.1007653341101891}, {-0.0000000000000000, 0.0619166231573246}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0439756238856227}, {-0.0000000000000000, 0.0439756238856227}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, -0.1007653341101891}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.1007653341101891}, {0.0000000000000000, -0.1319268716568682}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.1007653341101891}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0439756238856227}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.0839101408927066}, {-0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0439756238856227}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0439756238856227}, {-0.0000000000000000, 0.0439756238856227}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, -0.1007653341101891}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.1007653341101891}, {0.0000000000000000, -0.1319268716568682}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.1007653341101891}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.1007653341101891}, {-0.0000000000000000, 0.0619166231573246}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0439756238856227}, {-0.0000000000000000, 0.1074700120541082}, {-0.0000000000000000, 0.1429603824496169}, {-0.0000000000000000, 0.1132688983356121}, {-0.0000000000000000, 0.0520569223767372}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.1041138447534744}, {-0.0000000000000000, 0.0660471629375616}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0520569223767372}, {0.0000000000000000, -0.0680879572845678}, {0.0000000000000000, -0.0667352877462083}, {-0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, 0.0619166231573246}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, -0.0839101408927066}, {-0.0000000000000000, 0.1007653341101892}, {-0.0000000000000000, 0.0619166231573246}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0439756238856227}, {-0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, 0.0439756238856227}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.0839101408927066}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, -0.1007653341101892}, {-0.0000000000000000, 0.1007653341101892}, {-0.0000000000000000, 0.0619166231573246}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0439756238856227}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.1191161929559753}, {-0.0000000000000000, 0.0619166231573246}, {-0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.1007653341101892}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0439756238856227}, {-0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.1007653341101892}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.0839101408927066}, {0.0000000000000000, -0.0439756238856228}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, -0.1319268716568682}, {0.0000000000000000, -0.0439756238856228}, {0.0000000000000000, -0.1007653341101892}, {0.0000000000000000, -0.1319268716568682}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, -0.1007653341101892}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.1007653341101892}, {0.0000000000000000, -0.1319268716568682}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, -0.1191161929559753}, {0.0000000000000000, -0.1319268716568682}, {0.0000000000000000, -0.0716466747027388}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0566344491678060}, {0.0000000000000000, -0.1041138447534745}, {0.0000000000000000, -0.1074700120541082}, {-0.0000000000000000, 0.0520569223767372}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0660471629375617}, {0.0000000000000000, -0.1041138447534745}, {0.0000000000000000, -0.0953231401983950}, {0.0000000000000000, -0.1112254795770137}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0619166231573246}, {-0.0000000000000000, 0.0839101408927066}, {0.0000000000000000, -0.1007653341101892}, {0.0000000000000000, -0.0619166231573246}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0439756238856227}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, -0.0439756238856227}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0839101408927066}, {0.0000000000000000, -0.0439756238856227}, {-0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0619166231573246}, {-0.0000000000000000, 0.1007653341101892}, {-0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0619166231573246}, {-0.0000000000000000, 0.1191161929559753}, {-0.0000000000000000, 0.1319268716568682}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0619166231573246}, {-0.0000000000000000, 0.1007653341101892}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.1007653341101892}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.1007653341101892}, {-0.0000000000000000, 0.1319268716568682}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0839101408927066}, {0.0000000000000000, -0.1319268716568682}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.1319268716568682}, {0.0000000000000000, -0.1007653341101892}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.1319268716568682}, {0.0000000000000000, -0.1191161929559753}, {0.0000000000000000, -0.0619166231573246}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.1319268716568682}, {0.0000000000000000, -0.1007653341101892}, {0.0000000000000000, -0.0439756238856227}, {0.0000000000000000, -0.0439756238856227}, {-0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0439756238856227}, {0.0000000000000000, -0.1007653341101892}, {0.0000000000000000, -0.0619166231573246}, {-0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0439756238856227}, {-0.0000000000000000, 0.0537350060270541}, {0.0000000000000000, -0.0238267304082695}, {-0.0000000000000000, 0.0283172245839030}, {-0.0000000000000000, 0.0780853835651058}, {-0.0000000000000000, 0.0895583433784236}, {0.0000000000000000, -0.0780853835651058}, {0.0000000000000000, -0.0330235814687808}, {-0.0000000000000000, 0.0330235814687808}, {-0.0000000000000000, 0.0780853835651058}, {-0.0000000000000000, 0.0680879572845679}, {-0.0000000000000000, 0.0667352877462083}, {-0.1959277889216247, 0.0000000000000000}, {0.0000000000000000, 0.3292263195311197}, {0.3833320420712772, -0.0000000000000000}, {0.0000000000000000, -0.3292263195311197}, {-0.1959277889216247, -0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.0000000000000000, -0.3292263195311197}, {-0.3121883316125419, -0.0000000000000000}, {-0.0000000000000000, 0.2084900583919441}, {-0.1959277889216247, -0.0000000000000000}, {-0.0000000000000000, 0.2084900583919441}, {-0.0000000000000000, -0.2084900583919441}, {-0.1959277889216247, 0.0000000000000000}, {-0.0000000000000000, -0.2084900583919441}, {-0.3121883316125419, 0.0000000000000000}, {0.0000000000000000, 0.3292263195311197}, {0.2495383154917130, 0.0000000000000000}, {0.2495383154917130, 0.0000000000000000}, {0.0000000000000000, -0.3292263195311197}, {-0.3121883316125419, -0.0000000000000000}, {-0.0000000000000000, 0.2084900583919441}, {0.0000000000000000, -0.2084900583919441}, {-0.3121883316125419, -0.0000000000000000}, {-0.0000000000000001, 0.3292263195311197}, {0.2495383154917130, 0.0000000000000001}, {-0.0000000000000000, 0.2084900583919441}, {0.2495383154917130, 0.0000000000000001}, {0.0000000000000001, -0.2084900583919441}, {0.0000000000000000, 0.2084900583919441}, {0.2495383154917130, 0.0000000000000000}, {0.0000000000000000, -0.2084900583919441}, {-0.1959277889216247, -0.0000000000000000}, {-0.0000000000000000, 0.2084900583919441}, {-0.0000000000000000, 0.2084900583919441}, {0.2495383154917130, 0.0000000000000001}, {0.0000000000000001, -0.2084900583919441}, {0.0000000000000001, -0.2084900583919441}, {-0.1959277889216247, -0.0000000000000001}, {-0.0000000000000000, -0.2084900583919441}, {-0.1959277889216247, 0.0000000000000000}, {-0.1959277889216247, 0.0000000000000001}, {0.0000000000000001, 0.2084900583919441}, {0.0000000000000001, 0.2084900583919441}, {0.2495383154917130, -0.0000000000000001}, {-0.0000000000000000, -0.2084900583919441}, {-0.0000000000000000, -0.2084900583919441}, {-0.3121883316125419, 0.0000000000000000}, {0.0000000000000000, 0.3292263195311197}, {0.2495383154917130, 0.0000000000000000}, {0.0000000000000000, 0.2084900583919441}, {0.2495383154917130, 0.0000000000000000}, {0.0000000000000000, -0.2084900583919441}, {0.0000000000000001, 0.2084900583919441}, {0.2495383154917130, -0.0000000000000001}, {-0.0000000000000000, -0.2084900583919441}, {0.2495383154917130, -0.0000000000000001}, {-0.0000000000000001, -0.3292263195311197}, {-0.3121883316125419, 0.0000000000000000}, {0.0000000000000000, 0.2084900583919441}, {0.1123058288786304, -0.1123058288786304}, {0.1639189014500104, -0.1639189014500105}, {-0.2062271905673179, -0.2062271905673178}, {-0.1842707640739018, 0.1842707640739018}, {0.1123058288786304, 0.1123058288786304}, {-0.1842707640739018, -0.1842707640739018}, {-0.2547513192934127, 0.2547513192934126}, {0.2547513192934126, 0.2547513192934127}, {0.1842707640739018, -0.1842707640739018}, {0.1059318666456010, -0.2557421491433081}, {0.1497066794479725, -0.1497066794479725}, {-0.0000000000000000, -0.0839101408927066}, {-0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0619166231573246, -0.0000000000000000}, {-0.0000000000000000, 0.0839101408927066}, {0.0000000000000000, 0.0000000000000000}, {-0.0619166231573246, -0.0000000000000000}, {-0.0000000000000000, 0.1191161929559753}, {0.1319268716568682, 0.0000000000000000}, {-0.0000000000000000, 0.0839101408927066}, {0.1319268716568682, 0.0000000000000000}, {0.1319268716568682, -0.0000000000000000}, {-0.0000000000000000, -0.0839101408927066}, {0.1319268716568682, -0.0000000000000000}, {-0.0000000000000000, -0.1191161929559753}, {-0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.1007653341101891}, {0.0619166231573246, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0439756238856227, 0.0000000000000000}, {0.0439756238856227, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, -0.1007653341101891}, {0.0439756238856227, 0.0000000000000000}, {0.0000000000000000, -0.1007653341101891}, {-0.1319268716568682, -0.0000000000000000}, {-0.1319268716568682, 0.0000000000000000}, {-0.0000000000000000, 0.1007653341101891}, {0.0439756238856227, 0.0000000000000000}, {0.0000000000000000, -0.0839101408927066}, {-0.0439756238856227, -0.0000000000000000}, {-0.0439756238856227, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0439756238856227, -0.0000000000000000}, {-0.0439756238856227, -0.0000000000000000}, {-0.0000000000000000, 0.0839101408927066}, {-0.0439756238856227, 0.0000000000000000}, {0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, -0.0839101408927066}, {-0.0439756238856227, 0.0000000000000000}, {-0.0439756238856227, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0439756238856227, 0.0000000000000000}, {0.0439756238856227, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, -0.1007653341101891}, {0.0439756238856227, 0.0000000000000000}, {0.0000000000000000, -0.1007653341101891}, {-0.1319268716568682, -0.0000000000000000}, {-0.1319268716568682, 0.0000000000000000}, {0.0000000000000000, 0.1007653341101891}, {0.0439756238856227, -0.0000000000000000}, {0.0000000000000000, 0.1007653341101891}, {0.0619166231573246, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0439756238856227, 0.0000000000000000}, {0.0759927742976599, 0.0759927742976599}, {0.1010882558711464, 0.1010882558711464}, {0.0800932061106409, -0.0800932061106410}, {-0.0368098028202926, -0.0368098028202926}, {0.0000000000000000, 0.0000000000000000}, {0.0736196056405852, -0.0736196056405852}, {-0.0467023967912827, -0.0467023967912827}, {0.0000000000000000, 0.0000000000000000}, {-0.0368098028202926, -0.0368098028202926}, {-0.0629050701457150, -0.0260561331963860}, {-0.0471889745097794, -0.0471889745097794}, {-0.0000000000000000, -0.0839101408927066}, {-0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0619166231573246, -0.0000000000000000}, {-0.0000000000000000, 0.0839101408927066}, {-0.0000000000000000, 0.1007653341101892}, {0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0439756238856227, 0.0000000000000000}, {0.0000000000000000, -0.0839101408927066}, {-0.0439756238856227, -0.0000000000000000}, {-0.0439756238856227, 0.0000000000000000}, {0.0000000000000000, 0.0839101408927066}, {0.0439756238856227, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, -0.1007653341101892}, {-0.0000000000000000, 0.1007653341101892}, {0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0439756238856227, 0.0000000000000000}, {0.1319268716568682, 0.0000000000000000}, {0.0000000000000000, -0.1191161929559753}, {-0.0619166231573246, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {-0.1319268716568682, -0.0000000000000000}, {-0.0000000000000000, 0.1007653341101892}, {0.0439756238856227, 0.0000000000000000}, {-0.0439756238856227, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {-0.0439756238856227, -0.0000000000000000}, {0.0000000000000000, -0.0839101408927066}, {-0.0439756238856227, -0.0000000000000000}, {-0.1319268716568682, -0.0000000000000000}, {-0.0000000000000000, 0.1007653341101892}, {0.0439756238856227, 0.0000000000000000}, {0.1319268716568682, 0.0000000000000000}, {0.0000000000000000, -0.0839101408927066}, {-0.0439756238856228, 0.0000000000000000}, {0.0000000000000000, 0.0839101408927066}, {0.0000000000000000, 0.0839101408927066}, {0.1319268716568682, -0.0000000000000000}, {0.0439756238856228, -0.0000000000000000}, {-0.0000000000000000, -0.1007653341101892}, {-0.1319268716568682, 0.0000000000000000}, {0.0439756238856227, -0.0000000000000000}, {-0.0000000000000000, -0.0000000000000000}, {0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, -0.1007653341101892}, {-0.0439756238856227, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {-0.0439756238856227, -0.0000000000000000}, {0.0439756238856227, -0.0000000000000000}, {-0.0000000000000000, -0.1007653341101892}, {-0.1319268716568682, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, 0.1191161929559753}, {0.1319268716568682, 0.0000000000000000}, {-0.0506618495317733, -0.0506618495317733}, {0.0000000000000000, 0.0000000000000000}, {-0.0400466030553205, 0.0400466030553205}, {0.0736196056405853, 0.0736196056405852}, {0.0759927742976599, -0.0759927742976599}, {0.0368098028202926, -0.0368098028202926}, {-0.0000000000000000, -0.0000000000000000}, {0.0467023967912827, -0.0467023967912827}, {-0.0736196056405852, -0.0736196056405853}, {-0.0880670982040010, -0.0364785864749405}, {-0.0786482908496323, -0.0786482908496323}, {0.0000000000000000, 0.0839101408927066}, {0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, -0.0839101408927066}, {0.0000000000000000, -0.1007653341101892}, {-0.0619166231573246, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0439756238856227, -0.0000000000000000}, {-0.0000000000000000, 0.0839101408927066}, {0.0439756238856227, 0.0000000000000000}, {0.0439756238856227, -0.0000000000000000}, {-0.0000000000000000, -0.0839101408927066}, {-0.0439756238856227, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0619166231573246, 0.0000000000000000}, {-0.0000000000000000, 0.1007653341101892}, {-0.0000000000000000, 0.0000000000000000}, {0.0619166231573246, 0.0000000000000000}, {0.0000000000000000, -0.1191161929559753}, {-0.1319268716568682, -0.0000000000000000}, {-0.0439756238856227, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0619166231573246, -0.0000000000000000}, {-0.0000000000000000, 0.1007653341101892}, {0.0439756238856227, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0439756238856227, 0.0000000000000000}, {-0.0439756238856227, 0.0000000000000000}, {-0.0000000000000000, 0.1007653341101892}, {0.1319268716568682, 0.0000000000000000}, {0.0000000000000000, -0.0839101408927066}, {-0.1319268716568682, -0.0000000000000000}, {-0.0439756238856227, -0.0000000000000000}, {-0.0000000000000000, 0.1007653341101892}, {0.1319268716568682, 0.0000000000000000}, {0.0439756238856227, 0.0000000000000000}, {0.0000000000000000, -0.0839101408927066}, {-0.1319268716568682, 0.0000000000000000}, {0.0000000000000000, 0.0839101408927066}, {0.0000000000000000, 0.0839101408927066}, {0.0439756238856227, -0.0000000000000000}, {0.1319268716568682, -0.0000000000000000}, {-0.0000000000000000, -0.1007653341101892}, {-0.0439756238856227, 0.0000000000000000}, {-0.1319268716568682, 0.0000000000000000}, {0.0000000000000000, 0.1191161929559753}, {0.0619166231573246, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.1319268716568682, 0.0000000000000000}, {0.0000000000000000, -0.1007653341101892}, {-0.0439756238856227, -0.0000000000000000}, {0.0439756238856227, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0439756238856227, -0.0000000000000000}, {-0.0000000000000000, -0.1007653341101892}, {-0.0619166231573246, 0.0000000000000000}, {-0.0000000000000000, -0.0000000000000000}, {-0.0439756238856227, 0.0000000000000000}, {0.0379963871488300, 0.0379963871488300}, {-0.0168480426451911, -0.0168480426451910}, {0.0200233015276602, -0.0200233015276602}, {-0.0552147042304389, -0.0552147042304389}, {-0.0633273119147166, 0.0633273119147166}, {-0.0552147042304389, 0.0552147042304389}, {0.0233511983956413, 0.0233511983956413}, {-0.0233511983956413, 0.0233511983956413}, {0.0552147042304389, 0.0552147042304389}, {0.0629050701457150, 0.0260561331963861}, {0.0471889745097794, 0.0471889745097794}}; std::vector> vkb0i = {{0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0139850234834634}, {0.0000000000000000, -0.0206388743877607}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0206388743877607}, {0.0000000000000000, -0.0139850234834634}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0206388743877607}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0119933519685976}, {-0.0000000000000000, 0.0139850234834634}, {-0.0000000000000000, 0.0119933519685976}, {-0.0000000000000000, 0.0119933519685976}, {-0.0000000000000000, 0.0139850234834634}, {0.0000000000000000, -0.0119933519685976}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0206388743877607}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0251913335271091}, {0.0000000000000000, -0.0206388743877607}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0039977839895325}, {0.0000000000000000, -0.0119933519685976}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0206388743877607}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0119933519685976}, {-0.0000000000000000, 0.0251913335271091}, {-0.0000000000000000, 0.0119933519685976}, {0.0000000000000000, -0.0119933519685976}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0039977839895325}, {0.0000000000000000, -0.0139850234834634}, {0.0000000000000000, -0.0039977839895325}, {0.0000000000000000, -0.0119933519685976}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0039977839895325}, {-0.0000000000000000, 0.0119933519685976}, {-0.0000000000000000, 0.0139850234834634}, {0.0000000000000000, -0.0039977839895325}, {0.0000000000000000, -0.0139850234834634}, {-0.0000000000000000, 0.0139850234834634}, {-0.0000000000000000, 0.0119933519685976}, {-0.0000000000000000, 0.0039977839895325}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0119933519685976}, {0.0000000000000000, -0.0039977839895325}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0206388743877607}, {0.0000000000000000, -0.0251913335271091}, {-0.0000000000000000, 0.0039977839895325}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0119933519685976}, {-0.0000000000000000, 0.0119933519685976}, {-0.0000000000000000, 0.0251913335271091}, {-0.0000000000000000, 0.0119933519685976}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0206388743877607}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0119933519685976}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0279700469621601}, {-0.0000000000000000, 0.0412777487684869}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0412777487720041}, {-0.0000000000000000, 0.0279700469645434}, {-0.0000000000000000, 0.0251913335271091}, {-0.0000000000000000, 0.0412777487684869}, {-0.0000000000000000, 0.0595607437938820}, {-0.0000000000000000, 0.0399778398965743}, {-0.0000000000000000, 0.0279700469621601}, {-0.0000000000000000, 0.0399778398953252}, {-0.0000000000000000, 0.0399778398965743}, {-0.0000000000000000, 0.0279700469645434}, {-0.0000000000000000, 0.0399778398953252}, {-0.0000000000000000, 0.0595607437938820}, {-0.0000000000000000, 0.0412777487720041}, {-0.0000000000000000, 0.0251913335242470}, {-0.0000000000000000, 0.0251913335242470}, {-0.0000000000000000, 0.0412777487684869}, {-0.0000000000000000, 0.0595580964730781}, {-0.0000000000000000, 0.0399778398965743}, {-0.0000000000000000, 0.0079955679815631}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0412777487684869}, {-0.0000000000000000, 0.0503826670509438}, {-0.0000000000000000, 0.0079955679815631}, {-0.0000000000000000, 0.0251913335242470}, {-0.0000000000000000, 0.0399778398953252}, {-0.0000000000000000, 0.0399778398953252}, {-0.0000000000000000, 0.0503826670509438}, {-0.0000000000000000, 0.0399778398965743}, {-0.0000000000000000, 0.0279700469621601}, {-0.0000000000000000, 0.0399778398965743}, {-0.0000000000000000, 0.0079955679815631}, {-0.0000000000000000, 0.0251913335271091}, {-0.0000000000000000, 0.0399778398965743}, {-0.0000000000000000, 0.0079955679815631}, {-0.0000000000000000, 0.0279700469621601}, {-0.0000000000000000, 0.0399778398878310}, {-0.0000000000000000, 0.0279700469645434}, {-0.0000000000000000, 0.0279700469645434}, {-0.0000000000000000, 0.0079955679815631}, {-0.0000000000000000, 0.0399778398878310}, {-0.0000000000000000, 0.0251913335242470}, {-0.0000000000000000, 0.0079955679815631}, {-0.0000000000000000, 0.0399778398965743}, {-0.0000000000000000, 0.0595580964869841}, {-0.0000000000000000, 0.0412777487720041}, {-0.0000000000000000, 0.0251913335242470}, {-0.0000000000000000, 0.0399778398965743}, {-0.0000000000000000, 0.0503826670553068}, {-0.0000000000000000, 0.0399778398953252}, {-0.0000000000000000, 0.0399778398953252}, {-0.0000000000000000, 0.0251913335242470}, {-0.0000000000000000, 0.0079955679815631}, {-0.0000000000000000, 0.0503826670553068}, {-0.0000000000000000, 0.0412777487720041}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0079955679815631}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0139850234834634}, {-0.0000000000000000, 0.0206388743877607}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0206388743912779}, {-0.0000000000000000, 0.0139850234858467}, {-0.0000000000000000, 0.0251913335299711}, {-0.0000000000000000, 0.0206388743877607}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0039977839895325}, {-0.0000000000000000, 0.0139850234834634}, {-0.0000000000000000, 0.0039977839895325}, {-0.0000000000000000, 0.0039977839895325}, {-0.0000000000000000, 0.0139850234858467}, {-0.0000000000000000, 0.0039977839895325}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0206388743912779}, {-0.0000000000000000, 0.0251913335242470}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0206388743877607}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0119933519636014}, {-0.0000000000000000, 0.0119933519685976}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0206388743877607}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0119933519685976}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0039977839895325}, {0.0000000000000000, -0.0039977839895325}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0119933519636014}, {0.0000000000000000, -0.0139850234834634}, {0.0000000000000000, -0.0119933519636014}, {0.0000000000000000, -0.0119933519685976}, {0.0000000000000000, -0.0251913335299711}, {0.0000000000000000, -0.0119933519636014}, {0.0000000000000000, -0.0119933519685976}, {0.0000000000000000, -0.0139850234834634}, {0.0000000000000000, -0.0119933519611033}, {0.0000000000000000, -0.0139850234834634}, {0.0000000000000000, -0.0139850234834634}, {0.0000000000000000, -0.0119933519685976}, {0.0000000000000000, -0.0119933519611033}, {0.0000000000000000, -0.0251913335242470}, {0.0000000000000000, -0.0119933519685976}, {-0.0000000000000000, 0.0119933519636014}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0206388743877607}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0119933519636014}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0039977839895325}, {0.0000000000000000, -0.0039977839895325}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0119933519685976}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0206388743877607}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0119933519685976}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0139850234834634}, {0.0206388743877607, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0206388743877607, -0.0000000000000000}, {-0.0000000000000000, 0.0139850234834634}, {0.0000000000000000, 0.0000000000000000}, {0.0206388743877607, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0119933519685976, 0.0000000000000000}, {0.0000000000000000, -0.0139850234834634}, {-0.0119933519685976, -0.0000000000000000}, {0.0119933519685976, -0.0000000000000000}, {-0.0000000000000000, -0.0139850234834634}, {-0.0119933519685976, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0206388743877607, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, -0.0251913335271091}, {-0.0206388743877607, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0039977839895325, 0.0000000000000000}, {-0.0119933519685976, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {-0.0206388743877607, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0119933519685976, -0.0000000000000000}, {-0.0000000000000000, 0.0251913335271091}, {0.0119933519685976, 0.0000000000000000}, {0.0119933519685976, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0039977839895325, 0.0000000000000000}, {-0.0000000000000000, 0.0139850234834634}, {0.0039977839895325, 0.0000000000000000}, {0.0119933519685976, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0039977839895325, 0.0000000000000000}, {0.0119933519685976, 0.0000000000000000}, {0.0000000000000000, -0.0139850234834634}, {-0.0039977839895325, 0.0000000000000000}, {0.0000000000000000, 0.0139850234834634}, {-0.0000000000000000, -0.0139850234834634}, {-0.0119933519685976, 0.0000000000000000}, {-0.0039977839895325, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0119933519685976, 0.0000000000000000}, {-0.0039977839895325, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0206388743877607, 0.0000000000000000}, {0.0000000000000000, -0.0251913335271091}, {-0.0039977839895325, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0119933519685976, -0.0000000000000000}, {-0.0119933519685976, 0.0000000000000000}, {0.0000000000000000, 0.0251913335271091}, {0.0119933519685976, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0206388743877607, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0119933519685976, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, -0.0279700469621601}, {-0.0412777487684869, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0412777487720041, 0.0000000000000000}, {0.0000000000000000, -0.0279700469645434}, {-0.0000000000000000, 0.0251913335271091}, {0.0412777487684869, 0.0000000000000000}, {0.0000000000000000, -0.0595607437938820}, {-0.0399778398965743, -0.0000000000000000}, {0.0000000000000000, -0.0279700469621601}, {-0.0399778398953252, -0.0000000000000000}, {0.0399778398965743, -0.0000000000000000}, {-0.0000000000000000, -0.0279700469645434}, {0.0399778398953252, -0.0000000000000000}, {-0.0000000000000000, -0.0595607437938820}, {-0.0412777487720041, 0.0000000000000000}, {-0.0000000000000000, 0.0251913335242470}, {-0.0000000000000000, 0.0251913335242470}, {0.0412777487684869, 0.0000000000000000}, {0.0000000000000000, -0.0595580964730781}, {-0.0399778398965743, -0.0000000000000000}, {0.0079955679815631, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {-0.0412777487684869, -0.0000000000000000}, {-0.0000000000000000, 0.0503826670509438}, {-0.0079955679815631, -0.0000000000000000}, {-0.0000000000000000, 0.0251913335242470}, {0.0399778398953252, 0.0000000000000000}, {-0.0399778398953252, 0.0000000000000000}, {-0.0000000000000000, 0.0503826670509438}, {0.0399778398965743, 0.0000000000000000}, {0.0000000000000000, -0.0279700469621601}, {-0.0399778398965743, -0.0000000000000000}, {-0.0079955679815631, -0.0000000000000000}, {-0.0000000000000000, 0.0251913335271091}, {0.0399778398965743, 0.0000000000000000}, {0.0079955679815631, 0.0000000000000000}, {0.0000000000000000, -0.0279700469621601}, {0.0399778398878310, -0.0000000000000000}, {-0.0000000000000000, -0.0279700469645434}, {-0.0000000000000000, -0.0279700469645434}, {-0.0079955679815631, 0.0000000000000000}, {-0.0399778398878310, 0.0000000000000000}, {0.0000000000000000, 0.0251913335242470}, {0.0079955679815631, -0.0000000000000000}, {0.0399778398965743, -0.0000000000000000}, {-0.0000000000000000, -0.0595580964869841}, {-0.0412777487720041, 0.0000000000000000}, {-0.0000000000000000, 0.0251913335242470}, {-0.0399778398965743, 0.0000000000000000}, {-0.0000000000000000, 0.0503826670553068}, {0.0399778398953252, 0.0000000000000000}, {-0.0399778398953252, 0.0000000000000000}, {0.0000000000000000, 0.0251913335242470}, {0.0079955679815631, -0.0000000000000000}, {0.0000000000000000, 0.0503826670553068}, {0.0412777487720041, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0079955679815631, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0000000000000000, -0.0139850234834634}, {-0.0206388743877607, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {0.0206388743912779, 0.0000000000000000}, {0.0000000000000000, -0.0139850234858467}, {-0.0000000000000000, 0.0251913335299711}, {0.0206388743877607, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {-0.0039977839895325, -0.0000000000000000}, {0.0000000000000000, -0.0139850234834634}, {-0.0039977839895325, -0.0000000000000000}, {0.0039977839895325, -0.0000000000000000}, {-0.0000000000000000, -0.0139850234858467}, {0.0039977839895325, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0206388743912779, 0.0000000000000000}, {-0.0000000000000000, 0.0251913335242470}, {0.0000000000000000, 0.0000000000000000}, {-0.0206388743877607, -0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {-0.0119933519636014, -0.0000000000000000}, {0.0119933519685976, 0.0000000000000000}, {-0.0000000000000000, 0.0000000000000000}, {0.0206388743877607, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0119933519685976, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0039977839895325, -0.0000000000000000}, {0.0039977839895325, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0119933519636014, 0.0000000000000000}, {-0.0000000000000000, 0.0139850234834634}, {0.0119933519636014, 0.0000000000000000}, {0.0119933519685976, 0.0000000000000000}, {0.0000000000000000, -0.0251913335299711}, {-0.0119933519636014, -0.0000000000000000}, {-0.0119933519685976, -0.0000000000000000}, {-0.0000000000000000, 0.0139850234834634}, {-0.0119933519611033, 0.0000000000000000}, {0.0000000000000000, 0.0139850234834634}, {0.0000000000000000, 0.0139850234834634}, {0.0119933519685976, -0.0000000000000000}, {0.0119933519611033, -0.0000000000000000}, {-0.0000000000000000, -0.0251913335242470}, {-0.0119933519685976, 0.0000000000000000}, {0.0119933519636014, -0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0206388743877607, 0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0119933519636014, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0039977839895325, -0.0000000000000000}, {0.0039977839895325, -0.0000000000000000}, {-0.0000000000000000, -0.0000000000000000}, {0.0119933519685976, -0.0000000000000000}, {0.0000000000000000, -0.0000000000000000}, {-0.0206388743877607, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {-0.0119933519685976, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}, {0.0000000000000000, 0.0000000000000000}}; diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/projop_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/projop_pw.cpp index 2543f78662..b419d9ce88 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/projop_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/projop_pw.cpp @@ -19,7 +19,7 @@ #ifdef __MPI #include "module_base/parallel_reduce.h" #endif - +#include "module_io/orb_io.h" /** * =============================================================================================== * @@ -137,7 +137,7 @@ void init_proj(const std::string& orbital_dir, double dr_ = -1.0; std::vector nzeta; // number of radials for each l std::vector> radials; // radials arranged in serial - AtomicRadials::read_abacus_orb(ifs, elem, ecut, nr_, dr_, nzeta, radials); + ModuleIO::read_abacus_orb(ifs, elem, ecut, nr_, dr_, nzeta, radials); #ifdef __DEBUG assert(elem != ""); assert(ecut != -1.0); diff --git a/source/module_hsolver/diago_cusolvermp.cpp b/source/module_hsolver/diago_cusolvermp.cpp index 615ff8f2bb..84fcdd09ef 100644 --- a/source/module_hsolver/diago_cusolvermp.cpp +++ b/source/module_hsolver/diago_cusolvermp.cpp @@ -16,17 +16,20 @@ void DiagoCusolverMP::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* phm_in->matrix(h_mat, s_mat); std::vector eigen(GlobalV::NLOCAL, 0.0); + std::vector eigenvectors(h_mat.row * h_mat.col); MPI_Comm COMM_DIAG = MPI_COMM_WORLD; // use all processes { Diag_CusolverMP_gvd es(COMM_DIAG, (const int)h_mat.row, (const int)h_mat.col, (const int*)h_mat.desc); ModuleBase::timer::tick("DiagoCusolverMP", "Diag_CusolverMP_gvd"); - es.generalized_eigenvector(h_mat.p, s_mat.p, eigen.data(), psi.get_pointer()); + es.generalized_eigenvector(h_mat.p, s_mat.p, eigen.data(), eigenvectors.data()); ModuleBase::timer::tick("DiagoCusolverMP", "Diag_CusolverMP_gvd"); } const int inc = 1; BlasConnector::copy(GlobalV::NBANDS, eigen.data(), inc, eigenvalue_in, inc); + const int size = psi.get_nbands() * psi.get_nbasis(); + BlasConnector::copy(size, eigenvectors.data(), inc, psi.get_pointer(), inc); } template class DiagoCusolverMP; template class DiagoCusolverMP; diff --git a/source/module_hsolver/diago_david.h b/source/module_hsolver/diago_david.h index 16bc471e2c..4673f5ca8d 100644 --- a/source/module_hsolver/diago_david.h +++ b/source/module_hsolver/diago_david.h @@ -30,8 +30,6 @@ class DiagoDavid : public DiagH // declare type of matrix-blockvector functions. // the function type is defined as a std::function object. - // notice that hpsi and spsi functions require - // !different! order of input and output blockvectors. /** * @brief A function type representing the HX function. * @@ -39,12 +37,12 @@ class DiagoDavid : public DiagH * For eigenvalue problem HX = λX or generalized eigenvalue problem HX = λSX, * this function computes the product of the Hamiltonian matrix H and a blockvector X. * - * @param[out] X Pointer to input blockvector of type `T*`. - * @param[in] HX Pointer to output blockvector of type `T*`. - * @param[in] neig Number of eigebpairs required. - * @param[in] dim Dimension of matrix. - * @param[in] id_start Start index of blockvector. - * @param[in] id_end End index of blockvector. + * @param[out] X Pointer to input blockvector of type `T*`. + * @param[in] HX Pointer to output blockvector of type `T*`. + * @param[in] nvec Number of eigebpairs, i.e. number of vectors in a block. + * @param[in] dim Dimension of matrix. + * @param[in] id_start Start index of blockvector. + * @param[in] id_end End index of blockvector. */ using HPsiFunc = std::function; @@ -60,6 +58,8 @@ class DiagoDavid : public DiagH * @param[in] nrow Dimension of SX: nbands * nrow. * @param[in] npw Number of plane waves. * @param[in] nbands Number of bands. + * + * @note called as spsi(in, out, dim, dim, 1) */ using SPsiFunc = std::function; diff --git a/source/module_hsolver/diago_elpa_native.cpp b/source/module_hsolver/diago_elpa_native.cpp index fa73a1d1e0..f187a62288 100644 --- a/source/module_hsolver/diago_elpa_native.cpp +++ b/source/module_hsolver/diago_elpa_native.cpp @@ -56,7 +56,6 @@ void DiagoElpaNative::diag_pool(hamilt::MatrixBlock& h_mat, Real* eigenvalue_in, MPI_Comm& comm) { - std::vector eigen(GlobalV::NLOCAL, 0.0); ModuleBase::timer::tick("DiagoElpaNative", "elpa_solve"); @@ -71,6 +70,8 @@ void DiagoElpaNative::diag_pool(hamilt::MatrixBlock& h_mat, int nprows, npcols, myprow, mypcol; Cblacs_gridinfo(cblacs_ctxt, &nprows, &npcols, &myprow, &mypcol); + std::vector eigen(GlobalV::NLOCAL, 0.0); + std::vector eigenvectors(narows * nacols); if (elpa_init(20210430) != ELPA_OK) { @@ -94,11 +95,17 @@ void DiagoElpaNative::diag_pool(hamilt::MatrixBlock& h_mat, elpa_setup(handle); elpa_set(handle, "solver", ELPA_SOLVER_1STAGE, &success); -#ifdef __CUDA +/* ELPA_WITH_NVIDIA_GPU_VERSION is a symbol defined in elpa/elpa_configured_options.h + For example: + cat elpa/elpa_configured_options.h + #define ELPA_WITH_NVIDIA_GPU_VERSION 1 + #define ELPA_WITH_AMD_GPU_VERSION 0 + #define ELPA_WITH_SYCL_GPU_VERSION 0 + */ +#if ELPA_WITH_NVIDIA_GPU_VERSION if (PARAM.globalv.device_flag == "gpu") { elpa_set(handle, "nvidia-gpu", 1, &success); - elpa_set(handle, "real_kernel", ELPA_2STAGE_REAL_NVIDIA_GPU, &success); elpa_setup_gpu(handle); } @@ -108,7 +115,7 @@ void DiagoElpaNative::diag_pool(hamilt::MatrixBlock& h_mat, h_mat.p, s_mat.p, eigen.data(), - psi.get_pointer(), + eigenvectors.data(), this->DecomposedState, &success); elpa_deallocate(handle, &success); @@ -128,6 +135,8 @@ void DiagoElpaNative::diag_pool(hamilt::MatrixBlock& h_mat, const int inc = 1; BlasConnector::copy(GlobalV::NBANDS, eigen.data(), inc, eigenvalue_in, inc); + const int size = psi.get_nbands() * psi.get_nbasis(); + BlasConnector::copy(size, eigenvectors.data(), inc, psi.get_pointer(), inc); } #endif diff --git a/source/module_hsolver/genelpa/elpa_new.h b/source/module_hsolver/genelpa/elpa_new.h index b002ea0bb6..ff1648ed85 100644 --- a/source/module_hsolver/genelpa/elpa_new.h +++ b/source/module_hsolver/genelpa/elpa_new.h @@ -2,6 +2,8 @@ #include + + #if ELPA_API_VERSION >= 20221101 #include #else @@ -25,8 +27,16 @@ extern "C" #include // #include #undef complex + const char *elpa_strerr(int elpa_error); + +// This is a header file related to GPU support. +// This header file was not available in the early ELPA. +#if ELPA_API_VERSION >= 20231101 +#include +#endif } + #include "elpa_generic.hpp" // This is a wrapper for `elpa/elpa_generic.h`. #endif diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 833a26fe18..e312e3be96 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -1,64 +1,64 @@ #include "hsolver_lcao.h" -#include "module_parameter/parameter.h" -#include "diago_cg.h" - #ifdef __MPI #include "diago_scalapack.h" #else #include "diago_lapack.h" #endif -#include "module_base/timer.h" -#include "module_hsolver/diago_iter_assist.h" -#include "module_hsolver/kernels/math_kernel_op.h" -#include "module_io/write_HS.h" - -#include "module_base/global_variable.h" - -#include -#include -#include #ifdef __CUSOLVERMP #include "diago_cusolvermp.h" -#endif // __CUSOLVERMP +#endif + #ifdef __ELPA #include "diago_elpa.h" #include "diago_elpa_native.h" #endif + #ifdef __CUDA #include "diago_cusolver.h" #endif + #ifdef __PEXSI #include "diago_pexsi.h" #include "module_elecstate/elecstate_lcao.h" #endif +#include "diago_cg.h" +#include "module_base/global_variable.h" +#include "module_base/memory.h" #include "module_base/scalapack_connector.h" +#include "module_base/timer.h" +#include "module_hsolver/diago_iter_assist.h" +#include "module_hsolver/kernels/math_kernel_op.h" #include "module_hsolver/parallel_k2d.h" -#include "module_base/memory.h" +#include "module_io/write_HS.h" +#include "module_parameter/parameter.h" +#include +#include +#include #include -namespace hsolver { +namespace hsolver +{ template void HSolverLCAO::solve(hamilt::Hamilt* pHamilt, psi::Psi& psi, elecstate::ElecState* pes, - const std::string method_in, const bool skip_charge) { ModuleBase::TITLE("HSolverLCAO", "solve"); ModuleBase::timer::tick("HSolverLCAO", "solve"); - // select the method of diagonalization - this->method = method_in; #ifdef __PEXSI // other purification methods should follow this routine + // Zhang Xiaoyang : Please modify Pesxi usage later if (this->method == "pexsi") { DiagoPexsi pe(ParaV); - for (int ik = 0; ik < psi.get_nk(); ++ik) { + for (int ik = 0; ik < psi.get_nk(); ++ik) + { /// update H(k) for each k point pHamilt->updateHk(ik); psi.fix_k(ik); @@ -74,21 +74,21 @@ void HSolverLCAO::solve(hamilt::Hamilt* pHamilt, } #endif - // Zhang Xiaoyang : Please modify Pesxi usage later if (this->method == "cg_in_lcao") { this->precondition_lcao.resize(psi.get_nbasis()); using Real = typename GetTypeReal::type; // set precondition - for (size_t i = 0; i < precondition_lcao.size(); i++) { + for (size_t i = 0; i < precondition_lcao.size(); i++) + { precondition_lcao[i] = 1.0; } } #ifdef __MPI - if (GlobalV::KPAR_LCAO > 1 && - (this->method == "genelpa" || this->method == "elpa" || this->method == "scalapack_gvx")) + if (GlobalV::KPAR_LCAO > 1 + && (this->method == "genelpa" || this->method == "elpa" || this->method == "scalapack_gvx")) { this->parakSolve(pHamilt, psi, pes, GlobalV::KPAR_LCAO); } @@ -96,7 +96,8 @@ void HSolverLCAO::solve(hamilt::Hamilt* pHamilt, #endif { /// Loop over k points for solve Hamiltonian to charge density - for (int ik = 0; ik < psi.get_nk(); ++ik) { + for (int ik = 0; ik < psi.get_nk(); ++ik) + { /// update H(k) for each k point pHamilt->updateHk(ik); @@ -107,28 +108,18 @@ void HSolverLCAO::solve(hamilt::Hamilt* pHamilt, } } - if (this->method == "cg_in_lcao") { - this->is_first_scf = false; - } - - if (this->method != "genelpa" && this->method != "elpa" && this->method != "scalapack_gvx" && this->method != "lapack" - && this->method != "cusolver" && this->method != "cusolvermp" && this->method != "cg_in_lcao" - && this->method != "pexsi") + if (skip_charge) // used in nscf calculation { - //delete this->pdiagh; - //this->pdiagh = nullptr; + ModuleBase::timer::tick("HSolverLCAO", "solve"); } - - // used in nscf calculation - if (skip_charge) { + else // used in scf calculation + { + // calculate charge by psi + pes->psiToRho(psi); ModuleBase::timer::tick("HSolverLCAO", "solve"); - return; } - // calculate charge by psi - // called in scf calculation - pes->psiToRho(psi); - ModuleBase::timer::tick("HSolverLCAO", "solve"); + return; } template @@ -187,11 +178,11 @@ void HSolverLCAO::hamiltSolvePsiK(hamilt::Hamilt* hm, psi::Psi& using ct_Device = typename ct::PsiToContainer::type; auto subspace_func = [](const ct::Tensor& psi_in, ct::Tensor& psi_out) { - // psi_in should be a 2D tensor: - // psi_in.shape() = [nbands, nbasis] - const auto ndim = psi_in.shape().ndim(); - REQUIRES_OK(ndim == 2, "dims of psi_in should be less than or equal to 2"); - }; + // psi_in should be a 2D tensor: + // psi_in.shape() = [nbands, nbasis] + const auto ndim = psi_in.shape().ndim(); + REQUIRES_OK(ndim == 2, "dims of psi_in should be less than or equal to 2"); + }; DiagoCG cg(PARAM.inp.basis_type, PARAM.inp.calculation, @@ -268,17 +259,17 @@ void HSolverLCAO::hamiltSolvePsiK(hamilt::Hamilt* hm, psi::Psi& ModuleBase::timer::tick("DiagoCG_New", "spsi_func"); }; - if (this->is_first_scf) + // if (this->is_first_scf) + // { + for (size_t i = 0; i < psi.get_nbands(); i++) { - for (size_t i = 0; i < psi.get_nbands(); i++) + for (size_t j = 0; j < psi.get_nbasis(); j++) { - for (size_t j = 0; j < psi.get_nbasis(); j++) - { - psi(i, j) = *zero_; - } - psi(i, i) = *one_; + psi(i, j) = *zero_; } + psi(i, i) = *one_; } + // } auto psi_tensor = ct::TensorMap(psi.get_pointer(), ct::DataTypeToEnum::value, @@ -308,9 +299,9 @@ void HSolverLCAO::hamiltSolvePsiK(hamilt::Hamilt* hm, psi::Psi& template void HSolverLCAO::parakSolve(hamilt::Hamilt* pHamilt, - psi::Psi& psi, - elecstate::ElecState* pes, - int kpar) + psi::Psi& psi, + elecstate::ElecState* pes, + int kpar) { #ifdef __MPI ModuleBase::timer::tick("HSolverLCAO", "parakSolve"); @@ -320,32 +311,33 @@ void HSolverLCAO::parakSolve(hamilt::Hamilt* pHamilt, int nks = psi.get_nk(); int nrow = this->ParaV->get_global_row_size(); int nb2d = this->ParaV->get_block_size(); - k2d.set_para_env(psi.get_nk(), - nrow, - nb2d, - GlobalV::NPROC, - GlobalV::MY_RANK, - PARAM.inp.nspin); + k2d.set_para_env(psi.get_nk(), nrow, nb2d, GlobalV::NPROC, GlobalV::MY_RANK, PARAM.inp.nspin); /// set psi_pool const int zero = 0; - int ncol_bands_pool = numroc_(&(nbands), &(nb2d), &(k2d.get_p2D_pool()->coord[1]), &zero, &(k2d.get_p2D_pool()->dim1)); + int ncol_bands_pool + = numroc_(&(nbands), &(nb2d), &(k2d.get_p2D_pool()->coord[1]), &zero, &(k2d.get_p2D_pool()->dim1)); /// Loop over k points for solve Hamiltonian to charge density for (int ik = 0; ik < k2d.get_pKpoints()->get_max_nks_pool(); ++ik) { // if nks is not equal to the number of k points in the pool std::vector ik_kpar; int ik_avail = 0; - for (int i = 0; i < k2d.get_kpar(); i++) { - if (ik + k2d.get_pKpoints()->startk_pool[i] < nks && ik < k2d.get_pKpoints()->nks_pool[i]) { + for (int i = 0; i < k2d.get_kpar(); i++) + { + if (ik + k2d.get_pKpoints()->startk_pool[i] < nks && ik < k2d.get_pKpoints()->nks_pool[i]) + { ik_avail++; } } - if (ik_avail == 0) { - ModuleBase::WARNING_QUIT("HSolverLCAO::solve", - "ik_avail is 0!"); - } else { + if (ik_avail == 0) + { + ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "ik_avail is 0!"); + } + else + { ik_kpar.resize(ik_avail); - for (int i = 0; i < ik_avail; i++) { + for (int i = 0; i < ik_avail; i++) + { ik_kpar[i] = ik + k2d.get_pKpoints()->startk_pool[i]; } } @@ -359,31 +351,35 @@ void HSolverLCAO::parakSolve(hamilt::Hamilt* pHamilt, /// local psi in pool psi_pool.fix_k(0); hamilt::MatrixBlock hk_pool = hamilt::MatrixBlock{k2d.hk_pool.data(), - (size_t)k2d.get_p2D_pool()->get_row_size(), (size_t)k2d.get_p2D_pool()->get_col_size(), k2d.get_p2D_pool()->desc}; + (size_t)k2d.get_p2D_pool()->get_row_size(), + (size_t)k2d.get_p2D_pool()->get_col_size(), + k2d.get_p2D_pool()->desc}; hamilt::MatrixBlock sk_pool = hamilt::MatrixBlock{k2d.sk_pool.data(), - (size_t)k2d.get_p2D_pool()->get_row_size(), (size_t)k2d.get_p2D_pool()->get_col_size(), k2d.get_p2D_pool()->desc}; + (size_t)k2d.get_p2D_pool()->get_row_size(), + (size_t)k2d.get_p2D_pool()->get_col_size(), + k2d.get_p2D_pool()->desc}; /// solve eigenvector and eigenvalue for H(k) if (this->method == "scalapack_gvx") { DiagoScalapack sa; - sa.diag_pool(hk_pool, sk_pool, psi_pool,&(pes->ekb(ik_global, 0)), k2d.POOL_WORLD_K2D); + sa.diag_pool(hk_pool, sk_pool, psi_pool, &(pes->ekb(ik_global, 0)), k2d.POOL_WORLD_K2D); } #ifdef __ELPA else if (this->method == "genelpa") { DiagoElpa el; - el.diag_pool(hk_pool, sk_pool, psi_pool,&(pes->ekb(ik_global, 0)), k2d.POOL_WORLD_K2D); + el.diag_pool(hk_pool, sk_pool, psi_pool, &(pes->ekb(ik_global, 0)), k2d.POOL_WORLD_K2D); } else if (this->method == "elpa") { DiagoElpaNative el; - el.diag_pool(hk_pool, sk_pool, psi_pool,&(pes->ekb(ik_global, 0)), k2d.POOL_WORLD_K2D); + el.diag_pool(hk_pool, sk_pool, psi_pool, &(pes->ekb(ik_global, 0)), k2d.POOL_WORLD_K2D); } #endif else { ModuleBase::WARNING_QUIT("HSolverLCAO::solve", - "This method of DiagH for k-parallelism diagnolization is not supported!"); + "This method of DiagH for k-parallelism diagnolization is not supported!"); } } MPI_Barrier(MPI_COMM_WORLD); @@ -394,21 +390,22 @@ void HSolverLCAO::parakSolve(hamilt::Hamilt* pHamilt, MPI_Bcast(&(pes->ekb(ik_kpar[ipool], 0)), nbands, MPI_DOUBLE, source, MPI_COMM_WORLD); int desc_pool[9]; std::copy(k2d.get_p2D_pool()->desc, k2d.get_p2D_pool()->desc + 9, desc_pool); - if (k2d.get_my_pool() != ipool) { + if (k2d.get_my_pool() != ipool) + { desc_pool[1] = -1; } psi.fix_k(ik_kpar[ipool]); Cpxgemr2d(nrow, - nbands, - psi_pool.get_pointer(), - 1, - 1, - desc_pool, - psi.get_pointer(), - 1, - 1, - k2d.get_p2D_global()->desc, - k2d.get_p2D_global()->blacs_ctxt); + nbands, + psi_pool.get_pointer(), + 1, + 1, + desc_pool, + psi.get_pointer(), + 1, + 1, + k2d.get_p2D_global()->desc, + k2d.get_p2D_global()->blacs_ctxt); } MPI_Barrier(MPI_COMM_WORLD); ModuleBase::timer::tick("HSolverLCAO", "collect_psi"); diff --git a/source/module_hsolver/hsolver_lcao.h b/source/module_hsolver/hsolver_lcao.h index 8dbb3e52f8..c2b0c92024 100644 --- a/source/module_hsolver/hsolver_lcao.h +++ b/source/module_hsolver/hsolver_lcao.h @@ -16,24 +16,20 @@ class HSolverLCAO void solve(hamilt::Hamilt* pHamilt, psi::Psi& psi, elecstate::ElecState* pes, - const std::string method_in, const bool skip_charge); private: void hamiltSolvePsiK(hamilt::Hamilt* hm, psi::Psi& psi, double* eigenvalue); - const Parallel_Orbitals* ParaV; - void parakSolve(hamilt::Hamilt* pHamilt, psi::Psi& psi, elecstate::ElecState* pes, int kpar); - bool is_first_scf = true; + const Parallel_Orbitals* ParaV; + + const std::string method; + // for cg_in_lcao using Real = typename GetTypeReal::type; std::vector precondition_lcao; - - DiagH* pdiagh = nullptr; // for single Hamiltonian matrix diagonal solver - - std::string method = "none"; }; } // namespace hsolver diff --git a/source/module_hsolver/hsolver_lcaopw.cpp b/source/module_hsolver/hsolver_lcaopw.cpp index c1261ff695..6782df33b7 100644 --- a/source/module_hsolver/hsolver_lcaopw.cpp +++ b/source/module_hsolver/hsolver_lcaopw.cpp @@ -193,11 +193,6 @@ void HSolverLIP::paw_func_after_kloop(psi::Psi& psi, elecstate::ElecState* } #endif -template -HSolverLIP::HSolverLIP(ModulePW::PW_Basis_K* wfc_basis_in) -{ - this->wfc_basis = wfc_basis_in; -} /* lcao_in_pw diff --git a/source/module_hsolver/hsolver_lcaopw.h b/source/module_hsolver/hsolver_lcaopw.h index 1694e6c62b..fd82e7b8eb 100644 --- a/source/module_hsolver/hsolver_lcaopw.h +++ b/source/module_hsolver/hsolver_lcaopw.h @@ -18,7 +18,7 @@ class HSolverLIP using Real = typename GetTypeReal::type; public: - HSolverLIP(ModulePW::PW_Basis_K* wfc_basis_in); + HSolverLIP(ModulePW::PW_Basis_K* wfc_basis_in) : wfc_basis(wfc_basis_in) {}; /// @brief solve function for lcao_in_pw /// @param pHamilt interface to hamilt @@ -33,9 +33,7 @@ class HSolverLIP const bool skip_charge); private: - ModulePW::PW_Basis_K* wfc_basis = nullptr; - - std::vector eigenvalues; + ModulePW::PW_Basis_K* wfc_basis; #ifdef USE_PAW void paw_func_in_kloop(const int ik); diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp index d20718b7f3..7b8e5fc0a6 100644 --- a/source/module_hsolver/hsolver_pw.cpp +++ b/source/module_hsolver/hsolver_pw.cpp @@ -323,7 +323,7 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, if (this->method == "cg") { - // warp the subspace_func into a lambda function + // wrap the subspace_func into a lambda function auto ngk_pointer = psi.get_ngk_pointer(); auto subspace_func = [hm, ngk_pointer](const ct::Tensor& psi_in, ct::Tensor& psi_out) { // psi_in should be a 2D tensor: @@ -355,10 +355,10 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, this->diag_iter_max, this->nproc_in_pool); - // warp the hpsi_func and spsi_func into a lambda function + // wrap the hpsi_func and spsi_func into a lambda function using ct_Device = typename ct::PsiToContainer::type; - // warp the hpsi_func and spsi_func into a lambda function + // wrap the hpsi_func and spsi_func into a lambda function auto hpsi_func = [hm, ngk_pointer](const ct::Tensor& psi_in, ct::Tensor& hpsi_out) { ModuleBase::timer::tick("DiagoCG_New", "hpsi_func"); // psi_in should be a 2D tensor: diff --git a/source/module_hsolver/hsolver_pw.h b/source/module_hsolver/hsolver_pw.h index e4b220fbc1..62bbc5120b 100644 --- a/source/module_hsolver/hsolver_pw.h +++ b/source/module_hsolver/hsolver_pw.h @@ -21,20 +21,17 @@ class HSolverPW public: HSolverPW(ModulePW::PW_Basis_K* wfc_basis_in, wavefunc* pwf_in, - const std::string calculation_type_in, const std::string basis_type_in, const std::string method_in, const bool use_paw_in, const bool use_uspp_in, const int nspin_in, - const int scf_iter_in, const int diag_iter_max_in, const double diag_thr_in, const bool need_subspace_in, const bool initialed_psi_in) - : wfc_basis(wfc_basis_in), pwf(pwf_in), calculation_type(calculation_type_in), basis_type(basis_type_in), method(method_in), use_paw(use_paw_in), use_uspp(use_uspp_in), nspin(nspin_in), diff --git a/source/module_hsolver/hsolver_pw_sdft.cpp b/source/module_hsolver/hsolver_pw_sdft.cpp index 0f15879bb1..4f4c858cf4 100644 --- a/source/module_hsolver/hsolver_pw_sdft.cpp +++ b/source/module_hsolver/hsolver_pw_sdft.cpp @@ -64,7 +64,7 @@ void HSolverPW_SDFT::solve(hamilt::Hamilt>* pHamilt, } this->output_iterInfo(); - + for (int ik = 0; ik < nks; ik++) { // init k diff --git a/source/module_hsolver/hsolver_pw_sdft.h b/source/module_hsolver/hsolver_pw_sdft.h index aa342b03e1..144a508985 100644 --- a/source/module_hsolver/hsolver_pw_sdft.h +++ b/source/module_hsolver/hsolver_pw_sdft.h @@ -12,21 +12,17 @@ class HSolverPW_SDFT : public HSolverPW> wavefunc* pwf_in, Stochastic_WF& stowf, StoChe& stoche, - const std::string calculation_type_in, const std::string basis_type_in, const std::string method_in, const bool use_paw_in, const bool use_uspp_in, const int nspin_in, - const int scf_iter_in, const int diag_iter_max_in, const double diag_thr_in, - const bool need_subspace_in, const bool initialed_psi_in) - : HSolverPW(wfc_basis_in, pwf_in, calculation_type_in, @@ -44,14 +40,14 @@ class HSolverPW_SDFT : public HSolverPW> stoiter.init(pkv, wfc_basis_in, stowf, stoche); } - virtual void solve(hamilt::Hamilt>* pHamilt, - psi::Psi>& psi, - elecstate::ElecState* pes, - ModulePW::PW_Basis_K* wfc_basis, - Stochastic_WF& stowf, - const int istep, - const int iter, - const bool skip_charge); + void solve(hamilt::Hamilt>* pHamilt, + psi::Psi>& psi, + elecstate::ElecState* pes, + ModulePW::PW_Basis_K* wfc_basis, + Stochastic_WF& stowf, + const int istep, + const int iter, + const bool skip_charge); Stochastic_Iter stoiter; }; diff --git a/source/module_io/CMakeLists.txt b/source/module_io/CMakeLists.txt index 781057cb27..3ef65941eb 100644 --- a/source/module_io/CMakeLists.txt +++ b/source/module_io/CMakeLists.txt @@ -29,6 +29,7 @@ list(APPEND objects output_log.cpp para_json.cpp parse_args.cpp + orb_io.cpp ) list(APPEND objects_advanced diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index 41a9f8542a..578b7aa363 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -166,19 +166,6 @@ void Input_Conv::Convert() //---------------------------------------------------------- // main parameters / electrons / spin ( 10/16 ) //---------------------------------------------------------- - // suffix - if (PARAM.inp.calculation == "md" && PARAM.mdp.md_restart) // md restart liuyu add 2023-04-12 - { - int istep = 0; - double temperature = 0.0; - MD_func::current_md_info(GlobalV::MY_RANK, PARAM.globalv.global_readin_dir, istep, temperature); - if (PARAM.inp.read_file_dir == "auto") - { - GlobalV::stru_file = PARAM.globalv.global_stru_dir + "STRU_MD_" + std::to_string(istep); - } - } else if (PARAM.inp.stru_file != "") { - GlobalV::stru_file = PARAM.inp.stru_file; - } ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "pseudo_dir", PARAM.inp.pseudo_dir); ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "orbital_dir", PARAM.inp.orbital_dir); diff --git a/source/module_io/json_output/general_info.cpp b/source/module_io/json_output/general_info.cpp index ac5146e9a2..6190e880df 100644 --- a/source/module_io/json_output/general_info.cpp +++ b/source/module_io/json_output/general_info.cpp @@ -50,7 +50,7 @@ void gen_general_info(const Parameter& param) AbacusJson::add_json({"general_info", "omp_num"}, omp_num, false); AbacusJson::add_json({"general_info", "pseudo_dir"}, param.inp.pseudo_dir, false); AbacusJson::add_json({"general_info", "orbital_dir"}, param.inp.orbital_dir, false); - AbacusJson::add_json({"general_info", "stru_file"}, param.inp.stru_file, false); + AbacusJson::add_json({"general_info", "stru_file"}, param.globalv.global_in_stru, false); AbacusJson::add_json({"general_info", "kpt_file"}, param.inp.kpoint_file, false); AbacusJson::add_json({"general_info", "start_time"}, start_time_str, false); AbacusJson::add_json({"general_info", "end_time"}, end_time_str, false); diff --git a/source/module_io/json_output/test/para_json_test.cpp b/source/module_io/json_output/test/para_json_test.cpp index fef6aae052..b66affc8cc 100644 --- a/source/module_io/json_output/test/para_json_test.cpp +++ b/source/module_io/json_output/test/para_json_test.cpp @@ -214,7 +214,7 @@ TEST(AbacusJsonTest, GeneralInfo) PARAM.input.device = "cpu"; PARAM.input.pseudo_dir = "./abacus/test/pseudo_dir"; PARAM.input.orbital_dir = "./abacus/test/orbital_dir"; - PARAM.input.stru_file = "./abacus/test/stru_file"; + PARAM.sys.global_in_stru = "./abacus/test/stru_file"; PARAM.input.kpoint_file = "./abacus/test/kpoint_file"; // output the json file Json::AbacusJson::doc.Parse("{}"); diff --git a/source/module_io/orb_io.cpp b/source/module_io/orb_io.cpp new file mode 100644 index 0000000000..45a93c65b2 --- /dev/null +++ b/source/module_io/orb_io.cpp @@ -0,0 +1,191 @@ +#include "module_io/orb_io.h" +#include "module_base/tool_quit.h" +#ifdef __MPI +#include "module_base/parallel_common.h" +#endif + +void ModuleIO::read_abacus_orb(std::ifstream& ifs, + std::string& elem, + double& ecut, + int& nr, + double& dr, + std::vector& nzeta, + std::vector>& radials, + const int rank) +{ + nr = 0; // number of grid points + dr = 0; // grid spacing + int lmax = 0, nchi = 0; // number of radial functions + std::vector> radial_map_; // build a map from [l][izeta] to 1-d array index + std::string tmp; + // first read the header + if (rank == 0) + { + if (!ifs.is_open()) + { + ModuleBase::WARNING_QUIT("AtomicRadials::read_abacus_orb", "Couldn't open orbital file."); + } + while (ifs >> tmp) + { + if (tmp == "Element") + { + ifs >> elem; + } + else if (tmp == "Cutoff(Ry)") + { + ifs >> ecut; + } + else if (tmp == "Lmax") + { + ifs >> lmax; + nzeta.resize(lmax + 1); + for (int l = 0; l <= lmax; ++l) + { + ifs >> tmp >> tmp >> tmp >> nzeta[l]; + } + } + else if (tmp == "Mesh") + { + ifs >> nr; + continue; + } + else if (tmp == "dr") + { + ifs >> dr; + break; + } + } + radial_map_.resize(lmax + 1); + for (int l = 0; l <= lmax; ++l) + { + radial_map_[l].resize(nzeta[l]); + } + int ichi = 0; + for (int l = 0; l <= lmax; ++l) + { + for (int iz = 0; iz < nzeta[l]; ++iz) + { + radial_map_[l][iz] = ichi++; // return the value of ichi, then increment + } + } + nchi = ichi; // total number of radial functions + radials.resize(nchi); + std::for_each(radials.begin(), radials.end(), [nr](std::vector& v) { v.resize(nr); }); + } + + // broadcast the header information +#ifdef __MPI + Parallel_Common::bcast_string(elem); + Parallel_Common::bcast_double(ecut); + Parallel_Common::bcast_int(lmax); + Parallel_Common::bcast_int(nchi); + Parallel_Common::bcast_int(nr); + Parallel_Common::bcast_double(dr); +#endif + + // then adjust the size of the vectors + if (rank != 0) + { + nzeta.resize(lmax + 1); + radials.resize(nchi); + std::for_each(radials.begin(), radials.end(), [nr](std::vector& v) { v.resize(nr); }); + } + // broadcast the number of zeta functions for each angular momentum +#ifdef __MPI + Parallel_Common::bcast_int(nzeta.data(), lmax + 1); +#endif + + // read the radial functions by rank0 + int ichi = 0; + for (int i = 0; i != nchi; ++i) + { + if (rank == 0) + { + int l, izeta; + ifs >> tmp >> tmp >> tmp; + ifs >> tmp >> l >> izeta; + ichi = radial_map_[l][izeta]; + for (int ir = 0; ir != nr; ++ir) + { + ifs >> radials[ichi][ir]; + } + } + // broadcast the radial functions +#ifdef __MPI + Parallel_Common::bcast_int(ichi); // let other ranks know where to store the radial function + Parallel_Common::bcast_double(radials[ichi].data(), nr); +#endif + } +} + +void ModuleIO::write_abacus_orb(std::ofstream& ofs, + const std::string& elem, + const double& ecut, + const int nr, + const double dr, + const std::vector& nzeta, + const std::vector>& radials, + const int rank) +{ + const std::vector spec = {"S", "P", "D", "F", "G", "H", "I", "J", "K"}; + if (!ofs.good()) + { + ModuleBase::WARNING_QUIT("AtomicRadials::write_abacus_orb", "Couldn't open orbital file."); + } + if (rank == 0) + { + const int lmax = nzeta.size() - 1; + + for (int i = 0; i < 75; ++i) + { + ofs << "-"; + } + ofs << std::endl; + // left aligned + ofs << std::left << std::setw(28) << "Element" << elem << std::endl; + ofs << std::left << std::setw(28) << "Energy Cutoff(Ry)" << ecut << std::endl; + // rcut .1f, not scientific + ofs << std::left << std::setw(28) << "Radius Cutoff(a.u.)" + << std::fixed << std::setprecision(1) << dr * (nr - 1) << std::endl; + ofs << std::left << std::setw(28) << "Lmax" << lmax << std::endl; + for (int l = 0; l != nzeta.size(); ++l) + { + std::string title = "Number of " + spec[l] + "orbital-->"; + ofs << std::left << std::setw(28) << title << nzeta[l] << std::endl; + } + for (int i = 0; i < 75; ++i) + { + ofs << "-"; + } + ofs << std::endl; + ofs << "SUMMARY END\n\n"; + ofs << std::left << std::setw(28) << "Mesh" << nr << std::endl; + ofs << std::left << std::setw(28) << "dr" << std::fixed << std::setprecision(2) << dr << std::endl; + + int ichi = 0; + for (int l = 0; l <= lmax; l++) + { + for (int izeta = 0; izeta < nzeta[l]; izeta++) + { + ofs << std::right << std::setw(20) << "Type" + << std::right << std::setw(20) << "L" + << std::right << std::setw(20) << "N" << std::endl; + ofs << std::right << std::setw(20) << 0 + << std::right << std::setw(20) << l + << std::right << std::setw(20) << izeta; + for (int i = 0; i < nr; i++) + { + if (i % 4 == 0) + { + ofs << std::endl; + } + ofs << std::left << std::setw(22) << std::setprecision(14) + << std::scientific << radials[ichi][i]; + } + ofs << std::endl; + ichi++; + } + } + } + // ofs.close(); // like read_abacus_orb, who opens it, who closes it +} diff --git a/source/module_io/orb_io.h b/source/module_io/orb_io.h new file mode 100644 index 0000000000..bdd0043fc2 --- /dev/null +++ b/source/module_io/orb_io.h @@ -0,0 +1,37 @@ +#include +#include +#include + +namespace ModuleIO +{ + /** + * @brief static version of read_abacus_orb. A delete-new operation may cause the memory leak, + * it is better to use std::vector to replace the raw pointer. + * + * @param ifs [in] ifstream from the orbital file, via `std::ifstream ifs(forb);` + * @param elem [out] element symbol + * @param ecut [out] planewave energy cutoff + * @param nr [out] number of radial grid points + * @param dr [out] radial grid spacing + * @param nzeta [out] number of zeta functions for each angular momentum + * @param radials [out] radial orbitals + * @param rank [in] MPI rank + */ + void read_abacus_orb(std::ifstream& ifs, + std::string& elem, + double& ecut, + int& nr, + double& dr, + std::vector& nzeta, + std::vector>& radials, + const int rank = 0); + + void write_abacus_orb(std::ofstream& ofs, + const std::string& elem, + const double& ecut, + const int nr, + const double dr, + const std::vector& nzeta, + const std::vector>& radials, + const int rank = 0); +} \ No newline at end of file diff --git a/source/module_io/read_input.cpp b/source/module_io/read_input.cpp index 0026b44d42..21235eb461 100644 --- a/source/module_io/read_input.cpp +++ b/source/module_io/read_input.cpp @@ -304,12 +304,7 @@ void ReadInput::read_txt_input(Parameter& param, const std::string& filename) readvalue_item->read_value(*readvalue_item, param); } - // 2) count the number of atom types from STRU file - if (this->check_ntype_flag) { - check_ntype(param.input.stru_file, param.input.ntype); -} - - // 3) reset this value when some conditions are met + // 2) reset this value when some conditions are met // e.g. if (calulation_type == "nscf") then set "init_chg" to "file". for (auto& input_item: this->input_lists) { @@ -320,6 +315,12 @@ void ReadInput::read_txt_input(Parameter& param, const std::string& filename) } this->set_globalv(param); + // 3) count the number of atom types from STRU file + if (this->check_ntype_flag) + { + check_ntype(param.globalv.global_in_stru, param.input.ntype); + } + // 4) check the value of the parameters for (auto& input_item: this->input_lists) { @@ -432,7 +433,7 @@ void ReadInput::check_ntype(const std::string& fn, int& param_ntype) if (!ifa) { GlobalV::ofs_warning << fn; - ModuleBase::WARNING_QUIT("ReadInput::check_ntype", "Can not find the file containing atom positions.!"); + ModuleBase::WARNING_QUIT("ReadInput::check_ntype", "Can not find the file: " + fn); } int ntype_stru = 0; @@ -476,6 +477,24 @@ void ReadInput::check_ntype(const std::string& fn, int& param_ntype) } } +int ReadInput::current_md_step(const std::string& file_dir) +{ + std::stringstream ssc; + ssc << file_dir << "Restart_md.dat"; + std::ifstream file(ssc.str().c_str()); + + if (!file) + { + ModuleBase::WARNING_QUIT("current_md_step", "no Restart_md.dat"); + } + + int md_step; + file >> md_step; + file.close(); + + return md_step; +} + void ReadInput::add_item(const Input_Item& item) { // only rank 0 read the input file diff --git a/source/module_io/read_input.h b/source/module_io/read_input.h index 1120b1ad66..0bd8745ad7 100644 --- a/source/module_io/read_input.h +++ b/source/module_io/read_input.h @@ -65,6 +65,13 @@ class ReadInput * @param filename output file name */ void write_txt_input(const Parameter& param, const std::string& filename); + /** + * @brief determine the md step in restart case + * + * @param file_dir directory of Restart_md.dat + * @return md step + */ + int current_md_step(const std::string& file_dir); /** * @brief count_nype from STRU file * diff --git a/source/module_io/read_set_globalv.cpp b/source/module_io/read_set_globalv.cpp index 9b0ac2af89..cb8421533d 100644 --- a/source/module_io/read_set_globalv.cpp +++ b/source/module_io/read_set_globalv.cpp @@ -1,8 +1,8 @@ -#include "read_input.h" -#include "read_input_tool.h" -#include "module_parameter/parameter.h" #include "module_base/global_variable.h" #include "module_base/tool_quit.h" +#include "module_parameter/parameter.h" +#include "read_input.h" +#include "read_input_tool.h" namespace ModuleIO { void ReadInput::set_globalv(Parameter& para) @@ -31,6 +31,26 @@ void ReadInput::set_globalv(Parameter& para) para.sys.global_readin_dir = para.inp.read_file_dir + '/'; } para.sys.global_readin_dir = to_dir(para.sys.global_readin_dir); + + /// get the stru file for md restart case + if (para.inp.calculation == "md" && para.mdp.md_restart) + { + int istep = current_md_step(para.sys.global_readin_dir); + + if (para.inp.read_file_dir == "auto") + { + para.sys.global_in_stru = para.sys.global_stru_dir + "STRU_MD_" + std::to_string(istep); + } + else + { + para.sys.global_in_stru = para.inp.read_file_dir + "STRU_MD_" + std::to_string(istep); + } + } + else + { + para.sys.global_in_stru = para.inp.stru_file; + } + /// caculate the gamma_only_pw and gamma_only_local if (para.input.gamma_only) { diff --git a/source/module_io/test/CMakeLists.txt b/source/module_io/test/CMakeLists.txt index ad18e6fb13..543c4f363c 100644 --- a/source/module_io/test/CMakeLists.txt +++ b/source/module_io/test/CMakeLists.txt @@ -155,6 +155,7 @@ AddTest( ../../module_cell/parallel_kpoints.cpp ../../module_cell/test/support/mock_unitcell.cpp ../../module_hamilt_lcao/hamilt_lcaodft/center2_orb.cpp + ../orb_io.cpp ) endif() @@ -185,7 +186,10 @@ add_test(NAME read_wfc_to_rho_parallel AddTest( TARGET numerical_basis_test LIBS parameter base ${math_libs} device numerical_atomic_orbitals container orb - SOURCES numerical_basis_test.cpp ../numerical_basis_jyjy.cpp ../../module_hamilt_lcao/hamilt_lcaodft/center2_orb.cpp + SOURCES numerical_basis_test.cpp + ../numerical_basis_jyjy.cpp + ../../module_hamilt_lcao/hamilt_lcaodft/center2_orb.cpp + ../orb_io.cpp ) @@ -196,6 +200,7 @@ AddTest( ../../module_cell/cell_index.cpp ../../module_basis/module_ao/parallel_2d.cpp ../../module_basis/module_ao/parallel_orbitals.cpp + ../orb_io.cpp ) if(ENABLE_LCAO) @@ -221,4 +226,15 @@ AddTest( add_test(NAME cif_io_test_parallel COMMAND mpirun -np 4 ./cif_io_test WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} +) + +AddTest( + TARGET orb_io_test + LIBS parameter base ${math_libs} device + SOURCES orb_io_test.cpp ../orb_io.cpp +) + +add_test(NAME orb_io_test_parallel + COMMAND mpirun -np 4 ./orb_io_test + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) \ No newline at end of file diff --git a/source/module_io/test/cif_io_test.cpp b/source/module_io/test/cif_io_test.cpp index c1e1e96ca4..3159142c83 100644 --- a/source/module_io/test/cif_io_test.cpp +++ b/source/module_io/test/cif_io_test.cpp @@ -167,11 +167,13 @@ TEST(CifParserTest, ReadSimpleTest) ofs.close(); #ifdef __MPI } + MPI_Barrier(MPI_COMM_WORLD); // make sure the file is written #endif std::map> data; ModuleIO::CifParser::read("mp-2516584.cif", data); // delete the file #ifdef __MPI + MPI_Barrier(MPI_COMM_WORLD); // make sure the file is already read if (rank == 0) { #endif @@ -241,11 +243,13 @@ TEST(CifParserTest, ReadMediumTest) ofs.close(); #ifdef __MPI } + MPI_Barrier(MPI_COMM_WORLD); // make sure the file is written #endif std::map> data; ModuleIO::CifParser::read("cod-1000065.cif", data); // delete the file #ifdef __MPI + MPI_Barrier(MPI_COMM_WORLD); // make sure the file is already read if (rank == 0) { #endif @@ -316,7 +320,6 @@ TEST(CifParserTest, WriteTest) MPI_Comm_rank(MPI_COMM_WORLD, &rank); #endif const std::string fcif = "test.cif"; - std::ofstream ofs(fcif); const std::vector abc_angles = {2.46637620, 2.46637620, 24.84784531, 90.0, 90.0, 120.0}; const int natom = 4; const std::vector atom_site_labels = {"C", "C", "C", "C"}; @@ -325,16 +328,21 @@ TEST(CifParserTest, WriteTest) 0.333333, 0.666667, 0.75, 0.666667, 0.333333, 0.25}; ModuleIO::CifParser::write(fcif, - abc_angles.data(), - natom, - atom_site_labels.data(), - atom_site_fract.data(), - "# Generated during unittest of function ModuleIO::CifParser::write", - "data_test"); + abc_angles.data(), + natom, + atom_site_labels.data(), + atom_site_fract.data(), + "# Generated during unittest of function ModuleIO::CifParser::write", + "data_test", + rank); +#ifdef __MPI + MPI_Barrier(MPI_COMM_WORLD); // make sure the file is written +#endif std::map> data; - ModuleIO::CifParser::read(fcif, data); + ModuleIO::CifParser::read(fcif, data, rank); // delete the file #ifdef __MPI + MPI_Barrier(MPI_COMM_WORLD); // make sure the file is already read if (rank == 0) { #endif diff --git a/source/module_io/test/orb_io_test.cpp b/source/module_io/test/orb_io_test.cpp new file mode 100644 index 0000000000..71780f47a9 --- /dev/null +++ b/source/module_io/test/orb_io_test.cpp @@ -0,0 +1,132 @@ +#include +#include "module_io/orb_io.h" + +#ifdef __MPI +#include +#endif + +#include "module_base/constants.h" +#include "module_base/global_variable.h" + +class OrbIOTest : public testing::Test +{ + protected: + void SetUp(); + void TearDown(){}; + + const std::string file = "../../../../tests/PP_ORB/Ti_gga_10au_100Ry_4s2p2d1f.orb"; + const double tol = 1e-12; +}; + +void OrbIOTest::SetUp() +{ +#ifdef __MPI + MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); +#endif +} + +TEST_F(OrbIOTest, ReadAbacusOrb) +{ + std::ifstream ifs(file); + std::string elem; + double ecut, dr; + int nr; + std::vector nzeta; + std::vector> radials; + ModuleIO::read_abacus_orb(ifs, elem, ecut, nr, dr, nzeta, radials, GlobalV::MY_RANK); + EXPECT_EQ(elem, "Ti"); + EXPECT_DOUBLE_EQ(ecut, 100.0); + EXPECT_EQ(nr, 1001); + EXPECT_DOUBLE_EQ(dr, 0.01); + EXPECT_EQ(nzeta.size(), 4); // l from 0 to 3 + EXPECT_EQ(nzeta[0], 4); + EXPECT_EQ(nzeta[1], 2); + EXPECT_EQ(nzeta[2], 2); + EXPECT_EQ(nzeta[3], 1); + EXPECT_EQ(radials.size(), 9); // 4 + 2 + 2 + 1 + for(auto& radial: radials) + { + EXPECT_EQ(radial.size(), 1001); + } + EXPECT_EQ(radials[0][0], -1.581711853170e-01); + EXPECT_EQ(radials[0][4], -1.583907030513e-01); + EXPECT_EQ(radials[0][996], -4.183526380009e-05); + EXPECT_EQ(radials[0][1000], 0); + EXPECT_EQ(radials[3][0], -1.166292682541e+00); + EXPECT_EQ(radials[3][4], -1.164223359672e+00); + EXPECT_EQ(radials[3][996], -3.183325576529e-04); + EXPECT_EQ(radials[3][1000], 0); + EXPECT_EQ(radials[8][0], 0); + EXPECT_EQ(radials[8][4], 3.744878535962e-05); + EXPECT_EQ(radials[8][996], 7.495357740660e-05); + EXPECT_EQ(radials[8][1000], 0); + ifs.close(); +} + +TEST_F(OrbIOTest, WriteAbacusOrb) +{ + std::ifstream ifs(file); + std::string elem; + double ecut, dr; + int nr; + std::vector nzeta; + std::vector> radials; + ModuleIO::read_abacus_orb(ifs, elem, ecut, nr, dr, nzeta, radials, GlobalV::MY_RANK); +#ifdef __MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + const std::string ftmp = "tmp.orb"; + std::ofstream ofs(ftmp, std::ios::out); + ModuleIO::write_abacus_orb(ofs, elem, ecut, nr, dr, nzeta, radials, GlobalV::MY_RANK); + ofs.close(); +#ifdef __MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + std::ifstream ifs1(ftmp); + std::string elem1; + double ecut1, dr1; + int nr1; + std::vector nzeta1; + std::vector> radials1; + ModuleIO::read_abacus_orb(ifs1, elem1, ecut1, nr1, dr1, nzeta1, radials1, GlobalV::MY_RANK); + EXPECT_EQ(elem, elem1); + EXPECT_DOUBLE_EQ(ecut, ecut1); + EXPECT_EQ(nr, nr1); + EXPECT_DOUBLE_EQ(dr, dr1); + EXPECT_EQ(nzeta.size(), nzeta1.size()); + for (int i = 0; i < nzeta.size(); ++i) + { + EXPECT_EQ(nzeta[i], nzeta1[i]); + } + EXPECT_EQ(radials.size(), radials1.size()); + for (int i = 0; i < radials.size(); ++i) + { + EXPECT_EQ(radials[i].size(), radials1[i].size()); + for (int j = 0; j < radials[i].size(); ++j) + { + EXPECT_NEAR(radials[i][j], radials1[i][j], tol); + } + } + ifs1.close(); +#ifdef __MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + remove(ftmp.c_str()); +} + +int main(int argc, char** argv) +{ + +#ifdef __MPI + MPI_Init(&argc, &argv); +#endif + + testing::InitGoogleTest(&argc, argv); + int result = RUN_ALL_TESTS(); + +#ifdef __MPI + MPI_Finalize(); +#endif + + return result; +} diff --git a/source/module_lr/hsolver_lrtd.cpp b/source/module_lr/hsolver_lrtd.cpp index 638ae6e807..0cc95fb8f6 100644 --- a/source/module_lr/hsolver_lrtd.cpp +++ b/source/module_lr/hsolver_lrtd.cpp @@ -156,11 +156,6 @@ namespace LR std::vector(psi_k1_dav.get_nbands(), true), false /*scf*/)); } - // else if (this->method == "cg") - // { - // this->pdiagh = new DiagoCG(precondition.data()); - // this->pdiagh->method = this->method; - // } else {throw std::runtime_error("HSolverLR::solve: method not implemented");} } diff --git a/source/module_lr/utils/test/CMakeLists.txt b/source/module_lr/utils/test/CMakeLists.txt index 6e49480405..53d1db607e 100644 --- a/source/module_lr/utils/test/CMakeLists.txt +++ b/source/module_lr/utils/test/CMakeLists.txt @@ -4,6 +4,7 @@ AddTest( LIBS parameter base ${math_libs} device container SOURCES lr_util_physics_test.cpp ../lr_util.cpp ../../../module_basis/module_ao/parallel_2d.cpp + ../../../module_io/orb_io.cpp ) AddTest( diff --git a/source/module_parameter/system_parameter.h b/source/module_parameter/system_parameter.h index 6c0d48fe28..4d25adabde 100644 --- a/source/module_parameter/system_parameter.h +++ b/source/module_parameter/system_parameter.h @@ -36,7 +36,8 @@ struct System_para ///< for plane wave basis. bool gamma_only_local = false; ///< true if "gamma_only" is true and "lcao" ///< is true; for local orbitals. - std::string global_in_card = "INPUT"; ///< global input directory + std::string global_in_card = "INPUT"; ///< input file + std::string global_in_stru = "STRU"; ///< stru file std::string global_out_dir = ""; ///< global output directory std::string global_readin_dir = ""; ///< global readin directory std::string global_stru_dir = ""; ///< global structure directory diff --git a/source/module_psi/psi_initializer.cpp b/source/module_psi/psi_initializer.cpp index d4ee02d09b..6e6879e92e 100644 --- a/source/module_psi/psi_initializer.cpp +++ b/source/module_psi/psi_initializer.cpp @@ -21,7 +21,6 @@ psi::Psi>* psi_initializer::allocate(const bool The way of calculating this->p_ucell_->natomwfc is, for each atom, read pswfc and for s, it is 1, for p, it is 3 , then multiplied by the number of atoms, and then add them together. */ - int prefactor = 1; int nbands_actual = 0; if(this->method_ == "random") { @@ -32,16 +31,8 @@ psi::Psi>* psi_initializer::allocate(const bool { if(this->method_.substr(0, 6) == "atomic") { - if(this->p_ucell_->natomwfc >= GlobalV::NBANDS) - { - nbands_actual = this->p_ucell_->natomwfc; - this->nbands_complem_ = 0; - } - else - { - nbands_actual = GlobalV::NBANDS; - this->nbands_complem_ = GlobalV::NBANDS - this->p_ucell_->natomwfc; - } + nbands_actual = std::max(this->p_ucell_->natomwfc, GlobalV::NBANDS); + this->nbands_complem_ = nbands_actual - this->p_ucell_->natomwfc; } else if(this->method_.substr(0, 3) == "nao") { @@ -51,45 +42,29 @@ psi::Psi>* psi_initializer::allocate(const bool int nbands_local = 0; for(int it = 0; it < this->p_ucell_->ntype; it++) { - for(int ia = 0; ia < this->p_ucell_->atoms[it].na; ia++) + for(int l = 0; l < this->p_ucell_->atoms[it].nwl + 1; l++) { - /* FOR EVERY ATOM */ - for(int l = 0; l < this->p_ucell_->atoms[it].nwl + 1; l++) - { - /* EVERY ZETA FOR (2l+1) ORBS */ - /* - non-rotate basis, nbands_local*=2 for PARAM.globalv.npol = 2 is enough - */ - //nbands_local += this->p_ucell_->atoms[it].l_nchi[l]*(2*l+1) * PARAM.globalv.npol; - /* - rotate basis, nbands_local*=4 for p, d, f,... orbitals, and nbands_local*=2 for s orbitals - risky when NSPIN = 4, problematic psi value, needed to be checked - */ - if(l == 0) - { - nbands_local += this->p_ucell_->atoms[it].l_nchi[l] * PARAM.globalv.npol; - } - else - { - nbands_local += this->p_ucell_->atoms[it].l_nchi[l]*(2*l+1) * PARAM.globalv.npol; - } - } + /* EVERY ZETA FOR (2l+1) ORBS */ + const int nchi = this->p_ucell_->atoms[it].l_nchi[l]; + const int degen_l = (l == 0)? 1 : 2*l+1; + nbands_local += nchi * degen_l * PARAM.globalv.npol * this->p_ucell_->atoms[it].na; + /* + non-rotate basis, nbands_local*=2 for PARAM.globalv.npol = 2 is enough + */ + //nbands_local += this->p_ucell_->atoms[it].l_nchi[l]*(2*l+1) * PARAM.globalv.npol; + /* + rotate basis, nbands_local*=4 for p, d, f,... orbitals, and nbands_local*=2 for s orbitals + risky when NSPIN = 4, problematic psi value, needed to be checked + */ } } - if(nbands_local >= GlobalV::NBANDS) - { - nbands_actual = nbands_local; - this->nbands_complem_ = 0; - } - else - { - nbands_actual = GlobalV::NBANDS; - this->nbands_complem_ = GlobalV::NBANDS - nbands_local; - } + nbands_actual = std::max(nbands_local, GlobalV::NBANDS); + this->nbands_complem_ = nbands_actual - nbands_local; } } + assert(this->nbands_complem_ >= 0); - const int nks_psi = (PARAM.inp.calculation == "nscf" && this->mem_saver_ == 1)? 1 : this->pw_wfc_->nks; + const int nks_psi = (PARAM.inp.calculation == "nscf" && this->mem_saver_ == 1)? 1 : this->pw_wfc_->nks; const int nbasis_actual = this->pw_wfc_->npwk_max * PARAM.globalv.npol; psi::Psi>* psi_out = nullptr; if(!only_psig) @@ -164,6 +139,7 @@ void psi_initializer::random_t(T* psi, const int iw_start, const int std::vector stickarg(nz); std::vector tmprr(nstnz); std::vector tmparg(nstnz); + for (int iw = iw_start; iw < iw_end; iw++) { // get the starting memory address of iw band @@ -177,6 +153,7 @@ void psi_initializer::random_t(T* psi, const int iw_start, const int // if the stick is not on present processor, then skip if(this->pw_wfc_->fftixy2ip[ir] < 0) { continue; } // otherwise + // the following code is very time-consuming, but it can be skipped with pw_seed = 0 if(GlobalV::RANK_IN_POOL == 0) { // generate random number for (x,y) and all z, the stick will must @@ -195,7 +172,8 @@ void psi_initializer::random_t(T* psi, const int iw_start, const int #endif } // then for each g-component, initialize the wavefunction value - for (int ig = 0;ig < ng;ig++) + #pragma omp parallel for schedule(static, 4096/sizeof(T)) + for (int ig = 0; ig < ng; ig++) { // get the correct value of "rr" and "arg" by indexing map "getigl2isz" const double rr = tmprr[this->pw_wfc_->getigl2isz(ik, ig)]; @@ -214,6 +192,8 @@ void psi_initializer::random_t(T* psi, const int iw_start, const int for (int iw = iw_start ;iw < iw_end; iw++) { T* psi_slice = &(psi[iw * this->pw_wfc_->npwk_max * PARAM.globalv.npol]); // get the memory to write directly. For nspin 4, nbasis*2 + + #pragma omp parallel for schedule(static, 4096/sizeof(T)) for (int ig = 0; ig < ng; ig++) { const double rr = std::rand()/double(RAND_MAX); //qianrui add RAND_MAX @@ -221,8 +201,9 @@ void psi_initializer::random_t(T* psi, const int iw_start, const int const double gk2 = this->pw_wfc_->getgk2(ik, ig); psi_slice[ig] = this->template cast_to_T(std::complex(rr*cos(arg)/(gk2 + 1.0), rr*sin(arg)/(gk2 + 1.0))); } - if(PARAM.globalv.npol==2) // additionally for nspin 4... + if(PARAM.globalv.npol == 2) // additionally for nspin 4... { + #pragma omp parallel for schedule(static, 4096/sizeof(T)) for (int ig = this->pw_wfc_->npwk_max; ig < this->pw_wfc_->npwk_max + ng; ig++) { const double rr = std::rand()/double(RAND_MAX); diff --git a/source/module_psi/psi_initializer_nao.cpp b/source/module_psi/psi_initializer_nao.cpp index a88b0ed9f8..c2b0f83c04 100644 --- a/source/module_psi/psi_initializer_nao.cpp +++ b/source/module_psi/psi_initializer_nao.cpp @@ -17,6 +17,12 @@ #include "module_base/parallel_reduce.h" #endif #include "module_parameter/parameter.h" +#include "module_io/orb_io.h" +// GlobalV::NQX and GlobalV::DQ are here +#include "module_parameter/parameter.h" + +#include +#include /* I don't know why some variables are distributed while others not... for example the orbital_files... @@ -40,214 +46,79 @@ template void psi_initializer_nao::read_external_orbs(std::string* orbital_files, const int& rank) { ModuleBase::timer::tick("psi_initializer_nao", "read_external_orbs"); - if (rank == 0) - { - for (int itype = 0; itype < this->p_ucell_->ntype; itype++) - { - this->orbital_files_.push_back(orbital_files[itype]); - } - for (int it = 0; it < this->p_ucell_->ntype; it++) - { - // number of chi per atomtype - int nchi = 0; - for (int l = 0; l <= this->p_ucell_->atoms[it].nwl; l++) - { - nchi += this->p_ucell_->atoms[it].l_nchi[l]; - } - - std::vector n_rgrid_it; - std::vector> rgrid_it; - std::vector> rvalue_it; - - std::ifstream ifs_it; - ifs_it.open(PARAM.inp.orbital_dir + this->orbital_files_[it]); - - if (!ifs_it) - { - GlobalV::ofs_warning << "psi_initializer_nao::read_orbital_files: cannot open orbital file: " - << this->orbital_files_[it] << std::endl; - ModuleBase::WARNING_QUIT("psi_initializer_nao::read_orbital_files", - "cannot open orbital file."); - } - else - { - GlobalV::ofs_running << "psi_initializer_nao::read_orbital_files: reading orbital file: " - << this->orbital_files_[it] << std::endl; - } - ifs_it.close(); - int ichi_overall = 0; - // check nwl and nchi for each rank + this->orbital_files_.resize(this->p_ucell_->ntype); + this->nr_.resize(this->p_ucell_->ntype); + this->rgrid_.resize(this->p_ucell_->ntype); + this->chi_.resize(this->p_ucell_->ntype); - for (int l = 0; l <= this->p_ucell_->atoms[it].nwl; l++) - { - for (int ichi = 0; ichi < this->p_ucell_->atoms[it].l_nchi[l]; ichi++) - { - int n_rgrid_ichi; - std::vector rgrid_ichi; - std::vector rvalue_ichi; - - GlobalV::ofs_running << "-------------------------------------- " << std::endl; - GlobalV::ofs_running << " reading orbital of element " << this->p_ucell_->atoms[it].label - << std::endl - << " angular momentum l = " << l << std::endl - << " index of chi = " << ichi << std::endl; - - ifs_it.open(PARAM.inp.orbital_dir + this->orbital_files_[it]); - double dr = 0.0; - char word[80]; - - while (ifs_it.good()) - { - ifs_it >> word; - if (std::strcmp(word, "END") == 0) - { - break; - } - } - ModuleBase::CHECK_NAME(ifs_it, "Mesh"); - ifs_it >> n_rgrid_ichi; - - if (n_rgrid_ichi % 2 == 0) - { - ++n_rgrid_ichi; - } - GlobalV::ofs_running << " number of radial grid = " << n_rgrid_ichi << std::endl; - - ModuleBase::CHECK_NAME(ifs_it, "dr"); - ifs_it >> dr; - GlobalV::ofs_running << " dr = " << dr << std::endl; - - for (int ir = 0; ir < n_rgrid_ichi; ir++) - { - rgrid_ichi.push_back(ir * dr); - } - GlobalV::ofs_running << " maximal radial grid point = " << rgrid_ichi[n_rgrid_ichi - 1] - << " Angstrom" << std::endl; - - std::string title1, title2, title3; - int it_read = 0; - int l_read = 0; - int nchi_read = 0; - bool find = false; - while (!find) - { - if (ifs_it.eof()) - { - GlobalV::ofs_warning << " psi_initializer_nao::read_orbital_files: cannot find " - "orbital of element " - << this->p_ucell_->atoms[it].label << std::endl - << " angular momentum l = " << l << std::endl - << " index of chi = " << ichi << std::endl; - } - ifs_it >> title1 >> title2 >> title3; - assert(title1 == "Type"); - ifs_it >> it_read >> l_read >> nchi_read; - if (l_read == l && nchi_read == ichi) - { - for (int ir = 0; ir < n_rgrid_ichi; ir++) - { - double rvalue_ichi_ir; - ifs_it >> rvalue_ichi_ir; - rvalue_ichi.push_back(rvalue_ichi_ir); - } - find = true; - } - else - { - double discard; - for (int ir = 0; ir < n_rgrid_ichi; ir++) - { - ifs_it >> discard; - } - } - } - ifs_it.close(); - n_rgrid_it.push_back(n_rgrid_ichi); - rgrid_it.push_back(rgrid_ichi); - // before push back, normalize the rvalue_ichi, 2024/03/19, kirk0830 - // turn off normalize, 2024/03/22, kirk0830 - // normalize(rgrid_ichi, rvalue_ichi); - rvalue_it.push_back(rvalue_ichi); - ++ichi_overall; - } - } - this->n_rgrid_.push_back(n_rgrid_it); - this->rgrid_.push_back(rgrid_it); - this->rvalue_.push_back(rvalue_it); - GlobalV::ofs_running << "-------------------------------------- " << std::endl; - } - } -// MPI additional implementation #ifdef __MPI - // bcast fname - if (rank != 0) + if (rank == 0) { - this->orbital_files_.resize(this->p_ucell_->ntype); +#endif + std::copy(orbital_files, orbital_files + this->p_ucell_->ntype, this->orbital_files_.begin()); +#ifdef __MPI } Parallel_Common::bcast_string(this->orbital_files_.data(), this->p_ucell_->ntype); - - // bcast orbital data - // resize - if (rank != 0) - { - this->n_rgrid_.resize(this->p_ucell_->ntype); - } - - std::vector nchi(this->p_ucell_->ntype); - if (rank == 0) +#endif + for (int it = 0; it < this->p_ucell_->ntype; it++) { - for (int it = 0; it < this->p_ucell_->ntype; it++) + std::ifstream ifs_it; + bool is_open = false; + if (rank == 0) { - nchi[it] = this->n_rgrid_[it].size(); + ifs_it.open(PARAM.inp.orbital_dir + this->orbital_files_[it]); + is_open = ifs_it.is_open(); } - } - - // bcast - Parallel_Common::bcast_int(nchi.data(), this->p_ucell_->ntype); - // resize - if (rank != 0) - { - this->n_rgrid_.resize(this->p_ucell_->ntype); - this->rgrid_.resize(this->p_ucell_->ntype); - this->rvalue_.resize(this->p_ucell_->ntype); - for (int it = 0; it < this->p_ucell_->ntype; it++) +#ifdef __MPI + Parallel_Common::bcast_bool(is_open); +#endif + if (!is_open) { - this->n_rgrid_[it].resize(nchi[it]); - this->rgrid_[it].resize(nchi[it]); - this->rvalue_[it].resize(nchi[it]); + GlobalV::ofs_warning << "psi_initializer_nao::read_orbital_files: cannot open orbital file: " + << this->orbital_files_[it] << std::endl; + ModuleBase::WARNING_QUIT("psi_initializer_nao::read_orbital_files", + "cannot open orbital file."); } - } - - // bcast - for (int it = 0; it < this->p_ucell_->ntype; it++) - { - Parallel_Common::bcast_int(this->n_rgrid_[it].data(), nchi[it]); - } + else + { + GlobalV::ofs_running << "psi_initializer_nao::read_orbital_files: reading orbital file: " + << this->orbital_files_[it] << std::endl; + } + std::string elem; // garbage value, will discard + double ecut; // garbage value, will discard + int nr; + double dr; + std::vector nzeta; + std::vector> radials; + ModuleIO::read_abacus_orb(ifs_it, elem, ecut, nr, dr, nzeta, radials, rank); - // resize - if (rank != 0) - { - for (int it = 0; it < this->p_ucell_->ntype; it++) + if (rank == 0) { - for (int ichi = 0; ichi < nchi[it]; ichi++) - { - this->rgrid_[it][ichi].resize(this->n_rgrid_[it][ichi]); - this->rvalue_[it][ichi].resize(this->n_rgrid_[it][ichi]); - } + ifs_it.close(); } - } - // bcast - for (int it = 0; it < this->p_ucell_->ntype; it++) - { - for (int ichi = 0; ichi < nchi[it]; ichi++) + const int nchi = std::accumulate(nzeta.begin(), nzeta.end(), 0); + // nr_ + this->nr_[it].resize(nchi); + std::for_each(this->nr_[it].begin(), this->nr_[it].end(), [nr](int& numr) { numr = nr; }); + // rgrid_ + this->rgrid_[it].resize(nchi); + std::for_each(this->rgrid_[it].begin(), this->rgrid_[it].end(), [nr, dr](std::vector& rgrid) { + rgrid.resize(nr); + std::iota(rgrid.begin(), rgrid.end(), 0); + std::for_each(rgrid.begin(), rgrid.end(), [dr](double& r) { r = r * dr; }); + }); + // chi_ + this->chi_[it].resize(nchi); + std::for_each(this->chi_[it].begin(), this->chi_[it].end(), [nr](std::vector& chi) { + chi.resize(nr); + }); + for (int ichi = 0; ichi < nchi; ichi++) { - Parallel_Common::bcast_double(this->rgrid_[it][ichi].data(), this->n_rgrid_[it][ichi]); - Parallel_Common::bcast_double(this->rvalue_[it][ichi].data(), this->n_rgrid_[it][ichi]); + std::copy(radials[ichi].begin(), radials[ichi].end(), this->chi_[it][ichi].begin()); } } -#endif ModuleBase::timer::tick("psi_initializer_nao", "read_external_orbs"); } @@ -255,26 +126,27 @@ template void psi_initializer_nao::allocate_table() { // find correct dimension for ovlp_flzjlq - int dim1 = this->p_ucell_->ntype; - int dim2 = 0; // dim2 should be the maximum number of zeta for each atomtype + int ntype = this->p_ucell_->ntype; + int lmaxmax = 0; // lmaxmax + int nzeta_max = 0; // dim3 should be the maximum number of zeta for each atomtype for (int it = 0; it < this->p_ucell_->ntype; it++) { int nzeta = 0; - for (int l = 0; l < this->p_ucell_->atoms[it].nwl + 1; l++) + int lmax = this->p_ucell_->atoms[it].nwl; + lmaxmax = (lmaxmax > lmax) ? lmaxmax : lmax; + for (int l = 0; l < lmax + 1; l++) { nzeta += this->p_ucell_->atoms[it].l_nchi[l]; } - dim2 = (nzeta > dim2) ? nzeta : dim2; + nzeta_max = (nzeta > nzeta_max) ? nzeta : nzeta_max; } - if (dim2 == 0) + if (nzeta_max == 0) { ModuleBase::WARNING_QUIT("psi_initializer_nao::psi_initializer_nao", "there is not ANY numerical atomic orbital read in present system, quit."); } - int dim3 = PARAM.globalv.nqx; - // allocate memory for ovlp_flzjlq - this->ovlp_flzjlq_.create(dim1, dim2, dim3); - this->ovlp_flzjlq_.zero_out(); + // allocate a map (it, l, izeta) -> i, should allocate memory of ntype * lmax * nzeta_max + this->projmap_.create(ntype, lmaxmax + 1, nzeta_max); } #ifdef __MPI @@ -288,6 +160,7 @@ void psi_initializer_nao::initialize(Structure_Factor* sf, const int& rank) { ModuleBase::timer::tick("psi_initializer_nao", "initialize_mpi"); + // import this->sf_ = sf; this->pw_wfc_ = pw_wfc; @@ -295,11 +168,11 @@ void psi_initializer_nao::initialize(Structure_Factor* sf, this->p_parakpts_ = p_parakpts; this->p_pspot_nl_ = p_pspot_nl; this->random_seed_ = random_seed; + // allocate this->allocate_table(); this->read_external_orbs(this->p_ucell_->orbital_fn, rank); - // this->cal_ovlp_flzjlq(); //because PARAM.globalv.nqx will change during vcrelax, so it should be called in both init - // and init_after_vc + // then for generate random number to fill in the wavefunction this->ixy2is_.clear(); this->ixy2is_.resize(this->pw_wfc_->fftnxy); @@ -315,17 +188,18 @@ void psi_initializer_nao::initialize(Structure_Factor* sf, pseudopot_cell_vnl* p_pspot_nl) { ModuleBase::timer::tick("psi_initializer_nao", "initialize_serial"); + // import this->sf_ = sf; this->pw_wfc_ = pw_wfc; this->p_ucell_ = p_ucell; this->p_pspot_nl_ = p_pspot_nl; this->random_seed_ = random_seed; + // allocate this->allocate_table(); this->read_external_orbs(this->p_ucell_->orbital_fn, 0); - // this->cal_ovlp_flzjlq(); //because PARAM.globalv.nqx will change during vcrelax, so it should be called in both init - // and init_after_vc + // then for generate random number to fill in the wavefunction this->ixy2is_.clear(); this->ixy2is_.resize(this->pw_wfc_->fftnxy); @@ -338,7 +212,32 @@ template void psi_initializer_nao::tabulate() { ModuleBase::timer::tick("psi_initializer_nao", "tabulate"); - this->ovlp_flzjlq_.zero_out(); + + // a uniformed qgrid + std::vector qgrid(PARAM.globalv.nqx); + std::iota(qgrid.begin(), qgrid.end(), 0); + std::for_each(qgrid.begin(), qgrid.end(), [this](double& q) { q = q * PARAM.globalv.dq; }); + + // only when needed, allocate memory for cubspl_ + if (this->cubspl_.get()) { this->cubspl_.reset(); } + this->cubspl_ = std::unique_ptr( + new ModuleBase::CubicSpline(qgrid.size(), qgrid.data())); + + // calculate the total number of radials and call reserve to allocate memory + int nchi = 0; + for (int it = 0; it < this->p_ucell_->ntype; it++) + { + for (int l = 0; l < this->p_ucell_->atoms[it].nwl + 1; l++) + { + nchi += this->p_ucell_->atoms[it].l_nchi[l]; + } + } + this->cubspl_->reserve(nchi); + ModuleBase::SphericalBesselTransformer sbt_(true); // bool: enable cache + + // tabulate the spherical bessel transform of numerical orbital function + std::vector Jlfq(PARAM.globalv.nqx, 0.0); + int i = 0; for (int it = 0; it < this->p_ucell_->ntype; it++) { int ic = 0; @@ -346,23 +245,15 @@ void psi_initializer_nao::tabulate() { for (int izeta = 0; izeta < this->p_ucell_->atoms[it].l_nchi[l]; izeta++) { - std::vector ovlp_flzjlq_q(PARAM.globalv.nqx); - std::vector qgrid(PARAM.globalv.nqx); - for (int iq = 0; iq < PARAM.globalv.nqx; iq++) - { - qgrid[iq] = iq * PARAM.globalv.dq; - } - this->sbt.direct(l, - this->n_rgrid_[it][ic], - this->rgrid_[it][ic].data(), - this->rvalue_[it][ic].data(), - PARAM.globalv.nqx, - qgrid.data(), - ovlp_flzjlq_q.data()); - for (int iq = 0; iq < PARAM.globalv.nqx; iq++) - { - this->ovlp_flzjlq_(it, ic, iq) = ovlp_flzjlq_q[iq]; - } + sbt_.direct(l, + this->nr_[it][ic], + this->rgrid_[it][ic].data(), + this->chi_[it][ic].data(), + PARAM.globalv.nqx, + qgrid.data(), + Jlfq.data()); + this->cubspl_->add(Jlfq.data()); + this->projmap_(it, l, izeta) = i++; // index it ++ic; } } @@ -382,15 +273,19 @@ void psi_initializer_nao::proj_ao_onkG(const int ik) ModuleBase::matrix ylm(total_lm, npw); std::vector> aux(npw); - std::vector> gk(npw); + std::vector qnorm(npw); + std::vector> q(npw); + + #pragma omp parallel for schedule(static, 4096 / sizeof(double)) for (int ig = 0; ig < npw; ig++) { - gk[ig] = this->pw_wfc_->getgpluskcar(ik, ig); + q[ig] = this->pw_wfc_->getgpluskcar(ik, ig); + qnorm[ig] = q[ig].norm() * this->p_ucell_->tpiba; } - ModuleBase::YlmReal::Ylm_Real(total_lm, npw, gk.data(), ylm); + ModuleBase::YlmReal::Ylm_Real(total_lm, npw, q.data(), ylm); // int index = 0; - std::vector ovlp_flzjlg(npw); + std::vector Jlfq(npw, 0.0); int ibasis = 0; for (int it = 0; it < this->p_ucell_->ntype; it++) { @@ -411,17 +306,10 @@ void psi_initializer_nao::proj_ao_onkG(const int ik) transformation of numerical orbital function, is indiced by it and ic, is needed to interpolate everytime when ic updates, therefore everytime when present orbital is done */ - for (int ig = 0; ig < npw; ig++) - { - ovlp_flzjlg[ig] = ModuleBase::PolyInt::Polynomial_Interpolation( - this->ovlp_flzjlq_, // the spherical bessel transform of numerical orbital function - it, - ic, // each (it, ic)-pair defines a unique numerical orbital function - PARAM.globalv.nqx, - PARAM.globalv.dq, // grid number and grid spacing of q - gk[ig].norm() * this->p_ucell_->tpiba // norm of (G+k) = K - ); - } + + // use cublic spline instead of previous polynomial interpolation + this->cubspl_->eval(npw, qnorm.data(), Jlfq.data(), nullptr, nullptr, this->projmap_(it, L, N)); + /* FOR EVERY NAO IN EACH ATOM */ if (PARAM.inp.nspin == 4) { @@ -447,10 +335,13 @@ void psi_initializer_nao::proj_ao_onkG(const int ik) for (int m = 0; m < 2 * L + 1; m++) { const int lm = L * L + m; + #pragma omp parallel for for (int ig = 0; ig < npw; ig++) { - aux[ig] = sk[ig] * ylm(lm, ig) * ovlp_flzjlg[ig]; + aux[ig] = sk[ig] * ylm(lm, ig) * Jlfq[ig]; } + + #pragma omp parallel for for (int ig = 0; ig < npw; ig++) { fup = cos(0.5 * alpha) * aux[ig]; @@ -483,10 +374,11 @@ void psi_initializer_nao::proj_ao_onkG(const int ik) for (int m = 0; m < 2 * L + 1; m++) { const int lm = L * L + m; + #pragma omp parallel for for (int ig = 0; ig < npw; ig++) { (*(this->psig_))(ibasis, ig) - = this->template cast_to_T(lphase * sk[ig] * ylm(lm, ig) * ovlp_flzjlg[ig]); + = this->template cast_to_T(lphase * sk[ig] * ylm(lm, ig) * Jlfq[ig]); } ++ibasis; } diff --git a/source/module_psi/psi_initializer_nao.h b/source/module_psi/psi_initializer_nao.h index 5ef7d1b6ca..b7643d38d7 100644 --- a/source/module_psi/psi_initializer_nao.h +++ b/source/module_psi/psi_initializer_nao.h @@ -2,7 +2,8 @@ #define PSI_INITIALIZER_NAO_H #include "psi_initializer.h" #include "module_base/realarray.h" - +#include "module_base/cubic_spline.h" +#include /* Psi (planewave based wavefunction) initializer: numerical atomic orbital method */ @@ -40,13 +41,13 @@ class psi_initializer_nao : public psi_initializer virtual void tabulate() override; std::vector external_orbs() const { return orbital_files_; } - std::vector> n_rgrid() const { return n_rgrid_; } - std::vector n_rgrid(const int& itype) const { return n_rgrid_[itype]; } - int n_rgrid(const int& itype, const int& ichi) const { return n_rgrid_[itype][ichi]; } - std::vector>> rvalue() const { return rvalue_; } - std::vector> rvalue(const int& itype) const { return rvalue_[itype]; } - std::vector rvalue(const int& itype, const int& ichi) const { return rvalue_[itype][ichi]; } - double rvalue(const int& itype, const int& ichi, const int& ir) const { return rvalue_[itype][ichi][ir]; } + std::vector> nr() const { return nr_; } + std::vector nr(const int& itype) const { return nr_[itype]; } + int nr(const int& itype, const int& ichi) const { return nr_[itype][ichi]; } + std::vector>> chi() const { return chi_; } + std::vector> chi(const int& itype) const { return chi_[itype]; } + std::vector chi(const int& itype, const int& ichi) const { return chi_[itype][ichi]; } + double chi(const int& itype, const int& ichi, const int& ir) const { return chi_[itype][ichi][ir]; } std::vector>> rgrid() const { return rgrid_; } std::vector> rgrid(const int& itype) const { return rgrid_[itype]; } std::vector rgrid(const int& itype, const int& ichi) const { return rgrid_[itype][ichi]; } @@ -54,11 +55,14 @@ class psi_initializer_nao : public psi_initializer private: std::vector orbital_files_; - ModuleBase::realArray ovlp_flzjlq_; + /// @brief cubic spline for interpolation + std::unique_ptr cubspl_; + /// @brief radial map, [itype][l][izeta] -> i + ModuleBase::realArray projmap_; /// @brief number of realspace grids per type per chi, [itype][ichi] - std::vector> n_rgrid_; + std::vector> nr_; /// @brief data of numerical atomic orbital per type per chi per position, [itype][ichi][ir] - std::vector>> rvalue_; + std::vector>> chi_; /// @brief r of numerical atomic orbital per type per chi per position, [itype][ichi][ir] std::vector>> rgrid_; }; diff --git a/source/module_psi/test/CMakeLists.txt b/source/module_psi/test/CMakeLists.txt index 110e73e058..08ef34ded7 100644 --- a/source/module_psi/test/CMakeLists.txt +++ b/source/module_psi/test/CMakeLists.txt @@ -16,6 +16,7 @@ AddTest( ../../module_cell/atom_spec.cpp ../../module_cell/parallel_kpoints.cpp ../../module_cell/test/support/mock_unitcell.cpp + ../../module_io/orb_io.cpp ) endif() diff --git a/source/module_relax/relax_old/test/CMakeLists.txt b/source/module_relax/relax_old/test/CMakeLists.txt index 005d749c08..e237b2032b 100644 --- a/source/module_relax/relax_old/test/CMakeLists.txt +++ b/source/module_relax/relax_old/test/CMakeLists.txt @@ -16,7 +16,10 @@ AddTest( AddTest( TARGET lattice_change_cg_test LIBS parameter ${math_libs} base device - SOURCES lattice_change_cg_test.cpp ../lattice_change_cg.cpp ../lattice_change_basic.cpp + SOURCES lattice_change_cg_test.cpp + ../lattice_change_cg.cpp + ../lattice_change_basic.cpp + ../../../module_io/orb_io.cpp ) AddTest( @@ -40,13 +43,20 @@ AddTest( AddTest( TARGET ions_move_bfgs_test LIBS parameter ${math_libs} base device - SOURCES ions_move_bfgs_test.cpp ../ions_move_bfgs.cpp ../ions_move_basic.cpp ../bfgs_basic.cpp + SOURCES ions_move_bfgs_test.cpp + ../ions_move_bfgs.cpp + ../ions_move_basic.cpp + ../bfgs_basic.cpp + ../../../module_io/orb_io.cpp ) AddTest( TARGET ions_move_cg_test LIBS parameter ${math_libs} base device - SOURCES ions_move_cg_test.cpp ../ions_move_cg.cpp ../ions_move_basic.cpp + SOURCES ions_move_cg_test.cpp + ../ions_move_cg.cpp + ../ions_move_basic.cpp + ../../../module_io/orb_io.cpp ) AddTest( diff --git a/tests/integrate/102_PW_PINT_UKS/result.ref b/tests/integrate/102_PW_PINT_UKS/result.ref index 5ac2ef8604..7bd1b9f13e 100644 --- a/tests/integrate/102_PW_PINT_UKS/result.ref +++ b/tests/integrate/102_PW_PINT_UKS/result.ref @@ -1,3 +1,3 @@ -etotref -6147.553111352082 -etotperatomref -3073.7765556760 +etotref -6147.55311190 +etotperatomref -3073.77655595 totaltimeref 16.68 diff --git a/tests/integrate/120_PW_KP_MD_NVT/STRU b/tests/integrate/120_PW_KP_MD_NVT/STRU deleted file mode 100644 index 9fd56865d4..0000000000 --- a/tests/integrate/120_PW_KP_MD_NVT/STRU +++ /dev/null @@ -1,19 +0,0 @@ -ATOMIC_SPECIES -Si 1 Si_ONCV_PBE-1.0.upf - -LATTICE_CONSTANT -10.2 - -LATTICE_VECTORS -0.5 0.5 0 #latvec1 -0.5 0 0.5 #latvec2 -0 0.5 0.5 #latvec3 - -ATOMIC_POSITIONS -Cartesian - -Si #label -0 #magnetism -2 #number of atoms -0 0 0 m 1 1 1 v -0.00016026383234 1.88587389656e-05 4.31519285331e-06 -0.241 0.255 0.250999999999 m 1 1 1 v 0.00016026383234 -1.88587389656e-05 -4.31519285331e-06 diff --git a/tests/integrate/120_PW_KP_MD_NVT/STRU_MD_2 b/tests/integrate/120_PW_KP_MD_NVT/STRU_MD_2 index e918ed59b0..9fd56865d4 100644 --- a/tests/integrate/120_PW_KP_MD_NVT/STRU_MD_2 +++ b/tests/integrate/120_PW_KP_MD_NVT/STRU_MD_2 @@ -1,13 +1,13 @@ ATOMIC_SPECIES -Si 1 Si_ONCV_PBE-1.0.upf upf201 +Si 1 Si_ONCV_PBE-1.0.upf LATTICE_CONSTANT 10.2 LATTICE_VECTORS - 0.5000000000 0.5000000000 0.0000000000 #latvec1 - 0.5000000000 0.0000000000 0.5000000000 #latvec2 - 0.0000000000 0.5000000000 0.5000000000 #latvec3 +0.5 0.5 0 #latvec1 +0.5 0 0.5 #latvec2 +0 0.5 0.5 #latvec3 ATOMIC_POSITIONS Cartesian @@ -15,5 +15,5 @@ Cartesian Si #label 0 #magnetism 2 #number of atoms - 0.9961377061 0.5017059282 0.5004642243 m 1 1 1 v -0.0005823336 0.0003037059 0.0000737542 - 0.2448622939 0.2532940718 0.2505357757 m 1 1 1 v 0.0005823336 -0.0003037059 -0.0000737542 +0 0 0 m 1 1 1 v -0.00016026383234 1.88587389656e-05 4.31519285331e-06 +0.241 0.255 0.250999999999 m 1 1 1 v 0.00016026383234 -1.88587389656e-05 -4.31519285331e-06 diff --git a/tests/integrate/140_PW_15_SO/result.ref b/tests/integrate/140_PW_15_SO/result.ref index 449c4786a9..2f6201a31f 100644 --- a/tests/integrate/140_PW_15_SO/result.ref +++ b/tests/integrate/140_PW_15_SO/result.ref @@ -1,3 +1,3 @@ -etotref -1668.616835067548 -etotperatomref -834.3084175338 -totaltimeref 6.35 +etotref -1668.616833271131 +etotperatomref -834.3084166356 +totaltimeref 5.15 diff --git a/tests/integrate/401_NP_KP_sp/result.ref b/tests/integrate/401_NP_KP_sp/result.ref index dce8f48469..a8ade4ce38 100644 --- a/tests/integrate/401_NP_KP_sp/result.ref +++ b/tests/integrate/401_NP_KP_sp/result.ref @@ -1,4 +1,4 @@ -etotref -723.7838345681450 +etotref -723.78383467 etotperatomref -361.8919172840725 pointgroupref C_1h spacegroupref C_2h diff --git a/tests/integrate/401_NP_KP_spd/result.ref b/tests/integrate/401_NP_KP_spd/result.ref index a0c5f6713a..2272882209 100644 --- a/tests/integrate/401_NP_KP_spd/result.ref +++ b/tests/integrate/401_NP_KP_spd/result.ref @@ -1,4 +1,4 @@ -etotref -211.3351662834205 +etotref -211.33516646 etotperatomref -105.6675831417103 pointgroupref T_d spacegroupref O_h diff --git a/toolchain/README.md b/toolchain/README.md index 9f13f6614e..386efc8313 100644 --- a/toolchain/README.md +++ b/toolchain/README.md @@ -1,6 +1,6 @@ # The ABACUS Toolchain -Version 2024.2 +Version 2024.3 ## Author @@ -108,13 +108,13 @@ The needed dependencies version default: - `cmake` 3.30.0 - `gcc` 13.2.0 (which will always NOT be installed, But use system) -- `OpenMPI` 5.0.5 +- `OpenMPI` 4.1.6 (5.0.5 can be used but have some problem in OpenMP parallel computation in ELPA) - `MPICH` 4.2.2 -- `OpenBLAS` 0.3.27 (Intel toolchain need `get_vars.sh` tool from it) -- `ScaLAPACK` 2.2.1 +- `OpenBLAS` 0.3.28 (Intel toolchain need `get_vars.sh` tool from it) +- `ScaLAPACK` 2.2.1 (a developing version) - `FFTW` 3.3.10 - `LibXC` 6.2.2 -- `ELPA` 2024.03.001 +- `ELPA` 2024.05.001 - `CEREAL` 1.3.2 - `RapidJSON` 1.1.0 And Intel-oneAPI need user or server manager to manually install from Intel. diff --git a/toolchain/build_abacus_gnu.sh b/toolchain/build_abacus_gnu.sh index a5b88316fe..c797392585 100755 --- a/toolchain/build_abacus_gnu.sh +++ b/toolchain/build_abacus_gnu.sh @@ -28,6 +28,7 @@ ELPA=$INSTALL_DIR/elpa-2024.03.001/cpu FFTW3=$INSTALL_DIR/fftw-3.3.10 CEREAL=$INSTALL_DIR/cereal-1.3.2/include/cereal LIBXC=$INSTALL_DIR/libxc-6.2.2 +RAPIDJSON=$INSTALL_DIR/rapidjson-1.1.0/ # LIBRI=$INSTALL_DIR/LibRI-0.1.1 # LIBCOMM=$INSTALL_DIR/LibComm-0.1.0 # LIBTORCH=$INSTALL_DIR/libtorch-2.1.2/share/cmake/Torch @@ -47,7 +48,8 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ -DENABLE_LIBXC=ON \ -DUSE_OPENMP=ON \ -DUSE_ELPA=ON \ - -DENABLE_RAPIDJSON=OFF \ + -DENABLE_RAPIDJSON=ON \ + -DRapdidJSON_DIR=$RAPIDJSON \ # -DENABLE_DEEPKS=1 \ # -DTorch_DIR=$LIBTORCH \ # -Dlibnpy_INCLUDE_DIR=$LIBNPY \ diff --git a/toolchain/build_abacus_intel-mpich.sh b/toolchain/build_abacus_intel-mpich.sh index e199f049cf..dc7fad7d96 100755 --- a/toolchain/build_abacus_intel-mpich.sh +++ b/toolchain/build_abacus_intel-mpich.sh @@ -26,6 +26,7 @@ PREFIX=$ABACUS_DIR ELPA=$INSTALL_DIR/elpa-2024.03.001/cpu CEREAL=$INSTALL_DIR/cereal-1.3.2/include/cereal LIBXC=$INSTALL_DIR/libxc-6.2.2 +RAPIDJSON=$INSTALL_DIR/rapidjson-1.1.0/ # LIBTORCH=$INSTALL_DIR/libtorch-2.1.2/share/cmake/Torch # LIBNPY=$INSTALL_DIR/libnpy-1.0.1/include # LIBRI=$INSTALL_DIR/LibRI-0.1.1 @@ -43,7 +44,8 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ -DENABLE_LIBXC=ON \ -DUSE_OPENMP=ON \ -DUSE_ELPA=ON \ - -DENABLE_RAPIDJSON=OFF \ + -DENABLE_RAPIDJSON=ON \ + -DRapdidJSON_DIR=$RAPIDJSON \ # -DENABLE_DEEPKS=1 \ # -DTorch_DIR=$LIBTORCH \ # -Dlibnpy_INCLUDE_DIR=$LIBNPY \ diff --git a/toolchain/build_abacus_intel.sh b/toolchain/build_abacus_intel.sh index 777c812655..0b4563c688 100755 --- a/toolchain/build_abacus_intel.sh +++ b/toolchain/build_abacus_intel.sh @@ -26,6 +26,7 @@ PREFIX=$ABACUS_DIR ELPA=$INSTALL_DIR/elpa-2024.03.001/cpu CEREAL=$INSTALL_DIR/cereal-1.3.2/include/cereal LIBXC=$INSTALL_DIR/libxc-6.2.2 +RAPIDJSON=$INSTALL_DIR/rapidjson-1.1.0/ # LIBTORCH=$INSTALL_DIR/libtorch-2.1.2/share/cmake/Torch # LIBNPY=$INSTALL_DIR/libnpy-1.0.1/include # LIBRI=$INSTALL_DIR/LibRI-0.1.1 @@ -44,7 +45,8 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ -DENABLE_LIBXC=ON \ -DUSE_OPENMP=ON \ -DUSE_ELPA=ON \ - -DENABLE_RAPIDJSON=OFF \ + -DENABLE_RAPIDJSON=ON \ + -DRapdidJSON_DIR=$RAPIDJSON \ # -DENABLE_DEEPKS=1 \ # -DTorch_DIR=$LIBTORCH \ # -Dlibnpy_INCLUDE_DIR=$LIBNPY \ diff --git a/toolchain/scripts/VERSION b/toolchain/scripts/VERSION index a6faad3be8..bce1d6fc01 100644 --- a/toolchain/scripts/VERSION +++ b/toolchain/scripts/VERSION @@ -1,2 +1,2 @@ # version file to force a rebuild of the entire toolchain -VERSION="2024.2" +VERSION="2024.3" diff --git a/toolchain/scripts/get_openblas_arch.sh b/toolchain/scripts/get_openblas_arch.sh index 70b492d576..4104ff821c 100755 --- a/toolchain/scripts/get_openblas_arch.sh +++ b/toolchain/scripts/get_openblas_arch.sh @@ -8,8 +8,8 @@ [ "${BASH_SOURCE[0]}" ] && SCRIPT_NAME="${BASH_SOURCE[0]}" || SCRIPT_NAME=$0 SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_NAME")" && pwd -P)" -openblas_ver="0.3.27" # Keep in sync with install_openblas.sh -openblas_sha256="aa2d68b1564fe2b13bc292672608e9cdeeeb6dc34995512e65c3b10f4599e897" +openblas_ver="0.3.28" # Keep in sync with install_openblas.sh +openblas_sha256="f1003466ad074e9b0c8d421a204121100b0751c96fc6fcf3d1456bd12f8a00a1" openblas_pkg="OpenBLAS-${openblas_ver}.tar.gz" source "${SCRIPT_DIR}"/common_vars.sh diff --git a/toolchain/scripts/stage0/install_cmake.sh b/toolchain/scripts/stage0/install_cmake.sh index a76472901b..b16c9719b2 100755 --- a/toolchain/scripts/stage0/install_cmake.sh +++ b/toolchain/scripts/stage0/install_cmake.sh @@ -36,17 +36,18 @@ case "${with_cmake}" in pkg_install_dir="${INSTALLDIR}/cmake-${cmake_ver}" #pkg_install_dir="${HOME}/apps/cmake/${cmake_ver}" install_lock_file="$pkg_install_dir/install_successful" + cmake_pkg="cmake-${cmake_ver}-${cmake_arch}.sh" if verify_checksums "${install_lock_file}"; then echo "cmake-${cmake_ver} is already installed, skipping it." else - if [ -f cmake-${cmake_ver}-${cmake_arch}.sh ]; then - echo "cmake-${cmake_ver}-${cmake_arch}.sh is found" + if [ -f $cmake_pkg ]; then + echo "$cmake_pkg is found" else - download_pkg_from_ABACUS_org "${cmake_sha256}" "cmake-${cmake_ver}-${cmake_arch}.sh" + download_pkg_from_ABACUS_org "${cmake_sha256}" "$cmake_pkg" fi echo "Installing from scratch into ${pkg_install_dir}" mkdir -p ${pkg_install_dir} - /bin/sh cmake-${cmake_ver}-${cmake_arch}.sh --prefix=${pkg_install_dir} --skip-license > install.log 2>&1 || tail -n ${LOG_LINES} install.log + /bin/sh $cmake_pkg --prefix=${pkg_install_dir} --skip-license > install.log 2>&1 || tail -n ${LOG_LINES} install.log write_checksums "${install_lock_file}" "${SCRIPT_DIR}/stage0/$(basename ${SCRIPT_NAME})" fi ;; diff --git a/toolchain/scripts/stage1/install_openmpi.sh b/toolchain/scripts/stage1/install_openmpi.sh index c5baa47843..99adfc5bd9 100755 --- a/toolchain/scripts/stage1/install_openmpi.sh +++ b/toolchain/scripts/stage1/install_openmpi.sh @@ -8,10 +8,10 @@ [ "${BASH_SOURCE[0]}" ] && SCRIPT_NAME="${BASH_SOURCE[0]}" || SCRIPT_NAME=$0 SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_NAME")/.." && pwd -P)" -openmpi_ver="5.0.5" -openmpi_sha256="6588d57c0a4bd299a24103f4e196051b29e8b55fbda49e11d5b3d32030a32776" -# openmpi_ver="4.1.6" -# openmpi_sha256="f740994485516deb63b5311af122c265179f5328a0d857a567b85db00b11e415" +#openmpi_ver="5.0.5" +#openmpi_sha256="6588d57c0a4bd299a24103f4e196051b29e8b55fbda49e11d5b3d32030a32776" +openmpi_ver="4.1.6" +openmpi_sha256="f740994485516deb63b5311af122c265179f5328a0d857a567b85db00b11e415" openmpi_pkg="openmpi-${openmpi_ver}.tar.bz2" source "${SCRIPT_DIR}"/common_vars.sh @@ -64,6 +64,7 @@ case "${with_openmpi}" in # OpenMPI 5.0 only supports PMIx # PMI support is required for Slurm, but not for other schedulers # default not use + # for OpenMPI 4.1 with pmi slurm, we can open this setting manually # if [ $(command -v srun) ]; then # echo "Slurm installation found. OpenMPI will be configured with --with-pmi." # EXTRA_CONFIGURE_FLAGS="--with-pmi" diff --git a/toolchain/scripts/stage2/install_openblas.sh b/toolchain/scripts/stage2/install_openblas.sh index 18039b2023..6c411edaad 100755 --- a/toolchain/scripts/stage2/install_openblas.sh +++ b/toolchain/scripts/stage2/install_openblas.sh @@ -8,8 +8,8 @@ [ "${BASH_SOURCE[0]}" ] && SCRIPT_NAME="${BASH_SOURCE[0]}" || SCRIPT_NAME=$0 SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_NAME")/.." && pwd -P)" -openblas_ver="0.3.27" # Keep in sync with get_openblas_arch.sh -openblas_sha256="aa2d68b1564fe2b13bc292672608e9cdeeeb6dc34995512e65c3b10f4599e897" +openblas_ver="0.3.28" # Keep in sync with get_openblas_arch.sh +openblas_sha256="f1003466ad074e9b0c8d421a204121100b0751c96fc6fcf3d1456bd12f8a00a1" openblas_pkg="OpenBLAS-${openblas_ver}.tar.gz" source "${SCRIPT_DIR}"/common_vars.sh diff --git a/toolchain/scripts/stage3/install_elpa.sh b/toolchain/scripts/stage3/install_elpa.sh index 24756c43b7..eff2bc7fe8 100755 --- a/toolchain/scripts/stage3/install_elpa.sh +++ b/toolchain/scripts/stage3/install_elpa.sh @@ -9,8 +9,8 @@ SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_NAME")/.." && pwd -P)" # From https://elpa.mpcdf.mpg.de/software/tarball-archive/ELPA_TARBALL_ARCHIVE.html -elpa_ver="2024.03.001" -elpa_sha256="41c6cbf56d2dac26443faaba8a77307d261bf511682a64b96e24def77c813622" +elpa_ver="2024.05.001" +elpa_sha256="9caf41a3e600e2f6f4ce1931bd54185179dade9c171556d0c9b41bbc6940f2f6" source "${SCRIPT_DIR}"/common_vars.sh @@ -59,13 +59,15 @@ case "$with_elpa" in echo "elpa-${elpa_ver} is already installed, skipping it." else require_env MATH_LIBS - if [ -f elpa-${elpa_ver}.tar.gz ]; then - echo "elpa-${elpa_ver}.tar.gz is found" + elpa_pkg="elpa-${elpa_ver}.tar.gz" + url=f"https://elpa.mpcdf.mpg.de/software/tarball-archive/Releases/${elpa_ver}/${elpa_pkg}" + if [ -f ${elpa_pkg} ]; then + echo "${elpa_pkg} is found" else - download_pkg_from_ABACUS_org "${elpa_sha256}" "elpa-${elpa_ver}.tar.gz" + download_pkg_from_url "${elpa_sha256}" "${elpa_pkg}" "${url}" fi [ -d elpa-${elpa_ver} ] && rm -rf elpa-${elpa_ver} - tar -xzf elpa-${elpa_ver}.tar.gz + tar -xzf ${elpa_pkg} # elpa expect FC to be an mpi fortran compiler that is happy # with long lines, and that a bunch of libs can be found diff --git a/toolchain/scripts/stage4/install_rapidjson.sh b/toolchain/scripts/stage4/install_rapidjson.sh index 7883c96943..bce4d84d6f 100755 --- a/toolchain/scripts/stage4/install_rapidjson.sh +++ b/toolchain/scripts/stage4/install_rapidjson.sh @@ -55,6 +55,8 @@ case "$with_rapidjson" in tar -xzf $filename mkdir -p "${pkg_install_dir}" cp -r $dirname/* "${pkg_install_dir}/" + # for CMake to find rapidjson + cp ${pkg_install_dir}/RapidJSONConfig.cmake.in ${pkg_install_dir}/RapidJSONConfig.cmake write_checksums "${install_lock_file}" "${SCRIPT_DIR}/stage4/$(basename ${SCRIPT_NAME})" fi ;;