From c7be49aa7b4a537175680741c4df6bef7428b5ae Mon Sep 17 00:00:00 2001 From: Patricia Larsen Date: Fri, 14 Jul 2023 01:07:45 +0000 Subject: [PATCH] performance updates --- density_maps/main.cxx | 12 ++++++++---- density_maps/pix_funcs.cxx | 39 +++++++++++++++----------------------- density_maps/pix_funcs.h | 14 +++++++------- density_maps/utils.cxx | 11 ++++++++++- density_maps/utils.h | 2 +- 5 files changed, 41 insertions(+), 37 deletions(-) diff --git a/density_maps/main.cxx b/density_maps/main.cxx index 9637fad..e265615 100644 --- a/density_maps/main.cxx +++ b/density_maps/main.cxx @@ -14,7 +14,7 @@ #include #include -#include +#include // healpix includes #include @@ -90,8 +90,9 @@ int main(int argc, char *argv[]) { map_hires = Healpix_Base2 (nside, ring, SET_NSIDE); // we set to ring so interpolation is faster map_lores = Healpix_Base (nside_low, nest, SET_NSIDE); int64_t npix_lores = map_lores.Npix(); - map ring_to_idx; - + int64_t npix_hires = map_hires.Npix(); + unordered_map ring_to_idx; + //raing_to_idx.reserve(); #ifdef _OPENMP @@ -115,6 +116,9 @@ int main(int argc, char *argv[]) { vector end_idx; vector pix_nums_start, pix_nums_end; + int64_t map_size = lores_pix.size()*pow(4,rank_diff); + ring_to_idx.reserve(map_size); + int64_t count = 0; vector rho, phi, vel; // should generalize this so we're initializing it for an array of maps or a single map @@ -155,7 +159,7 @@ int main(int argc, char *argv[]) { printf("Starting file output \n"); } - write_files(outfile, stepnumber,start_idx, end_idx, pix_nums_start, rho, phi, vel); + write_files(outfile, stepnumber,start_idx, end_idx, pix_nums_start, rho, phi, vel, npix_hires); t2 = MPI_Wtime(); if (commRank==0){ diff --git a/density_maps/pix_funcs.cxx b/density_maps/pix_funcs.cxx index 7141437..9180e2b 100644 --- a/density_maps/pix_funcs.cxx +++ b/density_maps/pix_funcs.cxx @@ -108,7 +108,7 @@ int compute_ranks_count( Particles* P, T_Healpix_Base map_lores, T_Healpix_ fix_arr neighbs; fix_arr weights; map_hires.get_interpol(point, neighbs, weights); - map rank_list; + unordered_map rank_list; for (int j=0;j<4;j++){ int ind = get_lores_pix(neighbs[j],rank_diff,map_hires); // change this to be based on a list @@ -155,7 +155,7 @@ void compute_ranks_index( Particles* P, T_Healpix_Base map_lores, T_Healpix fix_arr weights; map_hires.get_interpol(point, neighbs, weights); //vector rank_list; - map rank_list; + unordered_map rank_list; for (int j=0;j<4;j++){ int ind = get_lores_pix(neighbs[j],rank_diff, map_hires); @@ -223,7 +223,7 @@ void get_pix_list_rank(int octant, int rank, int numranks, int64_t npix_lores, v return; } -int assign_dm_cic(vector &rho, vector &phi, vector &vel, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, map ring_to_idx){ +int assign_dm_cic(vector &rho, vector &phi, vector &vel, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, unordered_map ring_to_idx){ int64_t npix = map_hires.Npix(); float pixsize = (4.*3.141529/npix); @@ -262,7 +262,7 @@ int assign_dm_cic(vector &rho, vector &phi, vector &vel, Pa -int assign_dm_ngp(vector &rho, vector &phi, vector &vel, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, map ring_to_idx){ +int assign_dm_ngp(vector &rho, vector &phi, vector &vel, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, unordered_map ring_to_idx){ int64_t npix = map_hires.Npix(); float pixsize = (4.*3.141529/npix); @@ -291,37 +291,28 @@ int assign_dm_ngp(vector &rho, vector &phi, vector &vel, Pa return 0; } -int assign_sz_cic(vector &rho, vector &phi, vector &vel,vector &ksz, vector &tsz, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, map ring_to_idx){ +int assign_sz_cic(vector &rho, vector &phi, vector &vel,vector &ksz, vector &tsz, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, unordered_map ring_to_idx){ const double SIGMAT = 6.65245e-25; - int64_t npix = map_hires.Npix(); - float pixsize = (4.*3.141529/npix); + int64_t npix = map_hires.Npix(); + float pixsize = (4.*3.141529/npix); - // const double CLIGHT = 2.99792458e10; - //const double MP = 1.6737236e-24; // MH - //const double ME = 9.1093836e-28; //MELECTRON - const double MUE = 1.14; //TODO - check this - //const double YP = 0.24;//PRIMORDIAL_Y - check this too + const double MP = 1.672621e-24; // MP in g + const double MUE = 2.0/(2.0-PRIMORDIAL_Y); // only for adiabatic, mu should be overwritten otherwise const double NHE = 0.0; // assume helium is neutral const double CHIE = (1.0-PRIMORDIAL_Y*(1.0-NHE/4.0))/(1.0-PRIMORDIAL_Y/2.0); - //const double MSUN_TO_G = 1.9885e33; //G_IN_MSUN - //const double KM_TO_CM = 1.0e5; // CM_IN_KM - const double MPC_TO_CM = KM_IN_MPC*CM_IN_KM;//3.0856776e24; //KM_IN_MP * CM_IN_KM + const double MPC_TO_CM = KM_IN_MPC*CM_IN_KM; // TODO input the sample rate float samplerate = 1.0; - // remember these need to be multiplied by a factor of h + // remember these need to be multiplied by a factor of h later currently const float KSZ_CONV = (float)(-SIGMAT*CHIE/MUE/MP/CLIGHT)*(G_IN_MSUN/MPC_TO_CM/MPC_TO_CM)*(1.0/samplerate)*(1.0/pixsize); const float TSZ_CONV = (float)((GAMMA-1.0)*SIGMAT*CHIE/MUE/MELECTRON/CLIGHT/CLIGHT)*(MH/MP)*(G_IN_MSUN/MPC_TO_CM/MPC_TO_CM)*(1.0/samplerate)*(1.0/pixsize); - //float KSZ_CONV = (float)(-SIGMAT -// - //int64_t npix = map_hires.Npix(); - //float pixsize = (4.*3.141529/npix); - for (int64_t ii=0; ii<(*P).nparticles; ++ii){ + for (int64_t ii=0; ii<(*P).nparticles; ++ii){ float xd = (float) (*P).float_data[0]->at(ii); float yd = (float) (*P).float_data[1]->at(ii); @@ -364,7 +355,7 @@ int assign_sz_cic(vector &rho, vector &phi, vector &vel,vec -int assign_sz_ngp(vector &rho, vector &phi, vector &vel,vector &ksz, vector &tsz, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, map ring_to_idx){ +int assign_sz_ngp(vector &rho, vector &phi, vector &vel,vector &ksz, vector &tsz, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, unordered_map ring_to_idx){ int64_t npix = map_hires.Npix(); float pixsize = (4.*3.141529/npix); @@ -407,7 +398,7 @@ int assign_sz_ngp(vector &rho, vector &phi, vector &vel,vec return 0; } -void initialize_pixel_hydro(int pix_val, T_Healpix_Base map_lores, T_Healpix_Base map_hires, vector &rho , vector &phi, vector &vel, vector &ksz, vector &tsz, int64_t &count, vector &start_idx, vector &end_idx, vector &pixnum_start, vector &pixnum_end, int rank_diff, map* ring_to_idx){ +void initialize_pixel_hydro(int pix_val, T_Healpix_Base map_lores, T_Healpix_Base map_hires, vector &rho , vector &phi, vector &vel, vector &ksz, vector &tsz, int64_t &count, vector &start_idx, vector &end_idx, vector &pixnum_start, vector &pixnum_end, int rank_diff, unordered_map* ring_to_idx){ int64_t npix = map_hires.Npix(); start_idx.push_back(count); @@ -437,7 +428,7 @@ void initialize_pixel_hydro(int pix_val, T_Healpix_Base map_lores, T_Healp -void initialize_pixel(int pix_val, T_Healpix_Base map_lores, T_Healpix_Base map_hires, vector &rho , vector &phi, vector &vel, int64_t &count, vector &start_idx, vector &end_idx, vector &pixnum_start, vector &pixnum_end, int rank_diff, map* ring_to_idx){ +void initialize_pixel(int pix_val, T_Healpix_Base map_lores, T_Healpix_Base map_hires, vector &rho , vector &phi, vector &vel, int64_t &count, vector &start_idx, vector &end_idx, vector &pixnum_start, vector &pixnum_end, int rank_diff, unordered_map* ring_to_idx){ // initialize large-scale pixel to zero values int64_t npix = map_hires.Npix(); diff --git a/density_maps/pix_funcs.h b/density_maps/pix_funcs.h index e29ca8d..6b789d0 100644 --- a/density_maps/pix_funcs.h +++ b/density_maps/pix_funcs.h @@ -1,18 +1,18 @@ #include "healpix_base.h" -#include +#include #include #include using namespace std; -void initialize_pixel(int pix_val, T_Healpix_Base map_lores, T_Healpix_Base map_hires, std::vector &rho , std::vector &phi , std::vector &vel, int64_t &count, std::vector &start_idx, std::vector &end_idx, std::vector &pixnum_start, std::vector &pixnum_end, int rank_diff, map* ring_to_idx); +void initialize_pixel(int pix_val, T_Healpix_Base map_lores, T_Healpix_Base map_hires, std::vector &rho , std::vector &phi , std::vector &vel, int64_t &count, std::vector &start_idx, std::vector &end_idx, std::vector &pixnum_start, std::vector &pixnum_end, int rank_diff, unordered_map* ring_to_idx); void get_pix_list_rank(int octant, int rank, int numranks, int64_t npix_lores, std::vector pix_list_oct, std::vector pix_list_noct, std::vector &lores_pix); int compute_ranks_count( Particles* P, T_Healpix_Base map_lores, T_Healpix_Base map_hires, int numranks, vector &send_count, int rank_diff); void compute_ranks_index( Particles* P, T_Healpix_Base map_lores, T_Healpix_Base map_hires, int numranks, vector send_off, vector &id_particles, int rank_diff); -int assign_dm_ngp(vector &rho, vector &phi, vector &vel, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, map ring_to_idx); -int assign_dm_cic(vector &rho, vector &phi, vector &vel, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, map ring_to_idx); +int assign_dm_ngp(vector &rho, vector &phi, vector &vel, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, unordered_map ring_to_idx); +int assign_dm_cic(vector &rho, vector &phi, vector &vel, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, unordered_map ring_to_idx); -int assign_sz_ngp(vector &rho, vector &phi, vector &vel,vector &ksz, vector &tsz, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, map ring_to_idx); +int assign_sz_ngp(vector &rho, vector &phi, vector &vel,vector &ksz, vector &tsz, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, unordered_map ring_to_idx); -int assign_sz_cic(vector &rho, vector &phi, vector &vel,vector &ksz, vector &tsz, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, map ring_to_idx); +int assign_sz_cic(vector &rho, vector &phi, vector &vel,vector &ksz, vector &tsz, Particles* P, T_Healpix_Base map_hires, vector pixnum_start, vector pixnum_end, vector start_idx, unordered_map ring_to_idx); -void initialize_pixel_hydro(int pix_val, T_Healpix_Base map_lores, T_Healpix_Base map_hires, vector &rho , vector &phi, vector &vel, vector &ksz, vector &tsz, int64_t &count, vector &start_idx, vector &end_idx, vector &pixnum_start, vector &pixnum_end, int rank_diff, map* ring_to_idx); +void initialize_pixel_hydro(int pix_val, T_Healpix_Base map_lores, T_Healpix_Base map_hires, vector &rho , vector &phi, vector &vel, vector &ksz, vector &tsz, int64_t &count, vector &start_idx, vector &end_idx, vector &pixnum_start, vector &pixnum_end, int rank_diff, unordered_map* ring_to_idx); diff --git a/density_maps/utils.cxx b/density_maps/utils.cxx index d5e3ddb..0481ca4 100644 --- a/density_maps/utils.cxx +++ b/density_maps/utils.cxx @@ -87,6 +87,9 @@ static void distributeProperty(std::vector& vA, T* sendbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for(size_t i = 0; i < totalToSend; ++i) sendbuf[i] = vA[ vOrder[i] ]; @@ -228,7 +231,7 @@ void write_files_hydro(string outfile, string stepnumber,vector start_i -void write_files(string outfile, string stepnumber,vector start_idx, vector end_idx, vector pix_nums_start, vector rho, vector phi, vector vel){ +void write_files(string outfile, string stepnumber,vector start_idx, vector end_idx, vector pix_nums_start, vector rho, vector phi, vector vel, int64_t npix_hires){ int commRank, commRanks; MPI_Comm_rank(MPI_COMM_WORLD, &commRank); @@ -250,6 +253,12 @@ void write_files(string outfile, string stepnumber,vector start_idx, ve MPI_File_open(MPI_COMM_WORLD, name_out_phi, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh_phi); MPI_File_open(MPI_COMM_WORLD, name_out_vel, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh_vel); + // PL NOTE: make sure this is in the right place + MPI_Offset filesize = npix_hires*sizeof(float); + MPI_File_set_size(fh, filesize); + MPI_File_set_size(fh_phi, filesize); + MPI_File_set_size(fh_vel, filesize); + int status; status = output_file(commRank, fh, req, rho, start_idx, end_idx, pix_nums_start); diff --git a/density_maps/utils.h b/density_maps/utils.h index 163b04a..c559eff 100644 --- a/density_maps/utils.h +++ b/density_maps/utils.h @@ -4,6 +4,6 @@ using namespace std; void read_and_redistribute(string file_name, int numranks, Particles* P, T_Healpix_Base map_lores, T_Healpix_Base map_hires, int rank_diff); -void write_files(string outfile, string stepnumber,vector start_idx, vector end_idx, vector pix_nums_start, vector rho, vector phi, vector vel); +void write_files(string outfile, string stepnumber,vector start_idx, vector end_idx, vector pix_nums_start, vector rho, vector phi, vector vel, int64_t npix_hires); void write_files_hydro(string outfile, string stepnumber,vector start_idx, vector end_idx, vector pix_nums_start, vector rho, vector phi, vector vel, vector ksz, vector tsz);