From c5bbfb288b47f34851cdfa6c14d583cf459ece45 Mon Sep 17 00:00:00 2001 From: barakr Date: Wed, 1 Nov 2023 11:52:58 +0200 Subject: [PATCH] add an option to output hdf5 more frequently than output protobuf file + little restructuring of Statistics for better readability by fragmenting update() (update() still way too long) --- data/npctransport.proto | 8 +- include/SimulationData.h | 4 + include/Statistics.h | 18 ++- src/SimulationData.cpp | 1 + src/Statistics.cpp | 275 +++++++++++++++++++++----------------- test/standards_exceptions | 3 + 6 files changed, 178 insertions(+), 131 deletions(-) diff --git a/data/npctransport.proto b/data/npctransport.proto index f79dab8..527c39b 100644 --- a/data/npctransport.proto +++ b/data/npctransport.proto @@ -139,7 +139,8 @@ message Configuration { optional int32 is_multiple_hdf5s=43; // if true, output new hdf5 every output_statistics_interval_ns, // then add a numeric suffix to hdf5 for each dump // e.g. output_1.pb.hdf5, output_2.pb.hdf5, etc. Default is only once in the end. - // n=43 + optional int32 full_output_statistics_interval_factor=44 [default=1]; // if >1, output full output file every output_statistics_interval_ns*full_output_interval_factor + // n=44 } // if you add any parameters you must update automatic_parameters.cpp @@ -269,7 +270,7 @@ message Assignment { optional int32 output_statistics_interval_frames=41 [default=100000]; // update interval of statistics file (including order params) in frames optional FloatAssignment temperature_k=42; // simulation temperature optional double output_npctransport_version=43; // a version number for npctransport by which to interpret output file that contains this assignment - optional int32 is_xyz_hist_stats=44 [default=1]; // whether to use an xyz histogram instead of zr histogram (= whether to project x and y to sqrt(x^2+y^2) + optional int32 is_xyz_hist_stats=44 [default=1]; // whether to use an xyz histogram instead of zr histogram (= whether to project x and y to sqrt(x^2+y^2); if so output ot HDF5 file optional double xyz_stats_crop_factor=49 [default=0.5]; // percentage of box to crop from each axis for xyz histogram (symmetrically). optional double xyz_stats_voxel_size_a=50 [default=10.0]; // size of voxel in A for xyz histogram optional double xyz_stats_max_box_size_a=51 [default=2000.0]; // maximal size of box for xyz stats @@ -280,7 +281,8 @@ message Assignment { optional int32 is_multiple_hdf5s=52; // if true, output new hdf5 every output_statistics_interval_ns, // then add a numeric suffix to hdf5 for each dump // e.g. output_1.pb.hdf5, output_2.pb.hdf5, etc. Default is only once in the end. - // n=52 + optional int32 full_output_statistics_interval_factor=53 [default=1]; // if >1, output full output file every output_statistics_interval_ns*full_output_interval_factor + // n=53 } message Statistics { diff --git a/include/SimulationData.h b/include/SimulationData.h index 13c7b06..13cbae6 100644 --- a/include/SimulationData.h +++ b/include/SimulationData.h @@ -65,6 +65,7 @@ class IMPNPCTRANSPORTEXPORT SimulationData : public Object { Parameter statistics_fraction_; Parameter statistics_interval_frames_; Parameter output_statistics_interval_frames_; + Parameter full_output_statistics_interval_factor_; Parameter is_multiple_hdf5s_; Parameter time_step_; Parameter time_step_wave_factor_; @@ -90,6 +91,9 @@ class IMPNPCTRANSPORTEXPORT SimulationData : public Object { int get_output_statistics_interval_frames() const { return output_statistics_interval_frames_; } + int get_full_output_statistics_interval_factor() const + { return full_output_statistics_interval_factor_; } + // if true, hdf5s are generated every get_output_statistics_interval_frames() frames bool get_is_multiple_hdf5s() { diff --git a/include/Statistics.h b/include/Statistics.h index ec27038..7f0b0f5 100644 --- a/include/Statistics.h +++ b/include/Statistics.h @@ -228,6 +228,10 @@ class IMPNPCTRANSPORTEXPORT Statistics : public Object { @note this method is not const cause it may invoke e.g., energy evaluation though it does not substantially change anything in the state of the object + @note if configuration file full_output_statistics_interval_factor + is larger than 1, then full statistics are dumped every N calls + to update(), where N is the value of full_output_statistics_interval_factor, + and only the HDF5 file is updated at each call. */ void update(const IMP::internal::SimpleTimer &timer, unsigned int nf_new = 1); @@ -284,10 +288,18 @@ class IMPNPCTRANSPORTEXPORT Statistics : public Object { //! updates pStats with all statistics related to fgs, averaged over //! nf_new additional frames + //! @param zr_hist a grid on z / (x,y)-radial axis relevant only if not outputting xyz stats to hdf5 void update_fg_stats( ::npctransport_proto::Statistics* pStats, unsigned int nf_new, - unsigned int zr_hist[4][3], - RMF::HDF5::File hdf5_file); + unsigned int zr_hist[4][3]); + + //! updates pStats with all statistics related to floaters, averaged over + //! nf_new additional frames + //! + //! @return for historical reasons, returns a map of diffusion coefficients for each particle type + //! to be used in order params later on + std::map update_floater_stats( ::npctransport_proto::Statistics* pStats, + unsigned int nf_new); @@ -335,6 +347,8 @@ class IMPNPCTRANSPORTEXPORT Statistics : public Object { void fill_in_zr_hist(unsigned int zr_hist[4][3], ParticlesTemp ps) const; + void update_hdf5_statistics(); // output HDF5 statistics + public: IMP_OBJECT_METHODS(Statistics); diff --git a/src/SimulationData.cpp b/src/SimulationData.cpp index 5553757..0f8bfed 100644 --- a/src/SimulationData.cpp +++ b/src/SimulationData.cpp @@ -172,6 +172,7 @@ void SimulationData::initialize(std::string prev_output_file, GET_VALUE(range); GET_VALUE(statistics_interval_frames); GET_VALUE_DEF(output_statistics_interval_frames,10000); + GET_VALUE_DEF(full_output_statistics_interval_factor, 1); GET_VALUE_DEF(is_multiple_hdf5s, false); GET_ASSIGNMENT(statistics_fraction); GET_VALUE(time_step); diff --git a/src/Statistics.cpp b/src/Statistics.cpp index c851929..7917309 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -371,24 +371,12 @@ Statistics::update_xyz_distribution_to_hdf5 void Statistics::update_fg_stats ( ::npctransport_proto::Statistics* stats, unsigned int nf_new, - unsigned int zr_hist[4][3], - RMF::HDF5::File hdf5_file) + unsigned int zr_hist[4][3]) { IMP_OBJECT_LOG; - RMF::HDF5::Group hdf5_fg_xyz_hist_group; - static const std::string FG_XYZ_GROUP("fg_xyz_hist"); - if(hdf5_file.get_has_child(FG_XYZ_GROUP)){ - IMP_ALWAYS_CHECK(hdf5_file.get_child_is_group(FG_XYZ_GROUP), - FG_XYZ_GROUP << " is supposed to be an HDF5 group", - ValueException); - hdf5_fg_xyz_hist_group= hdf5_file.get_child_group(FG_XYZ_GROUP); - } else { - hdf5_fg_xyz_hist_group= hdf5_file.add_child_group(FG_XYZ_GROUP); - } int nf = stats->number_of_frames(); double sim_time_ns = const_cast( get_sd() ) ->get_bd()->get_current_time() / FS_IN_NS; - // Go over all fg chain types: ParticleTypeSet const &fgct = get_sd()->get_fg_chain_types(); for ( ParticleTypeSet::const_iterator @@ -518,11 +506,7 @@ void Statistics::update_fg_stats } // for j } // Recreate z-r histogram based on retrieved zr_hist for each type of FG bead (not chain): - if(get_sd()->get_is_xyz_hist_stats()){ - stats->mutable_fg_beads(i)->clear_xyz_hist(); - update_xyz_distribution_to_hdf5(hdf5_fg_xyz_hist_group, - fg_bead_type_i); - } else { + if(!get_sd()->get_is_xyz_hist_stats()){ ParticleTypeZRDistributionMap::const_iterator ptzrdm_it= particle_type_zr_distribution_map_.find(fg_bead_type_i); if(ptzrdm_it != particle_type_zr_distribution_map_.end()) @@ -544,6 +528,91 @@ void Statistics::update_fg_stats } // for it (fg bead type) } +std::map Statistics::update_floater_stats +(::npctransport_proto::Statistics* stats, + unsigned int nf_new) { + IMP_OBJECT_LOG; + int nf = stats->number_of_frames(); + std::map type_to_diffusion_coefficient_map; // to be used for floater order params + // Floaters general body stats + for (BodyStatisticsOSsMap::iterator + it = floaters_stats_map_.begin() ; + it != floaters_stats_map_.end(); it++) + { + unsigned int i= find_or_add_floater_of_type( stats, it->first ); + BodyStatisticsOptimizerStates& bsos = it->second; + unsigned int n_particles_type_i = bsos.size(); + type_to_diffusion_coefficient_map[it->first]=0.0; + int nf_weighted = nf * n_particles_type_i; // number of particle frames + for (unsigned int j= 0; j < n_particles_type_i; j++) { + bsos[j]->update_always(); + double dc_j= bsos[j]->get_diffusion_coefficient(); + UPDATE_AVG(nf_weighted, nf_new, + *stats->mutable_floaters(i), + diffusion_coefficient, dc_j); + type_to_diffusion_coefficient_map[it->first]+= + dc_j/n_particles_type_i; + double ct_j= bsos[j]->get_correlation_time(); + UPDATE_AVG(nf_weighted, nf_new, + *stats->mutable_floaters(i), + correlation_time, ct_j); + bsos[j]->reset(); + nf_weighted++; + } // for j + if(!get_sd()->get_is_xyz_hist_stats()){ + // Recreate z-r histogram based on retrieved zr_hist: + ParticleTypeZRDistributionMap::const_iterator ptzrdm_it= + particle_type_zr_distribution_map_.find(it->first); + if(ptzrdm_it != particle_type_zr_distribution_map_.end()) { + ParticleTypeZRDistributionMap::mapped_type zr_hist= + ptzrdm_it->second; + stats->mutable_floaters(i)->clear_zr_hist(); + for(unsigned int ii=0; ii < zr_hist.size(); ii++) { + ::npctransport_proto::Statistics_Ints* zii_r_hist= + stats->mutable_floaters(i)->mutable_zr_hist()->add_ints_list(); + for(unsigned int jj=0; jj < zr_hist[ii].size(); jj++) { + zii_r_hist->add_ints(zr_hist[ii][jj]); + } // for jj + } // for ii + } //if ptzrdm_it + } // if/else is_xyz_hist_stats + } // for it + + // Floaters avg number of transports per particle + if ( get_sd()->get_has_slab() ){ + for (ParticleTransportStatisticsOSsMap::iterator + it1 = floaters_transport_stats_map_.begin(); + it1 != floaters_transport_stats_map_.end() ; it1++) + { + unsigned int i = find_or_add_floater_of_type( stats, it1->first ); + ParticleTransportStatisticsOptimizerStates& pts_i = it1->second; + // fetch old ones from stats msg, add new ones and rewrite all: + std::set times_i + ( stats->floaters(i).transport_time_points_ns().begin(), + stats->floaters(i).transport_time_points_ns().end() ); + for (unsigned int j = 0; j < pts_i.size(); ++j) + { + Floats const &new_times_ij = + pts_i[j]->get_transport_time_points_in_ns(); + for (unsigned int k = 0; k < new_times_ij.size(); k++) + { + times_i.insert(new_times_ij[k]); + } // for k + } // for j + (*stats->mutable_floaters(i)).clear_transport_time_points_ns(); + for (std::set::const_iterator it2 = times_i.begin(); + it2 != times_i.end(); it2++) + { + (*stats->mutable_floaters(i)).add_transport_time_points_ns(*it2); + } // for it2 + // update avg too: + double avg_n_transports_i = times_i.size() * 1.0 / pts_i.size(); + (*stats->mutable_floaters(i)).set_avg_n_transports( avg_n_transports_i ); + } // for it1 + } + return type_to_diffusion_coefficient_map; +} + void Statistics ::update_particle_type_zr_distribution_map ( Particle* p ) @@ -651,28 +720,14 @@ ::update_particle_type_xyz_distribution_map } } - - -// @param nf_new number of new frames accounted for in this statistics update -void Statistics::update -( const IMP::internal::SimpleTimer &timer, - unsigned int nf_new) +void Statistics::update_hdf5_statistics() { - IMP_OBJECT_LOG; - IMP_ALWAYS_CHECK(get_is_activated(), // TODO: would we rather a usage/always check? - "Cannot update a Statistics object that was not activated. Call Statistics::add_optimizer_states() first", - IMP::UsageException); - update_calls_++; - ::npctransport_proto::Output output; - bool is_read= load_output_protobuf(output_file_name_, output); - IMP_ALWAYS_CHECK(is_read, - "Failed updating statistics to " << output_file_name_.c_str() - << std::endl, - IMP::IOException); + // Open HDF5 std::string hdf5_file_name = output_file_name_ + (get_sd()->get_is_multiple_hdf5s() ? "."+std::to_string(update_calls_) : "") + ".hdf5"; RMF::HDF5::File hdf5_file= RMF::HDF5::create_file(hdf5_file_name); + // Floater distributions RMF::HDF5::Group hdf5_floater_xyz_hist_group; static const std::string FLOATER_XYZ_GROUP("floater_xyz_hist"); if(hdf5_file.get_has_child(FLOATER_XYZ_GROUP)) { @@ -683,7 +738,61 @@ void Statistics::update } else { hdf5_floater_xyz_hist_group= hdf5_file.add_child_group(FLOATER_XYZ_GROUP); } + for (BodyStatisticsOSsMap::iterator it = floaters_stats_map_.begin(); + it != floaters_stats_map_.end(); + it++) + { + update_xyz_distribution_to_hdf5(hdf5_floater_xyz_hist_group, + it->first); + } + //FG Distributions + RMF::HDF5::Group hdf5_fg_xyz_hist_group; + static const std::string FG_XYZ_GROUP("fg_xyz_hist"); + if(hdf5_file.get_has_child(FG_XYZ_GROUP)){ + IMP_ALWAYS_CHECK(hdf5_file.get_child_is_group(FG_XYZ_GROUP), + FG_XYZ_GROUP << " is supposed to be an HDF5 group", + ValueException); + hdf5_fg_xyz_hist_group= hdf5_file.get_child_group(FG_XYZ_GROUP); + } else { + hdf5_fg_xyz_hist_group= hdf5_file.add_child_group(FG_XYZ_GROUP); + } + for ( auto const& fg_bead_type : get_sd()->get_fg_bead_types()) + { + update_xyz_distribution_to_hdf5(hdf5_fg_xyz_hist_group, + fg_bead_type); + } + // Restart statistics needed only if we have multiple HDF5 files: + if(get_sd()->get_is_multiple_hdf5s()) + { + particle_type_zr_distribution_map_.clear(); + particle_type_xyz_distribution_map_.clear(); + } +} +// @param nf_new number of new frames accounted for in this statistics update +void Statistics::update +( const IMP::internal::SimpleTimer &timer, + unsigned int nf_new) +{ + IMP_OBJECT_LOG; + IMP_ALWAYS_CHECK(get_is_activated(), // TODO: would we rather a usage/always check? + "Cannot update a Statistics object that was not activated. Call Statistics::add_optimizer_states() first", + IMP::UsageException); + update_calls_++; + if(get_sd()->get_is_xyz_hist_stats()){ + update_hdf5_statistics(); + } + // Output full statistics every full_output_statistics_interval_factor calls only + if(update_calls_ % get_sd()->get_full_output_statistics_interval_factor() != 0){ + return; + } + // Fetach protobuf statistics message from file + ::npctransport_proto::Output output; + bool is_read= load_output_protobuf(output_file_name_, output); + IMP_ALWAYS_CHECK(is_read, + "Failed updating statistics to " << output_file_name_.c_str() + << std::endl, + IMP::IOException); ::npctransport_proto::Statistics* stats = output.mutable_statistics(); int nf = stats->number_of_frames(); if (is_stats_reset_) { // stats was just reset @@ -702,90 +811,11 @@ void Statistics::update double sim_time_ns = const_cast( get_sd() ) ->get_bd()->get_current_time() / FS_IN_NS; + // Update FG and floater stats unsigned int zr_hist[4][3]={{0},{0},{0},{0}}; - update_fg_stats(stats, nf_new, zr_hist, hdf5_file); - - std::map type_to_diffusion_coefficeint_map; // to be used for floater order params - // Floaters general body stats - for (BodyStatisticsOSsMap::iterator - it = floaters_stats_map_.begin() ; - it != floaters_stats_map_.end(); it++) - { - unsigned int i= find_or_add_floater_of_type( stats, it->first ); - BodyStatisticsOptimizerStates& bsos = it->second; - unsigned int n_particles_type_i = bsos.size(); - type_to_diffusion_coefficeint_map[it->first]=0.0; - int nf_weighted = nf * n_particles_type_i; // number of particle frames - for (unsigned int j= 0; j < n_particles_type_i; j++) - { - bsos[j]->update_always(); - double dc_j= bsos[j]->get_diffusion_coefficient(); - UPDATE_AVG(nf_weighted, nf_new, - *stats->mutable_floaters(i), - diffusion_coefficient, dc_j); - type_to_diffusion_coefficeint_map[it->first]+= - dc_j/n_particles_type_i; - double ct_j= bsos[j]->get_correlation_time(); - UPDATE_AVG(nf_weighted, nf_new, - *stats->mutable_floaters(i), - correlation_time, ct_j); - bsos[j]->reset(); - nf_weighted++; - } // for j - if(get_sd()->get_is_xyz_hist_stats()){ // TODO: floaters are disabled for xyz for now to save space - perhaps add it later - update_xyz_distribution_to_hdf5(hdf5_floater_xyz_hist_group, - it->first); - } else { // if/else is_xyz_hist_stats - // Recreate z-r histogram based on retrieved zr_hist: - ParticleTypeZRDistributionMap::const_iterator ptzrdm_it= - particle_type_zr_distribution_map_.find(it->first); - if(ptzrdm_it != particle_type_zr_distribution_map_.end()) { - ParticleTypeZRDistributionMap::mapped_type zr_hist= - ptzrdm_it->second; - stats->mutable_floaters(i)->clear_zr_hist(); - for(unsigned int ii=0; ii < zr_hist.size(); ii++) { - ::npctransport_proto::Statistics_Ints* zii_r_hist= - stats->mutable_floaters(i)->mutable_zr_hist()->add_ints_list(); - for(unsigned int jj=0; jj < zr_hist[ii].size(); jj++) { - zii_r_hist->add_ints(zr_hist[ii][jj]); - } // for jj - } // for ii - } //if ptzrdm_it - } // if/else is_xyz_hist_stats - } // for it - - // Floaters avg number of transports per particle - if ( get_sd()->get_has_slab() ){ - for (ParticleTransportStatisticsOSsMap::iterator - it1 = floaters_transport_stats_map_.begin(); - it1 != floaters_transport_stats_map_.end() ; it1++) - { - unsigned int i = find_or_add_floater_of_type( stats, it1->first ); - ParticleTransportStatisticsOptimizerStates& pts_i = it1->second; - // fetch old ones from stats msg, add new ones and rewrite all: - std::set times_i - ( stats->floaters(i).transport_time_points_ns().begin(), - stats->floaters(i).transport_time_points_ns().end() ); - for (unsigned int j = 0; j < pts_i.size(); ++j) - { - Floats const &new_times_ij = - pts_i[j]->get_transport_time_points_in_ns(); - for (unsigned int k = 0; k < new_times_ij.size(); k++) - { - times_i.insert(new_times_ij[k]); - } // for k - } // for j - (*stats->mutable_floaters(i)).clear_transport_time_points_ns(); - for (std::set::const_iterator it2 = times_i.begin(); - it2 != times_i.end(); it2++) - { - (*stats->mutable_floaters(i)).add_transport_time_points_ns(*it2); - } // for it2 - // update avg too: - double avg_n_transports_i = times_i.size() * 1.0 / pts_i.size(); - (*stats->mutable_floaters(i)).set_avg_n_transports( avg_n_transports_i ); - } // for it1 - } + update_fg_stats(stats, nf_new, zr_hist); + auto type_to_diffusion_coefficient_map = + update_floater_stats(stats, nf_new); // FG-floaters interactions ParticleTypeSet const &ft = get_sd()->get_floater_types(); @@ -827,7 +857,7 @@ void Statistics::update frc->set_n_z3(n_z3); } frc->set_diffusion_coefficient - (type_to_diffusion_coefficeint_map[*it]); + (type_to_diffusion_coefficient_map[*it]); } // for(it) // update statistics gathered on interaction rates @@ -958,13 +988,6 @@ void Statistics::update std::ofstream outf(output_file_name_.c_str(), std::ios::binary); output.SerializeToOstream(&outf); outf.flush(); - - // reset HDF5 stats if having multiple HDF5s - if(get_sd()->get_is_multiple_hdf5s()) - { - particle_type_zr_distribution_map_.clear(); - particle_type_xyz_distribution_map_.clear(); - } } void Statistics::reset_statistics_optimizer_states() diff --git a/test/standards_exceptions b/test/standards_exceptions index d82a4ce..2f152df 100644 --- a/test/standards_exceptions +++ b/test/standards_exceptions @@ -31,6 +31,9 @@ function_name_exceptions= \ 'make_unordered_particle_index_pair', 'make_ordered_interaction_type', 'make_unordered_interaction_type', + 'inflate_floater', + 'remove_Nup42', + 'startup', 'BrownianDynamicsTAMDWithSlabSupport.set_use_stochastic_runge_kutta', 'BrownianDynamicsTAMDWithSlabSupport.simulate', 'BrownianDynamicsTAMDWithSlabSupport.simulate_wave',