diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py index 945f5c3..7430d13 100644 --- a/src/rail/estimation/algos/somoclu_som.py +++ b/src/rail/estimation/algos/somoclu_som.py @@ -425,6 +425,8 @@ def run(self): hist_vals = np.zeros((self.config.nsamples, len(self.zgrid) - 1)) N_eff_p_num = np.zeros(self.config.nsamples) N_eff_p_den = np.zeros(self.config.nsamples) + N_eff_num = 0. + N_eff_den = 0. phot_cluster_set = set() # make dictionary of ID data to be written out with cell IDs @@ -436,7 +438,9 @@ def run(self): print(f"Process {self.rank} running summarizer on chunk {s} - {e}") chunk_number = s//self.config.chunk_size - self._process_chunk(test_data, bootstrap_matrix, som_cluster_inds, spec_cluster_set, phot_cluster_set, sz, spec_data['weight'], spec_som_clusterind, N_eff_p_num, N_eff_p_den, hist_vals, id_dict, s, e, first) + tmp_neff_num, tmp_neff_den = self._process_chunk(test_data, bootstrap_matrix, som_cluster_inds, spec_cluster_set, phot_cluster_set, sz, spec_data['weight'], spec_som_clusterind, N_eff_p_num, N_eff_p_den, hist_vals, id_dict, s, e, first) + N_eff_num += tmp_neff_num + N_eff_den += tmp_neff_den first = False # We have finished writting the cell IDs, and we need to close the file in all process @@ -451,6 +455,8 @@ def run(self): N_eff_p_num = self.comm.reduce(N_eff_p_num) N_eff_p_den = self.comm.reduce(N_eff_p_den) hist_vals = self.comm.reduce(hist_vals) + N_eff_num = self.comm.reduce(N_eff_num) + N_eff_den = self.comm.reduce(N_eff_den) phot_cluster_list=np.array(list(phot_cluster_set),dtype=int) phot_cluster_total=self.comm.gather(phot_cluster_list) @@ -470,7 +476,7 @@ def run(self): # effective number defined in Heymans et al. (2012) to quantify the photometric representation. # also see Eq.7 in Wright et al. (2020). # Note that the origional definition should be effective number *density*, which equals to N_eff / Area. - N_eff = np.sum(N_eff_p_num)**2/np.sum(N_eff_p_den) + N_eff = N_eff_num**2 / N_eff_den N_eff_p_samples = N_eff_p_num**2/N_eff_p_den # the effective number density of the subsample of the photometric sample reside within SOM groupings which contain spectroscopy N_eff_p = np.mean(N_eff_p_samples) @@ -514,6 +520,9 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu useful_clusters = chunk_phot_cluster_set.intersection(spec_cluster_set) phot_cluster_set.update(chunk_phot_cluster_set) + tmp_neff_num = np.sum(test_data['weight']) + tmp_neff_den = np.sum(test_data['weight'] ** 2) + for i in range(self.config.nsamples): bootstrap_indices = bootstrap_matrix[:,i] @@ -537,6 +546,8 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu N_eff_p_den[i] += tmp_n_eff_p_den hist_vals[i, :] += tmp_hist_vals + return (tmp_neff_num, tmp_neff_den) + def _do_chunk_output(self, id_dict, start, end, first): if first: self._cellid_handle = self.add_handle('cellid_output', data = id_dict)