Merge pull request #6 from LSSTDESC/user/joselotl/neff_bug

Changed sum over weights for Neff
LSSTDESC · Aug 4, 2023 · b53de67 · b53de67
2 parents 45e5459 + 015e58e
commit b53de67
Showing 1 changed file with 13 additions and 2 deletions.
diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py
@@ -425,6 +425,8 @@ def run(self):
         hist_vals = np.zeros((self.config.nsamples, len(self.zgrid) - 1))
         N_eff_p_num = np.zeros(self.config.nsamples)
         N_eff_p_den = np.zeros(self.config.nsamples)
+        N_eff_num = 0.
+        N_eff_den = 0.
         phot_cluster_set = set()
 
         # make dictionary of ID data to be written out with cell IDs
@@ -436,7 +438,9 @@ def run(self):
             print(f"Process {self.rank} running summarizer on chunk {s} - {e}")
 
             chunk_number = s//self.config.chunk_size
-            self._process_chunk(test_data, bootstrap_matrix, som_cluster_inds, spec_cluster_set, phot_cluster_set, sz, spec_data['weight'], spec_som_clusterind, N_eff_p_num, N_eff_p_den, hist_vals, id_dict, s, e, first)
+            tmp_neff_num, tmp_neff_den = self._process_chunk(test_data, bootstrap_matrix, som_cluster_inds, spec_cluster_set, phot_cluster_set, sz, spec_data['weight'], spec_som_clusterind, N_eff_p_num, N_eff_p_den, hist_vals, id_dict, s, e, first)
+            N_eff_num += tmp_neff_num
+            N_eff_den += tmp_neff_den
             first = False
 
         # We have finished writing the cell IDs, and we need to close the file in all processes
@@ -451,6 +455,8 @@ def run(self):
             N_eff_p_num = self.comm.reduce(N_eff_p_num)
             N_eff_p_den = self.comm.reduce(N_eff_p_den)
             hist_vals = self.comm.reduce(hist_vals)
+            N_eff_num = self.comm.reduce(N_eff_num)
+            N_eff_den = self.comm.reduce(N_eff_den)
 
             phot_cluster_list=np.array(list(phot_cluster_set),dtype=int)
             phot_cluster_total=self.comm.gather(phot_cluster_list)
@@ -470,7 +476,7 @@ def run(self):
         # effective number defined in Heymans et al. (2012) to quantify the photometric representation.
         # also see Eq.7 in Wright et al. (2020).
         # Note that the original definition should be effective number *density*, which equals N_eff / Area.
-        N_eff = np.sum(N_eff_p_num)**2/np.sum(N_eff_p_den)
+        N_eff = N_eff_num**2 / N_eff_den
         N_eff_p_samples = N_eff_p_num**2/N_eff_p_den
         # the effective number density of the subsample of the photometric sample that resides within SOM groupings which contain spectroscopy
         N_eff_p = np.mean(N_eff_p_samples)
@@ -514,6 +520,9 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu
         useful_clusters = chunk_phot_cluster_set.intersection(spec_cluster_set)
         phot_cluster_set.update(chunk_phot_cluster_set)
 
+        tmp_neff_num = np.sum(test_data['weight'])
+        tmp_neff_den = np.sum(test_data['weight'] ** 2)
+
 
         for i in range(self.config.nsamples):
             bootstrap_indices = bootstrap_matrix[:,i]
@@ -537,6 +546,8 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu
             N_eff_p_den[i] += tmp_n_eff_p_den
             hist_vals[i, :] += tmp_hist_vals
 
+        return (tmp_neff_num, tmp_neff_den)
+
     def _do_chunk_output(self, id_dict, start, end, first):
         if first:
             self._cellid_handle = self.add_handle('cellid_output', data = id_dict)