diff --git a/CADETMatch.pyproj b/CADETMatch.pyproj index e43e753..241dc88 100644 --- a/CADETMatch.pyproj +++ b/CADETMatch.pyproj @@ -12,7 +12,7 @@ {888888a0-9f3d-457c-b088-3a5042f75d52} Standard Python launcher CondaEnv|CondaEnv|pymoo_devel - "C:\Users\kosh\Downloads\Cadet-Match-GIEX Question-20210903T074409Z-001\Cadet-Match-GIEX Question\Match_Test.json" 1 + "F:\cadet_release_test\search\mcmc\stage2\non.json" 12 False true diff --git a/CADETMatch/de.py b/CADETMatch/de.py index 4f65a07..bdc6dfe 100644 --- a/CADETMatch/de.py +++ b/CADETMatch/de.py @@ -49,5 +49,5 @@ def get_proposal(self, s, c, random): random.shuffle(w) g = np.diff(w, axis=0) * self.g0 + f[i] q[i] = s[i] + g - q[i] = q[i] % 1 + q = np.clip(q, 0, 1) return q, np.zeros(Ns, dtype=np.float64) diff --git a/CADETMatch/de_snooker.py b/CADETMatch/de_snooker.py index d2d9df7..ae96e22 100644 --- a/CADETMatch/de_snooker.py +++ b/CADETMatch/de_snooker.py @@ -43,6 +43,6 @@ def get_proposal(self, s, c, random): norm = np.linalg.norm(delta) u = delta / np.sqrt(norm) q[i] = s[i] + u * self.gammas * (np.dot(u, z1) - np.dot(u, z2)) - q[i] = q[i] % 1 metropolis[i] = np.log(np.linalg.norm(q[i] - z)) - np.log(norm) + q = np.clip(q, 0, 1) return q, 0.5 * (ndim - 1.0) * metropolis diff --git a/CADETMatch/kde_generator.py b/CADETMatch/kde_generator.py index 31af5bf..1f3474e 100644 --- a/CADETMatch/kde_generator.py +++ b/CADETMatch/kde_generator.py @@ -495,40 +495,30 @@ def generate_synthetic_error(cache): scores = numpy.array(scores_all) - keep_idx = keep_data(scores) - - kept = int(numpy.sum(keep_idx)) - removed = int(len(scores) - kept) - dir_base = cache.settings.get("resultsDirBase") file = dir_base / "kde_data.h5" kde_data = H5() kde_data.filename = file.as_posix() - kde_data.root.kept = kept - kde_data.root.removed = removed - - kde_data.root.scores = scores[keep_idx, :] - - kde_data.root.original.scores = scores + kde_data.root.scores = scores for output_name, output in outputs_all.items(): - kde_data.root[output_name] = numpy.array(output)[keep_idx, :] + kde_data.root[output_name] = numpy.array(output) for time_name, time in times.items(): kde_data.root["%s_time" % time_name] = time for name, experiment in errors_all.items(): for error_name, error_value in experiment.items(): - kde_data.root.errors[name][error_name] = error_value[keep_idx, :] + kde_data.root.errors[name][error_name] = error_value for key, value in uv_store_all.items(): - kde_data.root.uv_store[key] = numpy.array(value)[keep_idx, :] + kde_data.root.uv_store[key] = numpy.array(value) kde_data.save(lock=True) - return scores[keep_idx, :] + return scores return None diff --git a/CADETMatch/search/mcmc.py b/CADETMatch/search/mcmc.py index f98902a..399d5b3 100644 --- a/CADETMatch/search/mcmc.py +++ b/CADETMatch/search/mcmc.py @@ -26,6 +26,7 @@ import CADETMatch.util as util import CADETMatch.sub as sub import CADETMatch.pop as pop +import arviz name = "MCMC" @@ -208,7 +209,10 @@ def converged_bounds(chain, length, error_level): temp_chain_flat = temp_chain.reshape( temp_chain_shape[0] * temp_chain_shape[1], temp_chain_shape[2] ) - lb_5, mid_50, ub_95 = numpy.percentile(temp_chain_flat, [5, 50, 95], 0) + hdi = arviz.hdi(temp_chain, hdi_prob=0.9) + lb_5 = hdi[:,0] + ub_95 = hdi[:,1] + mid_50 = numpy.mean(temp_chain_flat, axis=0) lb.append(lb_5) ub.append(ub_95) @@ -665,13 +669,16 @@ def sampler_auto_bounds(cache, checkpoint, sampler, checkpointFile, mcmc_store): def process_interval(cache, mcmc_store, interval_chain, interval_chain_transform): - mean = numpy.mean(interval_chain_transform, 0) - labels = [5, 10, 50, 90, 95] - percentile = numpy.percentile(interval_chain_transform, labels, 0) + hdi = arviz.hdi(interval_chain_transform, hdi_prob=0.9) + lb_5 = hdi[:,0] + ub_95 = hdi[:,1] + mid_50 = numpy.mean(flatten(interval_chain_transform), axis=0) - mcmc_store.root.percentile["mean"] = mean - for idx, label in enumerate(labels): - mcmc_store.root.percentile["percentile_%s" % label] = percentile[idx, :] + hdi_stat = numpy.vstack([lb_5, mid_50, ub_95])[:, numpy.newaxis, :] + + mcmc_store.root.percentile["mean"] = mid_50 + mcmc_store.root.percentile["lb_hdi_90"] = lb_5 + mcmc_store.root.percentile["ub_hdi_90"] = ub_95 flat_interval = interval(interval_chain, cache) flat_interval_transform = interval(interval_chain_transform, cache) @@ -724,7 +731,7 @@ def process_sampler_run_write(cache, mcmc_store): interval_chain = chain_flat interval_chain_transform = chain_flat_transform - process_interval(cache, mcmc_store, interval_chain, interval_chain_transform) + process_interval(cache, mcmc_store, chain, chain_transform) def sampler_run(cache, checkpoint, sampler, checkpointFile, mcmc_store): @@ -769,9 +776,16 @@ def sampler_run(cache, checkpoint, sampler, checkpointFile, mcmc_store): run_chain = addChain(run_chain, p[:, numpy.newaxis, :]) run_probability = addChain(run_probability, ln_prob[:, numpy.newaxis]) + hdi = arviz.hdi(run_chain, hdi_prob=0.9) + lb_5 = hdi[:,0] + ub_95 = hdi[:,1] + mid_50 = numpy.mean(flatten(run_chain), axis=0) + + hdi_stat = numpy.vstack([lb_5, mid_50, ub_95])[:, numpy.newaxis, :] + run_chain_stat = addChain( run_chain_stat, - numpy.percentile(flatten(run_chain), [5, 50, 95], 0)[:, numpy.newaxis, :], + hdi_stat, ) multiprocessing.get_logger().info( @@ -1249,14 +1263,15 @@ def writeMCMC(cache, mcmc_store, process_mcmc_store): mcmc_store.save(lock=True) -def interval(flat_chain, cache): - mean = numpy.mean(flat_chain, 0) - - percentile = numpy.percentile(flat_chain, [5, 10, 50, 90, 95], 0) +def interval(chain, cache): + hdi = arviz.hdi(chain, hdi_prob=0.9) + lb_5 = hdi[:,0] + ub_95 = hdi[:,1] + mid_50 = numpy.mean(flatten(chain), axis=0) - data = numpy.vstack((mean, percentile)).transpose() + hdi_stat = numpy.vstack([lb_5, mid_50, ub_95]) - pd = pandas.DataFrame(data, columns=["mean", "5", "10", "50", "90", "95"]) + pd = pandas.DataFrame(hdi_stat.transpose(), columns=["lb_hdi_90", "mean", "ub_hdi_90"]) pd.insert(0, "name", cache.parameter_headers_actual) pd.set_index("name") return pd diff --git a/CADETMatch/util.py b/CADETMatch/util.py index 0a7d1e9..096977c 100644 --- a/CADETMatch/util.py +++ b/CADETMatch/util.py @@ -522,7 +522,9 @@ def update_json_mcmc(settings): new_parameters = settings["parameters_mcmc"] for new, prior in zip(new_parameters, prior_parameters): - if new["location"].split("/")[-1] == prior["location"].split("/")[-1]: + ok_location = "location" in new and new["location"].split("/")[-1] == prior["location"].split("/")[-1] + ok_location_from = "locationFrom" in new and new["locationFrom"].split("/")[-1] == prior["locationFrom"].split("/")[-1] + if ok_location or ok_location_from: #update just the location data everthing else needs to remain the same for key, value in new.items(): if key not in keep: diff --git a/CADETMatch/version.py b/CADETMatch/version.py index 8d58abc..1d1668b 100644 --- a/CADETMatch/version.py +++ b/CADETMatch/version.py @@ -18,5 +18,5 @@ __email__ = "w.heymann@fz-juelich.de" __license__ = "GNU General Public License v3 (GPLv3)" __copyright__ = "2020 %s" % __author__ -__version__ = "0.8.8" +__version__ = "0.8.9" __uri__ = "https://github.com/modsim/CADET-Match"