use arviz and hdi instead of percentile (a usage sketch follows below, before the file diffs)
clip the proposals in de and de_snooker to [0, 1] instead of wrapping with modulo
don't remove outliers for kde generation (this should have been removed a while ago: since the generation is not random, the outliers are not really outlying)
read locationFrom (this was a bug; locationFrom was missing for MCMC parameter matching)
bumped version number to 0.8.9
Immudzen committed Sep 22, 2021
1 parent 072d54c commit 2a13351
Showing 7 changed files with 42 additions and 35 deletions.
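
The core change in this commit replaces percentile-based credible intervals with arviz's highest density interval (HDI), with the mean kept as the point estimate. Below is a minimal sketch of the difference on a synthetic chain; the (walkers, steps, parameters) shape and the fake data are assumptions for illustration, not CADETMatch output.

```python
# Minimal sketch (not CADETMatch code): old percentile interval vs. new arviz HDI.
import numpy
import arviz

rng = numpy.random.default_rng(0)
chain = rng.normal(size=(16, 500, 3))            # synthetic chain: 16 walkers, 500 steps, 3 parameters
chain_flat = chain.reshape(-1, chain.shape[2])   # collapse walkers and steps

# Old approach: 5th / 50th / 95th percentiles of the flattened chain
lb_old, mid_old, ub_old = numpy.percentile(chain_flat, [5, 50, 95], 0)

# New approach: 90% highest density interval; for ndarray input arviz.hdi
# returns an array of shape (n_params, 2) holding the lower and upper bounds
hdi = arviz.hdi(chain, hdi_prob=0.9)
lb_new, ub_new = hdi[:, 0], hdi[:, 1]
mid_new = numpy.mean(chain_flat, axis=0)         # the mean now serves as the point estimate

print(numpy.column_stack([lb_old, ub_old]))
print(numpy.column_stack([lb_new, ub_new]))
```

Unlike a symmetric percentile interval, the HDI is the narrowest region that contains the requested probability mass, which can matter for skewed posteriors.
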
CADETMatch.pyproj: 2 changes (1 addition, 1 deletion)
@@ -12,7 +12,7 @@
<ProjectTypeGuids>{888888a0-9f3d-457c-b088-3a5042f75d52}</ProjectTypeGuids>
<LaunchProvider>Standard Python launcher</LaunchProvider>
<InterpreterId>CondaEnv|CondaEnv|pymoo_devel</InterpreterId>
<CommandLineArguments>"C:\Users\kosh\Downloads\Cadet-Match-GIEX Question-20210903T074409Z-001\Cadet-Match-GIEX Question\Match_Test.json" 1</CommandLineArguments>
<CommandLineArguments>"F:\cadet_release_test\search\mcmc\stage2\non.json" 12</CommandLineArguments>
<EnableNativeCodeDebugging>False</EnableNativeCodeDebugging>
<SuppressConfigureTestFrameworkPrompt>true</SuppressConfigureTestFrameworkPrompt>
<InterpreterArguments>
CADETMatch/de.py: 2 changes (1 addition, 1 deletion)
@@ -49,5 +49,5 @@ def get_proposal(self, s, c, random):
random.shuffle(w)
g = np.diff(w, axis=0) * self.g0 + f[i]
q[i] = s[i] + g
- q[i] = q[i] % 1
+ q = np.clip(q, 0, 1)
return q, np.zeros(Ns, dtype=np.float64)
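
The old code wrapped out-of-bounds proposals with `% 1`, which can teleport a point to the opposite side of the normalized [0, 1] parameter range; the new code clips them to the boundary instead (the same change is applied in de_snooker.py below). A toy illustration with made-up values:

```python
# Toy illustration (made-up values): modulo wrapping vs. clipping of a proposal vector.
import numpy as np

q = np.array([0.25, 1.20, -0.10])   # two entries fall outside [0, 1]

wrapped = q % 1                     # old behaviour: 1.20 -> ~0.20, -0.10 -> ~0.90 (jumps to the far side)
clipped = np.clip(q, 0, 1)          # new behaviour: 1.20 -> 1.00, -0.10 -> 0.00 (pinned at the boundary)

print(wrapped)   # roughly [0.25 0.2  0.9 ]
print(clipped)   # roughly [0.25 1.   0.  ]
```
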
CADETMatch/de_snooker.py: 2 changes (1 addition, 1 deletion)
@@ -43,6 +43,6 @@ def get_proposal(self, s, c, random):
norm = np.linalg.norm(delta)
u = delta / np.sqrt(norm)
q[i] = s[i] + u * self.gammas * (np.dot(u, z1) - np.dot(u, z2))
- q[i] = q[i] % 1
metropolis[i] = np.log(np.linalg.norm(q[i] - z)) - np.log(norm)
+ q = np.clip(q, 0, 1)
return q, 0.5 * (ndim - 1.0) * metropolis
CADETMatch/kde_generator.py: 20 changes (5 additions, 15 deletions)
@@ -495,40 +495,30 @@ def generate_synthetic_error(cache):

scores = numpy.array(scores_all)

- keep_idx = keep_data(scores)

- kept = int(numpy.sum(keep_idx))
- removed = int(len(scores) - kept)

dir_base = cache.settings.get("resultsDirBase")
file = dir_base / "kde_data.h5"

kde_data = H5()
kde_data.filename = file.as_posix()

- kde_data.root.kept = kept
- kde_data.root.removed = removed

- kde_data.root.scores = scores[keep_idx, :]

- kde_data.root.original.scores = scores
+ kde_data.root.scores = scores

for output_name, output in outputs_all.items():
- kde_data.root[output_name] = numpy.array(output)[keep_idx, :]
+ kde_data.root[output_name] = numpy.array(output)

for time_name, time in times.items():
kde_data.root["%s_time" % time_name] = time

for name, experiment in errors_all.items():
for error_name, error_value in experiment.items():
- kde_data.root.errors[name][error_name] = error_value[keep_idx, :]
+ kde_data.root.errors[name][error_name] = error_value

for key, value in uv_store_all.items():
- kde_data.root.uv_store[key] = numpy.array(value)[keep_idx, :]
+ kde_data.root.uv_store[key] = numpy.array(value)

kde_data.save(lock=True)

- return scores[keep_idx, :]
+ return scores

return None

CADETMatch/search/mcmc.py: 45 changes (30 additions, 15 deletions)
@@ -26,6 +26,7 @@
import CADETMatch.util as util
import CADETMatch.sub as sub
import CADETMatch.pop as pop
+ import arviz

name = "MCMC"

@@ -208,7 +209,10 @@ def converged_bounds(chain, length, error_level):
temp_chain_flat = temp_chain.reshape(
temp_chain_shape[0] * temp_chain_shape[1], temp_chain_shape[2]
)
- lb_5, mid_50, ub_95 = numpy.percentile(temp_chain_flat, [5, 50, 95], 0)
+ hdi = arviz.hdi(temp_chain, hdi_prob=0.9)
+ lb_5 = hdi[:,0]
+ ub_95 = hdi[:,1]
+ mid_50 = numpy.mean(temp_chain_flat, axis=0)

lb.append(lb_5)
ub.append(ub_95)
@@ -665,13 +669,16 @@ def sampler_auto_bounds(cache, checkpoint, sampler, checkpointFile, mcmc_store):


def process_interval(cache, mcmc_store, interval_chain, interval_chain_transform):
- mean = numpy.mean(interval_chain_transform, 0)
- labels = [5, 10, 50, 90, 95]
- percentile = numpy.percentile(interval_chain_transform, labels, 0)
+ hdi = arviz.hdi(interval_chain_transform, hdi_prob=0.9)
+ lb_5 = hdi[:,0]
+ ub_95 = hdi[:,1]
+ mid_50 = numpy.mean(flatten(interval_chain_transform), axis=0)

- mcmc_store.root.percentile["mean"] = mean
- for idx, label in enumerate(labels):
- mcmc_store.root.percentile["percentile_%s" % label] = percentile[idx, :]
+ hdi_stat = numpy.vstack([lb_5, mid_50, ub_95])[:, numpy.newaxis, :]

+ mcmc_store.root.percentile["mean"] = mid_50
+ mcmc_store.root.percentile["lb_hdi_90"] = lb_5
+ mcmc_store.root.percentile["ub_hdi_90"] = ub_95

flat_interval = interval(interval_chain, cache)
flat_interval_transform = interval(interval_chain_transform, cache)
@@ -724,7 +731,7 @@ def process_sampler_run_write(cache, mcmc_store):

interval_chain = chain_flat
interval_chain_transform = chain_flat_transform
- process_interval(cache, mcmc_store, interval_chain, interval_chain_transform)
+ process_interval(cache, mcmc_store, chain, chain_transform)


def sampler_run(cache, checkpoint, sampler, checkpointFile, mcmc_store):
@@ -769,9 +776,16 @@ def sampler_run(cache, checkpoint, sampler, checkpointFile, mcmc_store):
run_chain = addChain(run_chain, p[:, numpy.newaxis, :])
run_probability = addChain(run_probability, ln_prob[:, numpy.newaxis])

+ hdi = arviz.hdi(run_chain, hdi_prob=0.9)
+ lb_5 = hdi[:,0]
+ ub_95 = hdi[:,1]
+ mid_50 = numpy.mean(flatten(run_chain), axis=0)

+ hdi_stat = numpy.vstack([lb_5, mid_50, ub_95])[:, numpy.newaxis, :]

run_chain_stat = addChain(
run_chain_stat,
- numpy.percentile(flatten(run_chain), [5, 50, 95], 0)[:, numpy.newaxis, :],
+ hdi_stat,
)

multiprocessing.get_logger().info(
@@ -1249,14 +1263,15 @@ def writeMCMC(cache, mcmc_store, process_mcmc_store):
mcmc_store.save(lock=True)


- def interval(flat_chain, cache):
- mean = numpy.mean(flat_chain, 0)

- percentile = numpy.percentile(flat_chain, [5, 10, 50, 90, 95], 0)
+ def interval(chain, cache):
+ hdi = arviz.hdi(chain, hdi_prob=0.9)
+ lb_5 = hdi[:,0]
+ ub_95 = hdi[:,1]
+ mid_50 = numpy.mean(flatten(chain), axis=0)

- data = numpy.vstack((mean, percentile)).transpose()
+ hdi_stat = numpy.vstack([lb_5, mid_50, ub_95])

- pd = pandas.DataFrame(data, columns=["mean", "5", "10", "50", "90", "95"])
+ pd = pandas.DataFrame(hdi_stat.transpose(), columns=["lb_hdi_90", "mean", "ub_hdi_90"])
pd.insert(0, "name", cache.parameter_headers_actual)
pd.set_index("name")
return pd
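
The rewritten interval() reports the 90% HDI bounds around the mean instead of a row of percentiles. Below is a standalone sketch of the resulting summary table; flatten() is assumed to collapse walkers and steps (a plain reshape stands in for it), and the parameter names are made up in place of cache.parameter_headers_actual.

```python
# Standalone sketch of the new summary table (illustrative, not the CADETMatch function itself).
import numpy
import pandas
import arviz

chain = numpy.random.default_rng(1).normal(size=(16, 500, 2))    # synthetic chain, 2 parameters

hdi = arviz.hdi(chain, hdi_prob=0.9)                              # shape (n_params, 2)
lb_5, ub_95 = hdi[:, 0], hdi[:, 1]
mid_50 = numpy.mean(chain.reshape(-1, chain.shape[2]), axis=0)    # stand-in for flatten(chain)

hdi_stat = numpy.vstack([lb_5, mid_50, ub_95])                    # shape (3, n_params)

table = pandas.DataFrame(hdi_stat.transpose(), columns=["lb_hdi_90", "mean", "ub_hdi_90"])
table.insert(0, "name", ["ka", "kd"])                             # stand-in for cache.parameter_headers_actual
print(table)
```
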
CADETMatch/util.py: 4 changes (3 additions, 1 deletion)
@@ -522,7 +522,9 @@ def update_json_mcmc(settings):
new_parameters = settings["parameters_mcmc"]

for new, prior in zip(new_parameters, prior_parameters):
if new["location"].split("/")[-1] == prior["location"].split("/")[-1]:
ok_location = "location" in new and new["location"].split("/")[-1] == prior["location"].split("/")[-1]
ok_location_from = "locationFrom" in new and new["locationFrom"].split("/")[-1] == prior["locationFrom"].split("/")[-1]
if ok_location or ok_location_from:
#update just the location data everthing else needs to remain the same
for key, value in new.items():
if key not in keep:
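
The util.py change lets update_json_mcmc match a new MCMC parameter entry to its prior entry by the last path component of either location or locationFrom, where previously only location was read. Below is a standalone sketch of that matching rule; the dictionaries and the CADET-style path are made-up examples, and, as in the original loop, a matched pair is assumed to carry the same key.

```python
# Sketch of the matching rule added in update_json_mcmc (illustrative helper, not a library function).
def same_parameter(new, prior):
    ok_location = "location" in new and new["location"].split("/")[-1] == prior["location"].split("/")[-1]
    ok_location_from = "locationFrom" in new and new["locationFrom"].split("/")[-1] == prior["locationFrom"].split("/")[-1]
    return ok_location or ok_location_from

# Made-up parameter entries that carry locationFrom instead of location
new = {"locationFrom": "/input/model/unit_001/adsorption/SMA_KA"}
prior = {"locationFrom": "/input/model/unit_001/adsorption/SMA_KA"}
print(same_parameter(new, prior))   # True
```
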
CADETMatch/version.py: 2 changes (1 addition, 1 deletion)
@@ -18,5 +18,5 @@
__email__ = "[email protected]"
__license__ = "GNU General Public License v3 (GPLv3)"
__copyright__ = "2020 %s" % __author__
__version__ = "0.8.8"
__version__ = "0.8.9"
__uri__ = "https://github.com/modsim/CADET-Match"
