From b6f20ab9d1f0e6e5c4335c83f4eb32c1c7c6762c Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Fri, 25 Oct 2019 16:25:00 +0200
Subject: [PATCH 01/14] Approximate bimodal Gaussian with Gaussian distribution

---
 prototype.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 prototype.py

diff --git a/prototype.py b/prototype.py
new file mode 100644
index 0000000..70ae0ad
--- /dev/null
+++ b/prototype.py
@@ -0,0 +1,75 @@
+import math
+import os
+import torch
+import torch.distributions.constraints as constraints
+import pyro
+from pyro.optim import Adam
+from pyro.infer import SVI, Trace_ELBO, config_enumerate
+import pyro.distributions as dist
+from pyro.infer.autoguide import AutoDelta
+from pyro import poutine
+
+# this is for running the notebook in our testing framework
+smoke_test = ('CI' in os.environ)
+n_steps = 2 if smoke_test else 2000
+pyro.set_rng_seed(2)
+
+# enable validation (e.g. validate parameters of distributions)
+assert pyro.__version__.startswith('0.4.1')
+pyro.enable_validation(True)
+
+# clear the param store in case we're in a REPL
+pyro.clear_param_store()
+
+data = torch.tensor([0., 1., 10., 11., 12.])
+
+K = 2 # Fixed number of components.
+
+@config_enumerate
+def model(data):
+    # Global variables.
+    weights = torch.tensor([0.4, 0.6])
+    scale = pyro.sample('scale', dist.LogNormal(0., 2.))
+    with pyro.plate('components', K):
+        locs = pyro.sample('locs', dist.Normal(0., 10.))
+
+    print("actual locs= {}".format(locs.data.numpy()))
+    print("actual scale= {}".format(scale.data.numpy()))
+
+    with pyro.plate('data', len(data)):
+        # Local variables.
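+        # Each data point draws its own mixture assignment; conditioned on
+        # it, the observation comes from the corresponding Normal component.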
+        assignment = pyro.sample('assignment', dist.Categorical(weights))
+        pyro.sample('obs', dist.Normal(locs[assignment], scale), obs=data)
+
+# global_guide = AutoDelta(poutine.block(model, expose=['locs', 'scale']))
+
+
+def guide(data):
+    scale_q = pyro.param('scale_q', torch.tensor(1.),
+    constraint=constraints.positive)
+
+    locs_q = pyro.param('locs_q', torch.tensor(5.),
+    constraint=constraints.positive)
+
+    pyro.sample('obs', dist.Normal(locs_q, scale_q))
+
+
+
+# setup the optimizer
+adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
+optimizer = Adam(adam_params)
+
+# setup the inference algorithm
+svi = SVI(model, guide, optimizer, loss=Trace_ELBO())
+
+# do gradient steps
+for step in range(n_steps):
+    svi.step(data)
+    if step % 100 == 0:
+        print('.', end='')
+
+scale = pyro.param("scale_q").item()
+locs = pyro.param("locs_q").item()
+print('locs = {}'.format(locs))
+print('scale = {}'.format(scale))

From 5ead11ce7fdd6b151c88d7dcabb69782e3e6cfff Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Fri, 25 Oct 2019 18:17:32 +0200
Subject: [PATCH 02/14] Add relbo draft

---
 prototype.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 5 deletions(-)

diff --git a/prototype.py b/prototype.py
index 70ae0ad..874e4f4 100644
--- a/prototype.py
+++ b/prototype.py
@@ -8,6 +8,7 @@ import pyro.distributions as dist
 from pyro.infer.autoguide import AutoDelta
 from pyro import poutine
+from pyro.poutine import trace, replay, block
 
 # this is for running the notebook in our testing framework
 smoke_test = ('CI' in os.environ)
@@ -33,9 +34,6 @@ def model(data):
     with pyro.plate('components', K):
         locs = pyro.sample('locs', dist.Normal(0., 10.))
 
-    print("actual locs= {}".format(locs.data.numpy()))
-    print("actual scale= {}".format(scale.data.numpy()))
-
     with pyro.plate('data', len(data)):
         # Local variables.
         assignment = pyro.sample('assignment', dist.Categorical(weights))
@@ -55,19 +53,77 @@ def guide(data):
     pyro.sample('obs', dist.Normal(locs_q, scale_q))
 
 
+def approximation(data, components, weights):
+    assignment = pyro.sample('assignment', dist.Categorical(weights))
+    sample = components[assignment](data)
+
+def dummy_approximation(data):
+    scale_a = pyro.param('scale_a', torch.tensor(2.),
+    constraint=constraints.positive)
+
+    locs_a = pyro.param('locs_a', torch.tensor(10.),
+    constraint=constraints.positive)
+    sample = pyro.sample('obs', dist.Normal(locs_a, scale_a))
+
+    # scale_a = pyro.param("scale_a").item()
+    # locs_a = pyro.param("locs_a").item()
+
+    # print("Loc and scale of approx, before svi: {0} {1}".format(locs_a, scale_a))
+
+
+def relbo(model, guide, *args, **kwargs):
+
+    approximation = kwargs.pop('approximation', None)
+    # Run the guide with the arguments passed to SVI.step() and trace the execution,
+    # i.e. record all the calls to Pyro primitives like sample() and param().
+    guide_trace = trace(guide).get_trace(*args, **kwargs)
+    # Now run the model with the same arguments and trace the execution. Because
+    # model is being run with replay, whenever we encounter a sample site in the
+    # model, instead of sampling from the corresponding distribution in the model,
+    # we instead reuse the corresponding sample from the guide. In probabilistic
+    # terms, this means our loss is constructed as an expectation w.r.t. the joint
+    # distribution defined by the guide.
+    model_trace = trace(replay(model, guide_trace)).get_trace(*args, **kwargs)
+    approximation_trace = trace(replay(block(approximation, expose=["obs"]), guide_trace)).get_trace(*args, **kwargs)
+    # We will accumulate the various terms of the ELBO in `elbo`.
+    elbo = 0.
+    # Loop over all the sample sites in the model and add the corresponding
+    # log p(z) term to the ELBO. Note that this will also include any observed
+    # data, i.e. sample sites with the keyword `obs=...`.
+    elbo = elbo + model_trace.log_prob_sum()
+    # Loop over all the sample sites in the guide and add the corresponding
+    # -log q(z) term to the ELBO.
+    elbo = elbo - guide_trace.log_prob_sum()
+    elbo = elbo - approximation_trace.log_prob_sum()
+    print(approximation_trace.log_prob_sum())
+    # Return (-elbo) since by convention we do gradient descent on a loss and
+    # the ELBO is a lower bound that needs to be maximized.
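+    # In symbols, with s the new component being fitted, q_t the current
+    # mixture approximation, and all expectations taken under s (the guide),
+    # this computes the residual ELBO
+    #     RELBO(s) = E_s[log p(x, z)] - E_s[log s(z)] - E_s[log q_t(z)],
+    # i.e. the usual ELBO plus a cross-entropy penalty that pushes the new
+    # component away from mass already covered by q_t.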
+    return -elbo
+
+
 # setup the optimizer
 adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
 optimizer = Adam(adam_params)
 
 # setup the inference algorithm
-svi = SVI(model, guide, optimizer, loss=Trace_ELBO())
+svi = SVI(model, guide, optimizer, loss=relbo)
 
 # do gradient steps
+
+wrapped_approximation = approximation(data, components=[guide, guide], weights=torch.tensor([0.4, 0.6]))
+
 for step in range(n_steps):
-    svi.step(data)
+    svi.step(data, approximation=dummy_approximation)
     if step % 100 == 0:
         print('.', end='')
 
+print(pyro.param("locs_q"))
+scale = pyro.param("scale_q").item()
+locs = pyro.param("locs_q").item()
+lifted_component = poutine.lift(guide)
+components = [lifted_component, lifted_component]
+
+
 scale = pyro.param("scale_q").item()
 locs = pyro.param("locs_q").item()
 print('locs = {}'.format(locs))

From eaa43677929b2d42536d918d25478407c2f8d099 Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Fri, 25 Oct 2019 18:41:56 +0200
Subject: [PATCH 03/14] Add first experiments with approximation

---
 prototype.py | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/prototype.py b/prototype.py
index 874e4f4..303492e 100644
--- a/prototype.py
+++ b/prototype.py
@@ -52,10 +52,31 @@ def guide(data):
     pyro.sample('obs', dist.Normal(locs_q, scale_q))
 
 
+def component_1(data):
+    scale_q = pyro.param('scale_q', torch.tensor(1.),
+    constraint=constraints.positive)
+
+    locs_q = pyro.param('locs_q', torch.tensor(5.),
+    constraint=constraints.positive)
+
+    pyro.sample('obs', dist.Normal(locs_q, scale_q))
+
+def component_2(data):
+    scale_q = pyro.param('scale_q', torch.tensor(2.),
+    constraint=constraints.positive)
+
+    locs_q = pyro.param('locs_q', torch.tensor(10.),
+    constraint=constraints.positive)
+
+    pyro.sample('obs', dist.Normal(locs_q, scale_q))
+
 
 
-def approximation(data, components, weights):
+def approximation(data):
+    weights = torch.tensor([0.4, 0.6])
     assignment = pyro.sample('assignment', dist.Categorical(weights))
-    sample = components[assignment](data)
+    components = [component_1, component_2]
+    components[assignment](data)
 
 def dummy_approximation(data):
     scale_a = pyro.param('scale_a', torch.tensor(2.),
@@ -110,10 +131,9 @@ def relbo(model, guide, *args, **kwargs):
 
 # do gradient steps
 
-wrapped_approximation = approximation(data, components=[guide, guide], weights=torch.tensor([0.4, 0.6]))
 
 for step in range(n_steps):
-    svi.step(data, approximation=dummy_approximation)
+    svi.step(data, approximation=approximation)
     if step % 100 == 0:
         print('.', end='')

From cdc0e8767251bf4ad34e31cfb9fcc09983e153ea Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Sat, 26 Oct 2019 17:47:19 +0200
Subject: [PATCH 04/14] Add first working boosting BBVI prototype

---
 prototype.py | 132 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 80 insertions(+), 52 deletions(-)

diff --git a/prototype.py b/prototype.py
index 303492e..5b9cf12 100644
--- a/prototype.py
+++ b/prototype.py
@@ -9,10 +9,14 @@ import pyro.distributions as dist
 from pyro.infer.autoguide import AutoDelta
 from pyro import poutine
 from pyro.poutine import trace, replay, block
+from functools import partial
+import numpy as np
+import scipy.stats
+from matplotlib import pyplot
 
 # this is for running the notebook in our testing framework
 smoke_test = ('CI' in os.environ)
-n_steps = 2 if smoke_test else 2000
+n_steps = 2 if smoke_test else 1500
 pyro.set_rng_seed(2)
 
 # enable validation (e.g. validate parameters of distributions)
@@ -22,7 +26,7 @@
 # clear the param store in case we're in a REPL
 pyro.clear_param_store()
 
-data = torch.tensor([0., 1., 10., 11., 12.])
+data = torch.tensor([0., 1., 2., 0, 0.5, 1.5, 10., 11., 12., 10.6, 11.8, 12.2])
 
 K = 2 # Fixed number of components.
 
@@ -42,47 +46,26 @@ def model(data):
 
 # global_guide = AutoDelta(poutine.block(model, expose=['locs', 'scale']))
 
 
-def guide(data):
-    scale_q = pyro.param('scale_q', torch.tensor(1.),
+def guide(data, index):
+    scale_q = pyro.param('scale_{}'.format(index), torch.tensor(1.),
     constraint=constraints.positive)
 
-    locs_q = pyro.param('locs_q', torch.tensor(5.),
-    constraint=constraints.positive)
-
-    pyro.sample('obs', dist.Normal(locs_q, scale_q))
-
-
-def component_1(data):
-    scale_q = pyro.param('scale_q', torch.tensor(1.),
-    constraint=constraints.positive)
-
-    locs_q = pyro.param('locs_q', torch.tensor(5.),
-    constraint=constraints.positive)
-
-    pyro.sample('obs', dist.Normal(locs_q, scale_q))
-
-def component_2(data):
-    scale_q = pyro.param('scale_q', torch.tensor(2.),
-    constraint=constraints.positive)
-
-    locs_q = pyro.param('locs_q', torch.tensor(10.),
+    locs_q = pyro.param('locs_{}'.format(index), torch.tensor(5.),
     constraint=constraints.positive)
 
     pyro.sample('obs', dist.Normal(locs_q, scale_q))
 
 
-def approximation(data):
-    weights = torch.tensor([0.4, 0.6])
+def approximation(data, components, weights):
     assignment = pyro.sample('assignment', dist.Categorical(weights))
-    components = [component_1, component_2]
     components[assignment](data)
 
 def dummy_approximation(data):
-    scale_a = pyro.param('scale_a', torch.tensor(2.),
+    scale_a = pyro.param('scale_a', torch.tensor(10000.),
     constraint=constraints.positive)
 
-    locs_a = pyro.param('locs_a', torch.tensor(10.),
+    locs_a = pyro.param('locs_a', torch.tensor(0.),
     constraint=constraints.positive)
     sample = pyro.sample('obs', dist.Normal(locs_a, scale_a))
 
@@ -116,7 +99,7 @@ def relbo(model, guide, *args, **kwargs):
     # -log q(z) term to the ELBO.
     elbo = elbo - guide_trace.log_prob_sum()
     elbo = elbo - approximation_trace.log_prob_sum()
-    print(approximation_trace.log_prob_sum())
+    # print(approximation_trace.log_prob_sum())
     # Return (-elbo) since by convention we do gradient descent on a loss and
     # the ELBO is a lower bound that needs to be maximized.
     return -elbo
 
@@ -126,26 +109,71 @@
 adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
 optimizer = Adam(adam_params)
 
-# setup the inference algorithm
-svi = SVI(model, guide, optimizer, loss=relbo)
-
-# do gradient steps
-
-for step in range(n_steps):
-    svi.step(data, approximation=approximation)
-    if step % 100 == 0:
-        print('.', end='')
-
-print(pyro.param("locs_q"))
-scale = pyro.param("scale_q").item()
-locs = pyro.param("locs_q").item()
-lifted_component = poutine.lift(guide)
-components = [lifted_component, lifted_component]
-
-
-scale = pyro.param("scale_q").item()
-locs = pyro.param("locs_q").item()
-print('locs = {}'.format(locs))
-print('scale = {}'.format(scale))
+# wrapped_guide = partial(guide, index=0)
+# svi = SVI(model, wrapped_guide, optimizer, loss=Trace_ELBO())
+# # do gradient steps
+# losses = []
+# for step in range(n_steps):
+#     loss = svi.step(data)
+#     losses.append(loss)
+#     if step % 100 == 0:
+#         print('.', end='')
+
+n_iterations = 2
+
+initial_approximation = dummy_approximation
+components = [initial_approximation]
+weights = torch.tensor([1.])
+wrapped_approximation = partial(approximation, components=components, weights=weights)
+
+locs = [0]
+scales = [0]
+
+for t in range(1, n_iterations + 1):
+    # setup the inference algorithm
+    wrapped_guide = partial(guide, index=t)
+    svi = SVI(model, wrapped_guide, optimizer, loss=relbo)
+    # do gradient steps
+    losses = []
+    for step in range(n_steps):
+        loss = svi.step(data, approximation=wrapped_approximation)
+        losses.append(loss)
+        if step % 100 == 0:
+            print('.', end='')
+
+    #pyplot.scatter(range(len(losses)), losses)
+    #pyplot.show()
+    components.append(wrapped_guide)
+    new_weight = 2 / (t + 1)
+
+    weights = weights * (1-new_weight)
+    weights = torch.cat((weights, torch.tensor([new_weight])))
+
+    wrapped_approximation = partial(approximation, components=components, weights=weights)
+
+
+    scale = pyro.param("scale_{}".format(t)).item()
+    scales.append(scale)
+    loc = pyro.param("locs_{}".format(t)).item()
+    locs.append(loc)
+    print('locs = {}'.format(loc))
+    print('scale = {}'.format(scale))
+
+print(weights)
+print(locs)
+print(scales)
+
+X = np.arange(-3,18,0.1)
+Y1 = weights[1].item() * scipy.stats.norm.pdf((X - locs[1]) / scales[1])
+Y2 = weights[2].item() * scipy.stats.norm.pdf((X - locs[2]) / scales[2])
+#Y3 = weights[3].item() * scipy.stats.norm.pdf((X - locs[3]) / scales[3])
+
+pyplot.figure(figsize=(10, 4), dpi=100).set_facecolor('white')
+pyplot.plot(X, Y1, 'r-')
+pyplot.plot(X, Y2, 'b-')
+pyplot.plot(X, Y1 + Y2, 'k--')
+pyplot.plot(data.data.numpy(), np.zeros(len(data)), 'k*')
+pyplot.title('Density of two-component mixture model')
+pyplot.ylabel('probability density');
+pyplot.show()
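
The boosting loop in patch 04 leaves the fitted guides in `components` and their
mixture weights in `weights`. A minimal sketch of how samples could be drawn from
that boosted approximation -- the helper name `sample_from_mixture` is introduced
here for illustration and does not appear in the patches:

    import torch
    import pyro.distributions as dist
    from pyro.poutine import trace

    def sample_from_mixture(data, components, weights, n_samples=1000):
        # Pick a component per draw, run that guide once, and read off the
        # value it sampled at its 'obs' site.
        draws = []
        for _ in range(n_samples):
            k = dist.Categorical(weights).sample().item()
            guide_trace = trace(components[k]).get_trace(data)
            draws.append(guide_trace.nodes['obs']['value'])
        return torch.stack(draws)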
From 4a4ab298467eea30756819cb5392395ec925b5b5 Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Tue, 29 Oct 2019 09:51:31 +0100
Subject: [PATCH 05/14] Add requirements.txt

---
 requirements.txt | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cbb56f3
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,36 @@
+absl-py==0.8.1
+astor==0.8.0
+certifi==2019.9.11
+cycler==0.10.0
+gast==0.2.2
+google-pasta==0.1.7
+graphviz==0.13
+grpcio==1.24.1
+h5py==2.10.0
+joblib==0.14.0
+joypy==0.2.1
+Keras-Applications==1.0.8
+Keras-Preprocessing==1.1.0
+kiwisolver==1.1.0
+Markdown==3.1.1
+matplotlib==3.1.1
+numpy==1.17.2
+opt-einsum==3.1.0
+pandas==0.25.1
+protobuf==3.10.0
+pyparsing==2.4.2
+pyro-ppl==0.4.1
+python-dateutil==2.8.0
+pytz==2019.3
+scikit-learn==0.21.3
+scipy==1.3.1
+seaborn==0.9.0
+six==1.12.0
+tensorboard==1.14.0
+tensorflow==1.14.0
+tensorflow-estimator==1.14.0
+termcolor==1.1.0
+torch==1.3.0
+tqdm==4.36.1
+Werkzeug==0.16.0
+wrapt==1.11.2

From 41068fef1ebdcafa45dbc59080d34bc806b9a09a Mon Sep 17 00:00:00 2001
From: Lorenz Kuhn
Date: Tue, 29 Oct 2019 17:57:43 +0100
Subject: [PATCH 06/14] Reduce requirement specification

---
 requirements.txt | 34 ----------------------------------
 1 file changed, 34 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index cbb56f3..829dd8a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,36 +1,2 @@
-absl-py==0.8.1
-astor==0.8.0
-certifi==2019.9.11
-cycler==0.10.0
-gast==0.2.2
-google-pasta==0.1.7
-graphviz==0.13
-grpcio==1.24.1
-h5py==2.10.0
-joblib==0.14.0
-joypy==0.2.1
-Keras-Applications==1.0.8
-Keras-Preprocessing==1.1.0
-kiwisolver==1.1.0
-Markdown==3.1.1
-matplotlib==3.1.1
-numpy==1.17.2
-opt-einsum==3.1.0
-pandas==0.25.1
-protobuf==3.10.0
-pyparsing==2.4.2
 pyro-ppl==0.4.1
-python-dateutil==2.8.0
-pytz==2019.3
-scikit-learn==0.21.3
-scipy==1.3.1
-seaborn==0.9.0
-six==1.12.0
-tensorboard==1.14.0
-tensorflow==1.14.0
-tensorflow-estimator==1.14.0
-termcolor==1.1.0
 torch==1.3.0
-tqdm==4.36.1
-Werkzeug==0.16.0
-wrapt==1.11.2

From 70589cd9dba139a394084931c77290c075fb95a7 Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Wed, 30 Oct 2019 10:00:26 +0100
Subject: [PATCH 07/14] Add more logging features

---
 prototype.py | 214 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 128 insertions(+), 86 deletions(-)

diff --git a/prototype.py b/prototype.py
index 5b9cf12..d104ae5 100644
--- a/prototype.py
+++ b/prototype.py
@@ -3,7 +3,7 @@
 import torch
 import torch.distributions.constraints as constraints
 import pyro
-from pyro.optim import Adam
+from pyro.optim import Adam, SGD
 from pyro.infer import SVI, Trace_ELBO, config_enumerate
 import pyro.distributions as dist
 from pyro.infer.autoguide import AutoDelta
@@ -13,10 +13,15 @@
 import numpy as np
 import scipy.stats
 from matplotlib import pyplot
+from pyro.infer.autoguide import AutoDelta
+from collections import defaultdict
+
+PRINT_INTERMEDIATE_LATENT_VALUES = False
+PRINT_TRACES = False
 
 # this is for running the notebook in our testing framework
 smoke_test = ('CI' in os.environ)
-n_steps = 2 if smoke_test else 1500
+n_steps = 2 if smoke_test else 1200
 pyro.set_rng_seed(2)
 
 # enable validation (e.g. validate parameters of distributions)
@@ -28,12 +33,18 @@
 
 data = torch.tensor([0., 1., 2., 0, 0.5, 1.5, 10., 11., 12., 10.6, 11.8, 12.2])
 
+n = torch.distributions.Normal(torch.tensor([2.0]), torch.tensor([1.0]))
+m = torch.distributions.Normal(torch.tensor([10.0]), torch.tensor([1.0]))
+
+data = torch.cat((n.sample((60,)), m.sample((40,))))
+
+
 K = 2 # Fixed number of components.
 
 @config_enumerate
 def model(data):
-    # Global variables.
-    weights = torch.tensor([0.4, 0.6])
+    weights = pyro.sample('weights', dist.Dirichlet(0.5 * torch.ones(K)))
     scale = pyro.sample('scale', dist.LogNormal(0., 2.))
     with pyro.plate('components', K):
         locs = pyro.sample('locs', dist.Normal(0., 10.))
@@ -43,8 +54,6 @@ def model(data):
         assignment = pyro.sample('assignment', dist.Categorical(weights))
         pyro.sample('obs', dist.Normal(locs[assignment], scale), obs=data)
 
-# global_guide = AutoDelta(poutine.block(model, expose=['locs', 'scale']))
-
 
 def guide(data, index):
     scale_q = pyro.param('scale_{}'.format(index), torch.tensor(1.),
@@ -56,24 +65,18 @@ def guide(data, index):
 
     pyro.sample('obs', dist.Normal(locs_q, scale_q))
 
-
+@config_enumerate
 def approximation(data, components, weights):
     assignment = pyro.sample('assignment', dist.Categorical(weights))
     components[assignment](data)
 
+
 def dummy_approximation(data):
-    scale_a = pyro.param('scale_a', torch.tensor(10000.),
-    constraint=constraints.positive)
 
-    locs_a = pyro.param('locs_a', torch.tensor(0.),
-    constraint=constraints.positive)
+    scale_a = torch.tensor([1.])
+    locs_a = torch.tensor([20.])
     sample = pyro.sample('obs', dist.Normal(locs_a, scale_a))
 
-    # scale_a = pyro.param("scale_a").item()
-    # locs_a = pyro.param("locs_a").item()
-
-    # print("Loc and scale of approx, before svi: {0} {1}".format(locs_a, scale_a))
-
 
 def relbo(model, guide, *args, **kwargs):
 
@@ -94,86 +97,125 @@ def relbo(model, guide, *args, **kwargs):
     # Loop over all the sample sites in the model and add the corresponding
     # log p(z) term to the ELBO. Note that this will also include any observed
     # data, i.e. sample sites with the keyword `obs=...`.
-    elbo = elbo + model_trace.log_prob_sum()
+    elbo = elbo + model_trace.log_prob_sum()
     # Loop over all the sample sites in the guide and add the corresponding
     # -log q(z) term to the ELBO.
     elbo = elbo - guide_trace.log_prob_sum()
     elbo = elbo - approximation_trace.log_prob_sum()
-    # print(approximation_trace.log_prob_sum())
 
+
     # Return (-elbo) since by convention we do gradient descent on a loss and
     # the ELBO is a lower bound that needs to be maximized.
+    if elbo < 10e-8 and PRINT_TRACES:
+        print('Guide trace')
+        print(guide_trace.log_prob_sum())
+        print('Model trace')
+        print(model_trace.log_prob_sum())
+        print('Approximation trace')
+        print(approximation_trace.log_prob_sum())
     return -elbo
 
+def boosting_bbvi():
+    # setup the optimizer
+    adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
+    optimizer = Adam(adam_params)
 
-# setup the optimizer
-adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
-optimizer = Adam(adam_params)
-
-
-# wrapped_guide = partial(guide, index=0)
-# svi = SVI(model, wrapped_guide, optimizer, loss=Trace_ELBO())
-# # do gradient steps
-# losses = []
-# for step in range(n_steps):
-#     loss = svi.step(data)
-#     losses.append(loss)
-#     if step % 100 == 0:
-#         print('.', end='')
-
-n_iterations = 2
-
-initial_approximation = dummy_approximation
-components = [initial_approximation]
-weights = torch.tensor([1.])
-wrapped_approximation = partial(approximation, components=components, weights=weights)
-
-locs = [0]
-scales = [0]
-
-for t in range(1, n_iterations + 1):
-    # setup the inference algorithm
-    wrapped_guide = partial(guide, index=t)
-    svi = SVI(model, wrapped_guide, optimizer, loss=relbo)
-    # do gradient steps
-    losses = []
-    for step in range(n_steps):
-        loss = svi.step(data, approximation=wrapped_approximation)
-        losses.append(loss)
-        if step % 100 == 0:
-            print('.', end='')
-
-    #pyplot.scatter(range(len(losses)), losses)
-    #pyplot.show()
-    components.append(wrapped_guide)
-    new_weight = 2 / (t + 1)
-
-    weights = weights * (1-new_weight)
-    weights = torch.cat((weights, torch.tensor([new_weight])))
-
-    wrapped_approximation = partial(approximation, components=components, weights=weights)
-
-
-    scale = pyro.param("scale_{}".format(t)).item()
-    scales.append(scale)
-    loc = pyro.param("locs_{}".format(t)).item()
-    locs.append(loc)
-    print('locs = {}'.format(loc))
-    print('scale = {}'.format(scale))
+    n_iterations = 2
+
+    initial_approximation = dummy_approximation
+    components = [initial_approximation]
+    weights = torch.tensor([1.])
+    wrapped_approximation = partial(approximation, components=components, weights=weights)
+
+    locs = [0]
+    scales = [0]
+
+    gradient_norms = defaultdict(list)
+    for t in range(1, n_iterations + 1):
+        # setup the inference algorithm
+        wrapped_guide = partial(guide, index=t)
+        svi = SVI(model, wrapped_guide, optimizer, loss=relbo)
+        # do gradient steps
+        losses = []
+        # Register hooks to monitor gradient norms.
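+        # Running the guide once here creates its pyro.params in the param
+        # store, so that the gradient-norm hooks below can be attached before
+        # the first optimization step.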
+        wrapped_guide(data)
+        for name, value in pyro.get_param_store().named_parameters():
+            if not name in gradient_norms:
+                value.register_hook(lambda g, name=name: gradient_norms[name].append(g.norm().item()))
+
+        for step in range(n_steps):
+            loss = svi.step(data, approximation=wrapped_approximation)
+            losses.append(loss)
+            if (loss > 1000 and PRINT_INTERMEDIATE_LATENT_VALUES):
+                print('Loss: {}'.format(loss))
+                scale = pyro.param("scale_{}".format(t)).item()
+                loc = pyro.param("locs_{}".format(t)).item()
+                print('locs = {}'.format(loc))
+                print('scale = {}'.format(scale))
+
+            if step % 100 == 0:
+                print('.', end=' ')
+
+
+        pyplot.plot(range(len(losses)), losses)
+        pyplot.xlabel('Update Steps')
+        pyplot.ylabel('-ELBO')
+        pyplot.title('-ELBO against time for component {}'.format(t));
+        pyplot.show()
+
+        components.append(wrapped_guide)
+        new_weight = 2 / (t + 1)
+
+        weights = weights * (1-new_weight)
+        weights = torch.cat((weights, torch.tensor([new_weight])))
+
+        wrapped_approximation = partial(approximation, components=components, weights=weights)
+
+
+        scale = pyro.param("scale_{}".format(t)).item()
+        scales.append(scale)
+        loc = pyro.param("locs_{}".format(t)).item()
+        locs.append(loc)
+        print('locs = {}'.format(loc))
+        print('scale = {}'.format(scale))
+
+    pyplot.figure(figsize=(10,4), dpi=100).set_facecolor('white')
+    for name, grad_norms in gradient_norms.items():
+        pyplot.plot(grad_norms, label=name)
+    pyplot.xlabel('iters')
+    pyplot.ylabel('gradient norm')
+    pyplot.yscale('log')
+    pyplot.legend(loc='best')
+    pyplot.title('Gradient norms during SVI');
+    pyplot.show()
+
+
+    for t in range(1, n_iterations + 1):
+        scale = pyro.param("scale_{}".format(t)).item()
+        loc = pyro.param("locs_{}".format(t)).item()
+        print('locs = {}'.format(loc))
+        print('scale = {}'.format(scale))
+
+
+    print(weights)
+    print(locs)
+    print(scales)
+
+    X = np.arange(-3,18,0.1)
+    Y1 = weights[1].item() * scipy.stats.norm.pdf((X - locs[1]) / scales[1])
+    Y2 = weights[2].item() * scipy.stats.norm.pdf((X - locs[2]) / scales[2])
+    #Y3 = weights[3].item() * scipy.stats.norm.pdf((X - locs[3]) / scales[3])
+
+    pyplot.figure(figsize=(10, 4), dpi=100).set_facecolor('white')
+    pyplot.plot(X, Y1, 'r-')
+    pyplot.plot(X, Y2, 'b-')
+    pyplot.plot(X, Y1 + Y2, 'k--')
+    pyplot.plot(data.data.numpy(), np.zeros(len(data)), 'k*')
+    pyplot.title('Density of two-component mixture model')
+    pyplot.ylabel('probability density');
+    pyplot.show()
+
+
+if __name__ == '__main__':
+
+    boosting_bbvi()
 
-print(weights)
-print(locs)
-print(scales)
-
-X = np.arange(-3,18,0.1)
-Y1 = weights[1].item() * scipy.stats.norm.pdf((X - locs[1]) / scales[1])
-Y2 = weights[2].item() * scipy.stats.norm.pdf((X - locs[2]) / scales[2])
-#Y3 = weights[3].item() * scipy.stats.norm.pdf((X - locs[3]) / scales[3])
-
-pyplot.figure(figsize=(10, 4), dpi=100).set_facecolor('white')
-pyplot.plot(X, Y1, 'r-')
-pyplot.plot(X, Y2, 'b-')
-pyplot.plot(X, Y1 + Y2, 'k--')
-pyplot.plot(data.data.numpy(), np.zeros(len(data)), 'k*')
-pyplot.title('Density of two-component mixture model')
-pyplot.ylabel('probability density');
-pyplot.show()
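
Each iteration above mixes the new component in with weight 2 / (t + 1) and
rescales the existing weights by 1 - 2 / (t + 1), a fixed step-size rule in the
spirit of Frank-Wolfe updates. A standalone sketch of just this schedule (the
helper name is chosen here for illustration):

    import torch

    def weight_schedule(n_iterations):
        # Mirrors the updates in boosting_bbvi(): the initial dummy component
        # starts at weight 1; component t enters with weight 2 / (t + 1).
        weights = torch.tensor([1.])
        for t in range(1, n_iterations + 1):
            new_weight = 2 / (t + 1)
            weights = weights * (1 - new_weight)
            weights = torch.cat((weights, torch.tensor([new_weight])))
        return weights

    print(weight_schedule(2))  # tensor([0.0000, 0.3333, 0.6667])

Note that the dummy approximation's weight drops to zero after the first
iteration, so it no longer contributes to the mixture.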
From 9374edb51b9704d8d3b3b23fc61808dfbc9e1a36 Mon Sep 17 00:00:00 2001
From: Gideon Dresdner
Date: Wed, 30 Oct 2019 12:23:38 +0100
Subject: [PATCH 08/14] wip, clean up deps

---
 environment.yml | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
 install         |  3 ++
 prototype.py    |  1 +
 3 files changed, 80 insertions(+)
 create mode 100644 environment.yml
 create mode 100644 install
 mode change 100644 => 100755 prototype.py

diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..5c07a77
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,76 @@
+name: bbbvipyro
+channels:
+  - pytorch
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - blas=1.0=mkl
+  - ca-certificates=2019.10.16=0
+  - certifi=2019.9.11=py37_0
+  - cffi=1.13.0=py37h2e261b9_0
+  - cudatoolkit=10.1.168=0
+  - cycler=0.10.0=py37_0
+  - dbus=1.13.12=h746ee38_0
+  - expat=2.2.6=he6710b0_0
+  - fontconfig=2.13.0=h9420a91_0
+  - freetype=2.9.1=h8a8886c_1
+  - glib=2.56.2=hd408876_0
+  - gst-plugins-base=1.14.0=hbbd80ab_1
+  - gstreamer=1.14.0=hb453b48_1
+  - icu=58.2=h9c2bf20_1
+  - intel-openmp=2019.4=243
+  - jpeg=9b=h024ee3a_2
+  - kiwisolver=1.1.0=py37he6710b0_0
+  - libedit=3.1.20181209=hc058e9b_0
+  - libffi=3.2.1=hd88cf55_4
+  - libgcc-ng=9.1.0=hdf63c60_0
+  - libgfortran-ng=7.3.0=hdf63c60_0
+  - libpng=1.6.37=hbc83047_0
+  - libstdcxx-ng=9.1.0=hdf63c60_0
+  - libtiff=4.0.10=h2733197_2
+  - libuuid=1.0.3=h1bed415_2
+  - libxcb=1.13=h1bed415_1
+  - libxml2=2.9.9=hea5a465_1
+  - matplotlib=3.1.1=py37h5429711_0
+  - mkl=2019.4=243
+  - mkl-service=2.3.0=py37he904b0f_0
+  - mkl_fft=1.0.14=py37ha843d7b_0
+  - mkl_random=1.1.0=py37hd6b4f25_0
+  - ncurses=6.1=he6710b0_1
+  - ninja=1.9.0=py37hfd86e86_0
+  - numpy=1.17.2=py37haad9e8e_0
+  - numpy-base=1.17.2=py37hde5b4d6_0
+  - olefile=0.46=py37_0
+  - openssl=1.1.1d=h7b6447c_3
+  - pcre=8.43=he6710b0_0
+  - pillow=6.2.0=py37h34e0f95_0
+  - pip=19.3.1=py37_0
+  - pycparser=2.19=py37_0
+  - pyparsing=2.4.2=py_0
+  - pyqt=5.9.2=py37h05f1152_2
+  - python=3.7.4=h265db76_1
+  - python-dateutil=2.8.0=py37_0
+  - pytorch=1.3.0=py3.7_cuda10.1.243_cudnn7.6.3_0
+  - pytz=2019.3=py_0
+  - qt=5.9.7=h5867ecd_1
+  - readline=7.0=h7b6447c_5
+  - scipy=1.3.1=py37h7c811a0_0
+  - setuptools=41.4.0=py37_0
+  - sip=4.19.8=py37hf484d3e_0
+  - six=1.12.0=py37_0
+  - sqlite=3.30.1=h7b6447c_0
+  - tk=8.6.8=hbc83047_0
+  - torchvision=0.4.1=py37_cu101
+  - tornado=6.0.3=py37h7b6447c_0
+  - wheel=0.33.6=py37_0
+  - xz=5.2.4=h14c3975_4
+  - zlib=1.2.11=h7b6447c_3
+  - zstd=1.3.7=h0b5b093_0
+  - pip:
+    - opt-einsum==3.1.0
+    - pyro-api==0.1.1
+    - pyro-ppl==0.5.1
+    - python-graphviz==0.13
+    - tqdm==4.36.1
+prefix: /cluster/home/dgideon/software/anaconda3/envs/bbbvipyro
+

diff --git a/install b/install
new file mode 100644
index 0000000..3af76c9
--- /dev/null
+++ b/install
@@ -0,0 +1,3 @@
+conda install -y pytorch torchvision -c pytorch
+conda install -y scipy numpy matplotlib
+pip install pyro-ppl

diff --git a/prototype.py b/prototype.py
old mode 100644
new mode 100755
index 5b9cf12..9d55cc1
--- a/prototype.py
+++ b/prototype.py
@@ -20,6 +20,7 @@
 pyro.set_rng_seed(2)
 
 # enable validation (e.g. validate parameters of distributions)
+print(pyro.__version__) # returns 0.5.something. Why is this bad? #TODO(gideon)
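+# (environment.yml pins pyro-ppl 0.5.1 while the assertion below still expects 0.4.1, so it fails under the new environment -- presumably what this TODO is about.)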
 assert pyro.__version__.startswith('0.4.1')
 pyro.enable_validation(True)

From f6eb79c52b8b1ab551c4f25eb89c2bbe060685c9 Mon Sep 17 00:00:00 2001
From: Gideon Dresdner
Date: Wed, 30 Oct 2019 14:23:43 +0100
Subject: [PATCH 09/14] reproducible environment

---
 environment.yml | 74 ++++---------------------------------------------
 install         |  3 --
 prototype.py    |  2 --
 3 files changed, 6 insertions(+), 73 deletions(-)
 delete mode 100644 install

diff --git a/environment.yml b/environment.yml
index 5c07a77..360dce0 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,74 +3,12 @@ channels:
   - pytorch
   - defaults
 dependencies:
-  - _libgcc_mutex=0.1=main
-  - blas=1.0=mkl
-  - ca-certificates=2019.10.16=0
-  - certifi=2019.9.11=py37_0
-  - cffi=1.13.0=py37h2e261b9_0
-  - cudatoolkit=10.1.168=0
-  - cycler=0.10.0=py37_0
-  - dbus=1.13.12=h746ee38_0
-  - expat=2.2.6=he6710b0_0
-  - fontconfig=2.13.0=h9420a91_0
-  - freetype=2.9.1=h8a8886c_1
-  - glib=2.56.2=hd408876_0
-  - gst-plugins-base=1.14.0=hbbd80ab_1
-  - gstreamer=1.14.0=hb453b48_1
-  - icu=58.2=h9c2bf20_1
-  - intel-openmp=2019.4=243
-  - jpeg=9b=h024ee3a_2
-  - kiwisolver=1.1.0=py37he6710b0_0
-  - libedit=3.1.20181209=hc058e9b_0
-  - libffi=3.2.1=hd88cf55_4
-  - libgcc-ng=9.1.0=hdf63c60_0
-  - libgfortran-ng=7.3.0=hdf63c60_0
-  - libpng=1.6.37=hbc83047_0
-  - libstdcxx-ng=9.1.0=hdf63c60_0
-  - libtiff=4.0.10=h2733197_2
-  - libuuid=1.0.3=h1bed415_2
-  - libxcb=1.13=h1bed415_1
-  - libxml2=2.9.9=hea5a465_1
-  - matplotlib=3.1.1=py37h5429711_0
-  - mkl=2019.4=243
-  - mkl-service=2.3.0=py37he904b0f_0
-  - mkl_fft=1.0.14=py37ha843d7b_0
-  - mkl_random=1.1.0=py37hd6b4f25_0
-  - ncurses=6.1=he6710b0_1
-  - ninja=1.9.0=py37hfd86e86_0
-  - numpy=1.17.2=py37haad9e8e_0
-  - numpy-base=1.17.2=py37hde5b4d6_0
-  - olefile=0.46=py37_0
-  - openssl=1.1.1d=h7b6447c_3
-  - pcre=8.43=he6710b0_0
-  - pillow=6.2.0=py37h34e0f95_0
-  - pip=19.3.1=py37_0
-  - pycparser=2.19=py37_0
-  - pyparsing=2.4.2=py_0
-  - pyqt=5.9.2=py37h05f1152_2
-  - python=3.7.4=h265db76_1
-  - python-dateutil=2.8.0=py37_0
-  - pytorch=1.3.0=py3.7_cuda10.1.243_cudnn7.6.3_0
-  - pytz=2019.3=py_0
-  - qt=5.9.7=h5867ecd_1
-  - readline=7.0=h7b6447c_5
-  - scipy=1.3.1=py37h7c811a0_0
-  - setuptools=41.4.0=py37_0
-  - sip=4.19.8=py37hf484d3e_0
-  - six=1.12.0=py37_0
-  - sqlite=3.30.1=h7b6447c_0
-  - tk=8.6.8=hbc83047_0
-  - torchvision=0.4.1=py37_cu101
-  - tornado=6.0.3=py37h7b6447c_0
-  - wheel=0.33.6=py37_0
-  - xz=5.2.4=h14c3975_4
-  - zlib=1.2.11=h7b6447c_3
-  - zstd=1.3.7=h0b5b093_0
+  - matplotlib
+  - numpy
+  - scipy
+  - torchvision
+  - pytorch
+  - pip
   - pip:
-    - opt-einsum==3.1.0
-    - pyro-api==0.1.1
     - pyro-ppl==0.5.1
-    - python-graphviz==0.13
-    - tqdm==4.36.1
 prefix: /cluster/home/dgideon/software/anaconda3/envs/bbbvipyro
-

diff --git a/install b/install
deleted file mode 100644
index 3af76c9..0000000
--- a/install
+++ /dev/null
@@ -1,3 +0,0 @@
-conda install -y pytorch torchvision -c pytorch
-conda install -y scipy numpy matplotlib
-pip install pyro-ppl

diff --git a/prototype.py b/prototype.py
index 9d55cc1..c1604bf 100755
--- a/prototype.py
+++ b/prototype.py
@@ -20,8 +20,6 @@
 pyro.set_rng_seed(2)
 
 # enable validation (e.g. validate parameters of distributions)
-print(pyro.__version__) # returns 0.5.something. Why is this bad? #TODO(gideon)
-assert pyro.__version__.startswith('0.4.1')
 pyro.enable_validation(True)
 
 # clear the param store in case we're in a REPL
From 7f005c2a3994378441f70d1d39ead69697981833 Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Sat, 9 Nov 2019 18:00:46 +0100
Subject: [PATCH 10/14] Add first draft of bimodal_posterior example

---
 bimodal_posterior.py | 160 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 bimodal_posterior.py

diff --git a/bimodal_posterior.py b/bimodal_posterior.py
new file mode 100644
index 0000000..163a84e
--- /dev/null
+++ b/bimodal_posterior.py
@@ -0,0 +1,160 @@
+import math
+import os
+import torch
+import torch.distributions.constraints as constraints
+import pyro
+from pyro.optim import Adam, SGD
+from pyro.infer import SVI, Trace_ELBO, config_enumerate
+import pyro.distributions as dist
+from pyro.infer.autoguide import AutoDelta
+from pyro import poutine
+from pyro.poutine import trace, replay, block
+from functools import partial
+import numpy as np
+import scipy.stats
+from pyro.infer.autoguide import AutoDelta
+from collections import defaultdict
+import matplotlib
+from matplotlib import pyplot
+
+PRINT_INTERMEDIATE_LATENT_VALUES = True
+PRINT_TRACES = False
+
+# this is for running the notebook in our testing framework
+smoke_test = ('CI' in os.environ)
+n_steps = 2 if smoke_test else 2000
+pyro.set_rng_seed(2)
+
+# enable validation (e.g. validate parameters of distributions)
+pyro.enable_validation(True)
+
+# clear the param store in case we're in a REPL
+pyro.clear_param_store()
+
+data = torch.tensor([4.0, 4.2, 3.9, 4.1, 3.8, 3.5, 4.3])
+
+def guide(data, index):
+    variance_q = pyro.param('variance_{}'.format(index), torch.tensor([1.0]), constraints.positive)
+    mu_q = pyro.param('mu_{}'.format(index), torch.tensor([1.0]))
+    pyro.sample("mu", dist.Normal(mu_q, variance_q))
+
+@config_enumerate
+def model(data):
+    # Global variables.
+    prior_mu = torch.tensor([0.])
+    prior_variance = torch.tensor([5.])
+    mu = pyro.sample('mu', dist.Normal(prior_mu, prior_variance))
+    variance = torch.tensor([1.])
+
+    for i in pyro.plate('data', len(data)):
+        # Local variables.
+        pyro.sample('obs_{}'.format(i), dist.Normal(mu*mu, variance), obs=data[i])
+
+@config_enumerate
+def approximation(data, components, weights):
+    for i in pyro.plate('data', len(data)):
+        assignment = pyro.sample('assignment_{}'.format(i), dist.Categorical(weights))
+        distribution = components[assignment](data)
+
+def dummy_approximation(data):
+    variance_q = pyro.param('variance_0', torch.tensor([1.0]), constraints.positive)
+    mu_q = pyro.param('mu_0', torch.tensor([20.0]))
+    pyro.sample("mu", dist.Normal(mu_q, variance_q))
+
+def relbo(model, guide, *args, **kwargs):
+
+    approximation = kwargs.pop('approximation', None)
+    # Run the guide with the arguments passed to SVI.step() and trace the execution,
+    # i.e. record all the calls to Pyro primitives like sample() and param().
+    #print("enter relbo")
+    guide_trace = trace(guide).get_trace(*args, **kwargs)
+    #print(guide_trace.nodes['obs_1'])
+    model_trace = trace(replay(model, guide_trace)).get_trace(*args, **kwargs)
+    #print(model_trace.nodes['obs_1'])
+
+
+    approximation_trace = trace(replay(block(approximation, expose_fn= lambda site: 'obs_' in site['name']), guide_trace)).get_trace(*args, **kwargs)
+    # We will accumulate the various terms of the ELBO in `elbo`.
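+    # Note: the manual log-prob sum below is computed but immediately
+    # overwritten by the TraceEnum_ELBO-based loss, which marginalizes the
+    # enumerated discrete sites correctly.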
+
+    elbo = model_trace.log_prob_sum() - guide_trace.log_prob_sum() - approximation_trace.log_prob_sum()
+    loss_fn = pyro.infer.TraceEnum_ELBO(max_plate_nesting=1).differentiable_loss(model,
+                                                                                 guide,
+                                                                                 *args, **kwargs)
+
+    elbo = -loss_fn - approximation_trace.log_prob_sum()
+    # Return (-elbo) since by convention we do gradient descent on a loss and
+    # the ELBO is a lower bound that needs to be maximized.
+
+    return -elbo
+
+
+def boosting_bbvi():
+    n_iterations = 2
+
+    initial_approximation = dummy_approximation
+    components = [initial_approximation]
+    weights = torch.tensor([1.])
+    wrapped_approximation = partial(approximation, components=components,
+                                    weights=weights)
+
+    locs = [0]
+    scales = [0]
+
+    gradient_norms = defaultdict(list)
+    for t in range(1, n_iterations + 1):
+        # setup the inference algorithm
+        wrapped_guide = partial(guide, index=t)
+        # do gradient steps
+        losses = []
+        # Register hooks to monitor gradient norms.
+        wrapped_guide(data)
+        print(pyro.get_param_store().named_parameters())
+
+        adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
+        optimizer = Adam(adam_params)
+        for name, value in pyro.get_param_store().named_parameters():
+            if not name in gradient_norms:
+                value.register_hook(lambda g, name=name: gradient_norms[name].append(g.norm().item()))
+
+        svi = SVI(model, wrapped_guide, optimizer, loss=relbo)
+        for step in range(n_steps):
+            loss = svi.step(data, approximation=wrapped_approximation)
+            losses.append(loss)
+
+            if PRINT_INTERMEDIATE_LATENT_VALUES:
+                print('Loss: {}'.format(loss))
+                variance = pyro.param("variance_{}".format(t)).item()
+                mu = pyro.param("mu_{}".format(t)).item()
+                print('mu = {}'.format(mu))
+                print('variance = {}'.format(variance))
+
+            if step % 100 == 0:
+                print('.', end=' ')
+
+        pyplot.plot(range(len(losses)), losses)
+        pyplot.xlabel('Update Steps')
+        pyplot.ylabel('-ELBO')
+        pyplot.title('-ELBO against time for component {}'.format(t));
+        pyplot.show()
+
+        components.append(wrapped_guide)
+        new_weight = 2 / (t + 1)
+
+        weights = weights * (1-new_weight)
+        weights = torch.cat((weights, torch.tensor([new_weight])))
+
+        wrapped_approximation = partial(approximation, components=components, weights=weights)
+
+    pyplot.figure(figsize=(10, 4), dpi=100).set_facecolor('white')
+    for name, grad_norms in gradient_norms.items():
+        pyplot.plot(grad_norms, label=name)
+    pyplot.xlabel('iters')
+    pyplot.ylabel('gradient norm')
+    # pyplot.yscale('log')
+    pyplot.legend(loc='best')
+    pyplot.title('Gradient norms during SVI');
+    pyplot.show()
+
+
+if __name__ == '__main__':
+    boosting_bbvi()
\ No newline at end of file
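
In bimodal_posterior.py the likelihood is obs_i ~ Normal(mu^2, 1) with prior
mu ~ Normal(0, 5), so the exact posterior over mu is symmetric with modes near
+/-2 (since mean(data) is about 3.97, close to mu^2). A quick standalone check
of that target density, useful for judging the fitted mixture (names here are
chosen for illustration):

    import numpy as np
    import scipy.stats

    data = np.array([4.0, 4.2, 3.9, 4.1, 3.8, 3.5, 4.3])

    def unnormalized_posterior(mu):
        # prior mu ~ Normal(0, 5) times likelihood obs_i ~ Normal(mu**2, 1),
        # matching the model in bimodal_posterior.py
        prior = scipy.stats.norm.pdf(mu, loc=0., scale=5.)
        likelihood = np.prod(scipy.stats.norm.pdf(data, loc=mu**2, scale=1.))
        return prior * likelihood

    mus = np.arange(-3., 3., 0.01)
    densities = np.array([unnormalized_posterior(mu) for mu in mus])
    print(mus[densities.argmax()])  # close to +/-2, the two posterior modes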
From 53d6a25c878f8f0bfa6d95cf7e28a8924dc5204d Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Sat, 9 Nov 2019 19:00:19 +0100
Subject: [PATCH 11/14] Fix exposure of approximation parameters

---
 bimodal_posterior.py | 40 ++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/bimodal_posterior.py b/bimodal_posterior.py
index 163a84e..6876743 100644
--- a/bimodal_posterior.py
+++ b/bimodal_posterior.py
@@ -22,7 +22,7 @@
 # this is for running the notebook in our testing framework
 smoke_test = ('CI' in os.environ)
-n_steps = 2 if smoke_test else 2000
+n_steps = 2 if smoke_test else 5000
 pyro.set_rng_seed(2)
 
 # enable validation (e.g. validate parameters of distributions)
@@ -35,7 +35,7 @@
 
 def guide(data, index):
     variance_q = pyro.param('variance_{}'.format(index), torch.tensor([1.0]), constraints.positive)
-    mu_q = pyro.param('mu_{}'.format(index), torch.tensor([1.0]))
+    mu_q = pyro.param('mu_{}'.format(index), torch.tensor([-1.0]))
     pyro.sample("mu", dist.Normal(mu_q, variance_q))
 
 @config_enumerate
@@ -52,13 +52,12 @@ def model(data):
 
 @config_enumerate
 def approximation(data, components, weights):
-    for i in pyro.plate('data', len(data)):
-        assignment = pyro.sample('assignment_{}'.format(i), dist.Categorical(weights))
-        distribution = components[assignment](data)
+    assignment = pyro.sample('assignment', dist.Categorical(weights))
+    distribution = components[assignment](data)
 
 def dummy_approximation(data):
     variance_q = pyro.param('variance_0', torch.tensor([1.0]), constraints.positive)
-    mu_q = pyro.param('mu_0', torch.tensor([20.0]))
+    mu_q = pyro.param('mu_0', torch.tensor([1.0]))
     pyro.sample("mu", dist.Normal(mu_q, variance_q))
 
 def relbo(model, guide, *args, **kwargs):
@@ -73,7 +72,7 @@ def relbo(model, guide, *args, **kwargs):
     #print(model_trace.nodes['obs_1'])
 
 
-    approximation_trace = trace(replay(block(approximation, expose_fn= lambda site: 'obs_' in site['name']), guide_trace)).get_trace(*args, **kwargs)
+    approximation_trace = trace(replay(block(approximation, expose=['mu']), guide_trace)).get_trace(*args, **kwargs)
     # We will accumulate the various terms of the ELBO in `elbo`.
 
     elbo = model_trace.log_prob_sum() - guide_trace.log_prob_sum() - approximation_trace.log_prob_sum()
     loss_fn = pyro.infer.TraceEnum_ELBO(max_plate_nesting=1).differentiable_loss(model,
                                                                                  guide,
                                                                                  *args, **kwargs)
 
+    # print(loss_fn)
+    # print(approximation_trace.log_prob_sum())
     elbo = -loss_fn - approximation_trace.log_prob_sum()
     # Return (-elbo) since by convention we do gradient descent on a loss and
     # the ELBO is a lower bound that needs to be maximized.
@@ -110,7 +111,7 @@ def boosting_bbvi():
         wrapped_guide(data)
         print(pyro.get_param_store().named_parameters())
 
-        adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
+        adam_params = {"lr": 0.005, "betas": (0.90, 0.999)}
         optimizer = Adam(adam_params)
         for name, value in pyro.get_param_store().named_parameters():
             if not name in gradient_norms:
@@ -145,6 +146,13 @@ def boosting_bbvi():
 
         wrapped_approximation = partial(approximation, components=components, weights=weights)
 
+        scale = pyro.param("variance_{}".format(t)).item()
+        scales.append(scale)
+        loc = pyro.param("mu_{}".format(t)).item()
+        locs.append(loc)
+        print('mu = {}'.format(loc))
+        print('variance = {}'.format(scale))
+
     pyplot.figure(figsize=(10, 4), dpi=100).set_facecolor('white')
     for name, grad_norms in gradient_norms.items():
         pyplot.plot(grad_norms, label=name)
@@ -155,6 +163,22 @@ def boosting_bbvi():
     pyplot.title('Gradient norms during SVI');
     pyplot.show()
 
+    print(weights)
+    print(locs)
+    print(scales)
+
+    X = np.arange(-10, 10, 0.1)
+    Y1 = weights[1].item() * scipy.stats.norm.pdf((X - locs[1]) / scales[1])
+    Y2 = weights[2].item() * scipy.stats.norm.pdf((X - locs[2]) / scales[2])
+
+    pyplot.figure(figsize=(10, 4), dpi=100).set_facecolor('white')
+    pyplot.plot(X, Y1, 'r-')
+    pyplot.plot(X, Y2, 'b-')
+    pyplot.plot(X, Y1 + Y2, 'k--')
+    pyplot.plot(data.data.numpy(), np.zeros(len(data)), 'k*')
+    pyplot.title('Approximation of posterior over mu')
+    pyplot.ylabel('probability density');
+    pyplot.show()
 
 if __name__ == '__main__':
     boosting_bbvi()
\ No newline at end of file

From 32950ef1842730a71ff4209cbd9ec15e2cdec323 Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Sat, 9 Nov 2019 19:13:34 +0100
Subject: [PATCH 12/14] Decrease learning rate

---
 bimodal_posterior.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bimodal_posterior.py b/bimodal_posterior.py
index 6876743..c0d2173 100644
--- a/bimodal_posterior.py
+++ b/bimodal_posterior.py
@@ -35,7 +35,7 @@
 
 def guide(data, index):
     variance_q = pyro.param('variance_{}'.format(index), torch.tensor([1.0]), constraints.positive)
-    mu_q = pyro.param('mu_{}'.format(index), torch.tensor([-1.0]))
+    mu_q = pyro.param('mu_{}'.format(index), torch.tensor([1.0]))
     pyro.sample("mu", dist.Normal(mu_q, variance_q))
 
 @config_enumerate
@@ -57,7 +57,7 @@ def approximation(data, components, weights):
 
 def dummy_approximation(data):
     variance_q = pyro.param('variance_0', torch.tensor([1.0]), constraints.positive)
-    mu_q = pyro.param('mu_0', torch.tensor([1.0]))
+    mu_q = pyro.param('mu_0', torch.tensor([20.0]))
     pyro.sample("mu", dist.Normal(mu_q, variance_q))
 
 def relbo(model, guide, *args, **kwargs):
@@ -111,7 +111,7 @@ def boosting_bbvi():
         wrapped_guide(data)
         print(pyro.get_param_store().named_parameters())
 
-        adam_params = {"lr": 0.005, "betas": (0.90, 0.999)}
+        adam_params = {"lr": 0.001, "betas": (0.90, 0.999)}
         optimizer = Adam(adam_params)
         for name, value in pyro.get_param_store().named_parameters():
             if not name in gradient_norms:

From 565353e881f02eb444fd6d0755e2a55fb791f1cf Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Sat, 9 Nov 2019 19:15:56 +0100
Subject: [PATCH 13/14] Add comment

---
 bimodal_posterior.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bimodal_posterior.py b/bimodal_posterior.py
index c0d2173..f2e6ee7 100644
--- a/bimodal_posterior.py
+++ b/bimodal_posterior.py
@@ -75,7 +75,9 @@ def relbo(model, guide, *args, **kwargs):
 
     approximation_trace = trace(replay(block(approximation, expose=['mu']), guide_trace)).get_trace(*args, **kwargs)
     # We will accumulate the various terms of the ELBO in `elbo`.
-    elbo = model_trace.log_prob_sum() - guide_trace.log_prob_sum() - approximation_trace.log_prob_sum()
+    # This is how we computed the ELBO before using TraceEnum_ELBO:
+    # elbo = model_trace.log_prob_sum() - guide_trace.log_prob_sum() - approximation_trace.log_prob_sum()
+
     loss_fn = pyro.infer.TraceEnum_ELBO(max_plate_nesting=1).differentiable_loss(model,
                                                                                  guide,
                                                                                  *args, **kwargs)

From f416629f3aaa5f55f24bc18c5578d25b08e7e69f Mon Sep 17 00:00:00 2001
From: Lorenz
Date: Sat, 9 Nov 2019 19:33:03 +0100
Subject: [PATCH 14/14] Increase learning rate

---
 bimodal_posterior.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bimodal_posterior.py b/bimodal_posterior.py
index f2e6ee7..de92785 100644
--- a/bimodal_posterior.py
+++ b/bimodal_posterior.py
@@ -77,7 +77,7 @@ def relbo(model, guide, *args, **kwargs):
     # This is how we computed the ELBO before using TraceEnum_ELBO:
     # elbo = model_trace.log_prob_sum() - guide_trace.log_prob_sum() - approximation_trace.log_prob_sum()
-    
+
     loss_fn = pyro.infer.TraceEnum_ELBO(max_plate_nesting=1).differentiable_loss(model,
                                                                                  guide,
                                                                                  *args, **kwargs)
@@ -113,7 +113,7 @@ def boosting_bbvi():
         wrapped_guide(data)
         print(pyro.get_param_store().named_parameters())
 
-        adam_params = {"lr": 0.001, "betas": (0.90, 0.999)}
+        adam_params = {"lr": 0.002, "betas": (0.90, 0.999)}
         optimizer = Adam(adam_params)
         for name, value in pyro.get_param_store().named_parameters():
             if not name in gradient_norms: