Draft: improve hotstart by not using the auxiliary variable #156

Draft · wants to merge 4 commits into master (changes from all commits)
122 changes: 100 additions & 22 deletions ultranest/hotstart.py
@@ -14,6 +14,10 @@

from .utils import resample_equal, vectorize

from scipy.interpolate import PchipInterpolator, make_interp_spline
from scipy.integrate import trapezoid
from getdist import MCSamples


def get_auxiliary_problem(loglike, transform, ctr, invcov, enlargement_factor, df=1):
"""Return a new loglike and transform based on an auxiliary distribution.
@@ -342,9 +346,77 @@ def compute_quantile_intervals_refined(steps, upoints, uweights, logsteps_max=20

return ulos, uhis, uinterpspace

def conservative_prior_guess(all_x, all_p, single_dim_fraction):
""" Given a function p:[0, 1] -> R+,
represented by some samples (`all_p`) corresponding
to the points `all_x`, return another normalized function
which is the sum of p and a constant, weighed by `single_dim_fraction`
and `(1-single_dim_fraction)` respectively.
"""

integral = trapezoid(all_p, x=all_x)

return all_p * single_dim_fraction + np.ones_like(all_p) * (1 - single_dim_fraction) * integral
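
A quick sanity check of this mixture (a sketch, not part of the patch): the result preserves the integral of the input density.

import numpy as np
from scipy.integrate import trapezoid

all_x = np.linspace(0, 1, 5)
all_p = np.array([0.0, 2.0, 4.0, 2.0, 0.0])  # trapezoid integral: 2.0

mixed = conservative_prior_guess(all_x, all_p, single_dim_fraction=0.5)
# 0.5 * integral(p) + 0.5 * integral(p) = integral(p): the mass is unchanged
assert np.isclose(trapezoid(mixed, x=all_x), trapezoid(all_p, x=all_x))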

def get_aux_splines(upoints, uweights, volume_contribution=0.5, beta=1.):
""" Compute spline representations of the auxiliary transforms.

upoints: shape (n_points, dim)
uweights: shape (n_points,)
volume_contribution: fraction of total prior mass which will
be taken up by the "guess" distribution represented by upoints
(default: 0.5)
beta: temperature for the prior guess (default: 1)

"""


n_points, dim = upoints.shape
assert uweights.shape == (n_points, )
single_dim_fraction = volume_contribution**(1/dim)

mcsamples = MCSamples(
samples=upoints,
weights=uweights,
names=[str(i) for i in range(upoints.shape[1])],
ranges={str(i):[0, 1] for i in range(upoints.shape[1])}
)

densities, inverse_cumulatives = [], []

for i in range(dim):
# KDE-smoothed 1D marginal of the guess samples along axis i
density_interior = mcsamples.get1DDensity(str(i))

interior_x = density_interior.x

# evaluation grid: a regular grid on [0, 1] merged with the KDE's own support
all_x = np.sort(np.unique(np.concatenate((
np.linspace(0, 1, num=1024),
interior_x
))))

# the KDE can undershoot slightly; clip negative densities to zero
all_p = np.maximum(density_interior(all_x), 0)

# temper with beta, then mix in a uniform floor for robustness
all_p_thresholded = conservative_prior_guess(all_x, all_p**beta, single_dim_fraction)

# normalized CDF from a monotonicity-preserving (PCHIP) interpolant
interpolator = PchipInterpolator(all_x, all_p_thresholded)
cumulative = interpolator.antiderivative()
integral = cumulative(1) - cumulative(0)
cumulative_points = cumulative(all_x) / integral

# drop duplicated CDF values so the inverse is single-valued
unique_cumulative, unique_idx = np.unique(cumulative_points, return_index=True)

# inverse CDF (quantile function) as a linear spline
inverse_cumulative = make_interp_spline(unique_cumulative, all_x[unique_idx], k=1)
inverse_cumulatives.append(inverse_cumulative)

# density as the derivative of the same piecewise-linear CDF,
# so density and inverse CDF stay mutually consistent
cumulative = make_interp_spline(all_x, cumulative_points, k=1)
densities.append(cumulative.derivative())

return densities, inverse_cumulatives
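
Usage sketch (not part of the patch; assumes getdist is installed), with a made-up Gaussian guess in the unit square: each per-axis density integrates to one, and the inverse CDF maps uniform draws into the mixed guess.

import numpy as np
from scipy.integrate import trapezoid

rng = np.random.default_rng(1)
upoints = np.clip(rng.normal(0.5, 0.05, size=(400, 2)), 1e-6, 1 - 1e-6)
uweights = np.ones(len(upoints)) / len(upoints)

densities, inv_cumulatives = get_aux_splines(upoints, uweights)

grid = np.linspace(0, 1, 1001)
print(trapezoid(densities[0](grid), grid))  # ~ 1.0
draws = inv_cumulatives[0](rng.uniform(size=1000))  # samples from the mixed guess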


def get_auxiliary_contbox_parameterization(
param_names, loglike, transform, upoints, uweights, vectorized=False,
param_names, loglike, transform, upoints, uweights=None, vectorized=False,
volume_contribution=0.5, beta=1.,
):
"""Return a new loglike and transform based on an auxiliary distribution.

@@ -356,16 +428,13 @@ def get_auxiliary_contbox_parameterization(
This is achieved by deforming the prior space, and undoing that
transformation by correction weights in the likelihood.
An additional parameter, "aux_logweight", is added at the end,
which contains the correction weight. You can ignore it.

The auxiliary distribution used for transformation/weighting is
factorized. Each axis considers the ECDF of the auxiliary samples,
and segments it into quantile segments. Within each segment,
the parameter edges in u-space are linearly interpolated.
To see the interpolation quantiles for each axis, use::

steps = 10**-(1.0 * np.arange(1, 8, 2))
ulos, uhis, uinterpspace = compute_quantile_intervals_refined(steps, upoints, uweights)
which contains the correction weight.
If this parameter is negative for most of the sampling,
the samples are being drawn from the "expanded" region,
and the hotstart is working as expected.
If it is positive, the auxiliary prior guess was likely mis-specified;
the sampler may still give correct results, but it will take
as long as a regular nested sampling run (or longer).
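For example (a sketch, assuming the standard results dictionary
returned by ``ReactiveNestedSampler.run``), the correction weights
of the posterior samples can be inspected with::

aux_logw = results['samples'][:, -1]
print('mostly negative?', np.median(aux_logw) < 0)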

Parameters
------------
@@ -381,6 +450,15 @@
Weights of samples (need to sum to 1)
vectorized: bool
whether the loglike & transform functions are vectorized
volume_contribution: float
fraction of total prior mass which will
be taken up by the "guess" distribution (default: 0.5),
should be between 0 and 1
beta: float
inverse temperature for the prior guess (default: 1).
The guess will be rescaled as prior**beta: higher beta
will make the prior guess more informative and peaked,
lower beta will make it flatter and more conservative.

Returns
---------
@@ -412,23 +490,25 @@
mask = np.logical_and(upoints > 0, upoints < 1).all(axis=1)
assert np.all(mask), (
'upoints must be between 0 and 1, have:', upoints[~mask,:])
steps = 10**-(1.0 * np.arange(1, 8, 2))

nsamples, ndim = upoints.shape
assert nsamples > 10
ulos, uhis, uinterpspace = compute_quantile_intervals_refined(steps, upoints, uweights)

if uweights is None:
uweights = np.ones(nsamples) / nsamples
assert uweights.shape == (nsamples,)

aux_param_names = param_names + ['aux_logweight']

densities, inv_cumulatives = get_aux_splines(upoints, uweights, volume_contribution, beta)

def aux_transform(u):
ndim2, = u.shape
assert ndim2 == ndim + 1
umod = np.empty(ndim)
log_aux_volume_factors = 0
for i in range(ndim):
ulo_here = np.interp(u[-1], uinterpspace, ulos[:,i])
uhi_here = np.interp(u[-1], uinterpspace, uhis[:,i])
umod[i] = ulo_here + (uhi_here - ulo_here) * u[i]
log_aux_volume_factors += np.log(uhi_here - ulo_here)
umod[i] = inv_cumulatives[i](u[i])
log_aux_volume_factors -= np.log(densities[i](umod[i]))
return np.append(transform(umod), log_aux_volume_factors)
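
The subtracted log-density is the usual importance-weight correction: drawing umod from the guess density q instead of uniformly is undone by weighting with 1/q(umod). A minimal one-dimensional sketch of that identity, with a hypothetical Beta-distributed guess (not part of the patch):

import numpy as np
from scipy.stats import beta as beta_dist

q = beta_dist(5, 2)  # stand-in for a per-axis guess density
rng = np.random.default_rng(2)
x = q.ppf(rng.uniform(size=100_000))  # like umod[i] = inv_cumulatives[i](u[i])
logw = -q.logpdf(x)                   # like log_aux_volume_factors

# E_q[f(x) / q(x)] recovers the uniform-prior integral of f
f = lambda t: t ** 2
print(np.mean(f(x) * np.exp(logw)))   # ~ 1/3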

def aux_transform_vectorized(u):
@@ -437,10 +517,8 @@ def aux_transform_vectorized(u):
umod = np.empty((nsamples, ndim2 - 1))
log_aux_volume_factors = np.zeros((nsamples, 1))
for i in range(ndim):
ulo_here = np.interp(u[:,-1], uinterpspace, ulos[:,i])
uhi_here = np.interp(u[:,-1], uinterpspace, uhis[:,i])
umod[:,i] = ulo_here + (uhi_here - ulo_here) * u[:,i]
log_aux_volume_factors[:,0] += np.log(uhi_here - ulo_here)
umod[:, i] = inv_cumulatives[i](u[:,i])
log_aux_volume_factors[:,0] -= np.log(densities[i](umod[:, i]))
return np.hstack((transform(umod), log_aux_volume_factors))

def aux_loglikelihood(x):