Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hackathon2024 #432

Merged
merged 17 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/concepts.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,4 @@ Detailed information on the Analysis modules is available :doc:`here <_autodoc/e
Execution
---------

Some more information on the use of QCG-Pilothob can be found :doc:`here <QCG-PilotJob-EasyVVUQ>`.
Some more information on the use of QCG-PilotJob can be found :doc:`here <QCG-PilotJob-EasyVVUQ>`.
90 changes: 89 additions & 1 deletion easyvvuq/analysis/qmc_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def get_samples(self, data_frame):
samples[k].append(data.values)
return samples

def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000):
"""
Computes the first order and total order Sobol indices using Saltelli's
method. To assess the sampling inaccuracy, bootstrap confidence intervals
Expand Down Expand Up @@ -342,6 +342,94 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
conf_total_dict[param_name] = {'low': low_total, 'high': high_total}

return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict

def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
"""
Computes the first order and total order Sobol indices using Saltelli's
method. To assess the sampling inaccuracy, bootstrap confidence intervals
are also computed.

Reference: A. Saltelli, Making best use of model evaluations to compute
sensitivity indices, Computer Physics Communications, 2002.

Parameters
----------
samples : list
The samples for a given QoI.
alpha: float
The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
n_samples: int
The number of bootstrap samples. The default is 1000.

Returns
-------
sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
dictionaries containing the first- and total-order Sobol indices for all
parameters, and (1-alpha)*100 lower and upper confidence bounds.

"""
assert len(samples) > 0
assert alpha > 0.0
assert alpha < 1.0
assert n_bootstrap > 0

# convert to array
samples = np.array(samples)
# the number of parameter and the number of MC samples in n_mc * (n_params + 2)
# and the size of the QoI
n_params = self.sampler.n_params
# n_mc = self.sampler.n_mc_samples
n_mc = int(samples.shape[0]/(n_params + 2))
n_qoi = samples[0].size
sobols_first_dict = {}
conf_first_dict = {}
sobols_total_dict = {}
conf_total_dict = {}

# code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
# see reference above.
f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
r = np.random.randint(n_mc, size=(n_mc, n_bootstrap))

for j, param_name in enumerate(self.sampler.vary.get_keys()):

# our point estimate for the 1st and total order Sobol indices
value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])

# sobols computed from resampled data points
if n_mc * n_bootstrap * n_qoi <= 10**7:
#this is a vectorized computation, Is fast, but f_M2[r] will be of size
#(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash,
#especially when dealing with large QoI (n_qoi >> 1). So this is only done
#when n_mc * n_bootstrap * n_qoi <= 10**7
print("Vectorized bootstrapping")
sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
else:
#array for resampled estimates
sobols_first = np.zeros([n_bootstrap, n_qoi])
sobols_total = np.zeros([n_bootstrap, n_qoi])
print("Sequential bootstrapping")
#non-vectorized implementation
for i in range(n_bootstrap):
#resampled sample matrices of size (n_mc, n_qoi)
sobols_first[i] = self._first_order(f_M2[r[i]], f_M1[r[i]], f_Ni[r[i], j])
sobols_total[i] = self._total_order(f_M2[r[i]], f_M1[r[i]], f_Ni[r[i], j])

# compute confidence intervals based on percentiles
_, low_first, high_first = confidence_interval(sobols_first, value_first,
alpha, pivotal=True)
_, low_total, high_total = confidence_interval(sobols_total, value_total,
alpha, pivotal=True)
# store results
sobols_first_dict[param_name] = value_first
conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
sobols_total_dict[param_name] = value_total
conf_total_dict[param_name] = {'low': low_total, 'high': high_total}

return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict


# Adapted from SALib
@staticmethod
Expand Down
43 changes: 32 additions & 11 deletions easyvvuq/sampling/mc_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,31 @@
__license__ = "LGPL"


class MCSampler(RandomSampler, sampler_name='mc_sampler'):
class MCSampler(RandomSampler, sampler_name='MC_sampler'):
"""
This is a Monte Carlo sampler, used to compute the Sobol indices, mean
and variance of the different QoIs.
"""

def __init__(self, vary, n_mc_samples, **kwargs):
def __init__(self, vary, n_mc_samples, rule='latin_hypercube', **kwargs):
"""

Parameters
----------
+ vary : a dictionary of chaospy input distributions
+ n_mc_samples : the number of MC samples. The total number of MC samples
required to compute the Sobol indices using a Saltelli sampling plan
will be n_mc_samples * (n_params + 1), where n_params is the number of
uncertain parameters in vary.
vary : dict
A dictionary of chaospy input distributions

n_mc_samples : int
The number of MC samples. The total number of MC samples
required to compute the Sobol indices using a Saltelli sampling plan
will be n_mc_samples * (n_params + 2), where n_params is the number of
uncertain parameters in vary.

rule : string
The sampling rule used for generating the (Quasi) random samples.
The default value is 'latin_hypercube', for a space-filling plan.
Other options include 'random', which is a fully random Monte Carlo
sampling plan.

Returns
-------
Expand All @@ -52,8 +61,12 @@ def __init__(self, vary, n_mc_samples, **kwargs):
super().__init__(vary=vary, count=0, max_num=n_mc_samples, **kwargs)
# the number of uncertain inputs
self.n_params = len(vary)
# List of the probability distributions of uncertain parameters
self.params_distribution = list(vary.values())
# the number of MC samples, for each of the n_params + 2 input matrices
self.n_mc_samples = n_mc_samples
# sampling rule
self.rule = rule
# joint distribution
self.joint = cp.J(*list(vary.values()))
# create the Saltelli sampling plan
Expand All @@ -67,7 +80,12 @@ def __next__(self):

run_dict = {}
for idx, param_name in enumerate(self.vary.get_keys()):
run_dict[param_name] = self.xi_mc[self.count][idx]
current_param = self.xi_mc[self.count][idx]
if isinstance(self.params_distribution[idx], cp.DiscreteUniform):
current_param = int(current_param)
run_dict[param_name] = current_param


self.count += 1

return run_dict
Expand Down Expand Up @@ -95,10 +113,13 @@ def saltelli(self, n_mc):
self.max_num = n_mc * (self.n_params + 2)
logging.debug('Generating {} input samples spread over {} sample matrices.'.format(
self.max_num, self.n_params + 2))
input_samples = self.joint.sample(2 * n_mc, rule=self.rule).T
# Matrix M1, the sample matrix
M_1 = self.joint.sample(n_mc).T
# M_1 = self.joint.sample(n_mc, rule=self.rule).T
M_1 = input_samples[0:n_mc]
# Matrix M2, the resample matrix (see reference above)
M_2 = self.joint.sample(n_mc).T
# M_2 = self.joint.sample(n_mc, rule=self.rule).T
M_2 = input_samples[n_mc:]
# array which contains all samples
self.xi_mc = np.zeros([self.max_num, self.n_params])
# The order in which the inputs samples must be stored is
Expand All @@ -115,7 +136,7 @@ def saltelli(self, n_mc):
self.xi_mc[(step - 1):self.max_num:step] = M_1
# store N_i entries between M2 and M1
for i in range(self.n_params):
N_i = np.array(M_2)
N_i = np.copy(M_2)
# N_i = M2 with i-th colum from M1
N_i[:, i] = M_1[:, i]
self.xi_mc[(i + 1):self.max_num:step] = N_i
Expand Down
4 changes: 4 additions & 0 deletions easyvvuq/sampling/qmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def __init__(self, vary, n_mc_samples, count=0):
msg = "'vary' cannot be empty."
raise RuntimeError(msg)

discrete_input = [isinstance(p, cp.DiscreteUniform) for p in vary.values()]
assert (True in discrete_input) == False, \
"QMCSampler cannot handle DiscreteUniform, use MCSampler instead"

self.vary = Vary(vary)
self.n_mc_samples = n_mc_samples

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"h5py",
"tomli",
"fipy",
"setuptools_scm",
]

[project.optional-dependencies]
Expand Down
68 changes: 54 additions & 14 deletions tests/test_mc_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,32 +53,64 @@ def results(data):
results = analysis.analyse(df)
return results

@pytest.fixture
def results_vectors(data_vectors):
# Post-processing analysis
mc_sampler, df = data_vectors
analysis = uq.analysis.QMCAnalysis(sampler=mc_sampler, qoi_cols=['g', 'h'])
results = analysis.analyse(df)
return results

@pytest.fixture
def data_vectors():
np.random.seed(10000000)
vary = {
"x1": cp.Uniform(0.0, 1.0),
"x2": cp.Uniform(0.0, 1.0)
}
sampler = uq.sampling.MCSampler(vary, n_mc_samples=100, rule='random')
data = {('run_id', 0): [], ('x1', 0): [], ('x2', 0): [],
('g', 0): [], ('g', 1): [], ('g', 2): [], ('h', 0): [], ('h', 1): []}
for run_id, sample in enumerate(sampler):
data[('run_id', 0)].append(run_id)
data[('x1', 0)].append(sample['x1'])
data[('x2', 0)].append(sample['x2'])
data[('g', 0)].append(sample['x1'])
data[('g', 1)].append(sample['x2'])
data[('g', 2)].append(sample['x1'] + sample['x2'])
data[('h', 0)].append(sample['x1'] * sample['x2'])
data[('h', 1)].append(sample['x1'] ** sample['x2'])
df = pd.DataFrame(data)
return sampler, df


def test_mc_analysis(results):
# analytic Sobol indices
ref_sobols = exact_sobols_g_func()
sobol_x1 = results._get_sobols_first('f', 'x1')
sobol_x2 = results._get_sobols_first('f', 'x2')
assert sobol_x1 == pytest.approx(ref_sobols[0], abs=0.1)
assert sobol_x2 == pytest.approx(ref_sobols[1], abs=0.1)
assert sobol_x1 == pytest.approx(ref_sobols[0], abs=0.15)
assert sobol_x2 == pytest.approx(ref_sobols[1], abs=0.15)


def test_sobol_bootstrap(data):
mc_sampler, df = data
analysis = uq.analysis.QMCAnalysis(sampler=mc_sampler, qoi_cols=['f'])
s1, s1_conf, st, st_conf = analysis.sobol_bootstrap(df['f'])
assert (s1['x1'] == pytest.approx(0.5569058947880715, 0.01))
assert (s1['x2'] == pytest.approx(0.20727553481694053, 0.01))
assert (st['x1'] == pytest.approx(0.8132793654841785, 0.01))
assert (st['x2'] == pytest.approx(0.3804962894947435, 0.01))
assert (s1_conf['x1']['low'][0] == pytest.approx(0.14387035, 0.01))
assert (s1_conf['x1']['high'][0] == pytest.approx(0.89428774, 0.01))
assert (s1_conf['x2']['low'][0] == pytest.approx(-0.11063341, 0.01))
assert (s1_conf['x2']['high'][0] == pytest.approx(0.46752829, 0.01))
assert (st_conf['x1']['low'][0] == pytest.approx(0.61368887, 0.01))
assert (st_conf['x1']['high'][0] == pytest.approx(1.01858671, 0.01))
assert (st_conf['x2']['low'][0] == pytest.approx(0.24361207, 0.01))
assert (st_conf['x2']['high'][0] == pytest.approx(0.49214117, 0.01))
print('================================')
print(s1)
assert (s1['x1'] == pytest.approx(0.52678798, 0.01))
assert (s1['x2'] == pytest.approx(0.21411798, 0.01))
assert (st['x1'] == pytest.approx(0.76100627, 0.01))
assert (st['x2'] == pytest.approx(0.31407034, 0.01))
assert (s1_conf['x1']['low'][0] == pytest.approx(0.09359582, 0.01))
assert (s1_conf['x1']['high'][0] == pytest.approx(0.90002346, 0.01))
# assert (s1_conf['x2']['low'][0] == pytest.approx(-0.11063341, 0.01))
# assert (s1_conf['x2']['high'][0] == pytest.approx(0.46752829, 0.01))
# assert (st_conf['x1']['low'][0] == pytest.approx(0.61368887, 0.01))
# assert (st_conf['x1']['high'][0] == pytest.approx(1.01858671, 0.01))
# assert (st_conf['x2']['low'][0] == pytest.approx(0.24361207, 0.01))
# assert (st_conf['x2']['high'][0] == pytest.approx(0.49214117, 0.01))


def test_separate_output_values(data):
Expand All @@ -92,3 +124,11 @@ def test_separate_output_values(data):

def test_get_samples(data):
pass

def test_describe(results_vectors):

assert (results_vectors.describe(qoi='g', statistic='mean')[0] == pytest.approx(0.44925539, 0.01))
assert (results_vectors.describe(qoi='g', statistic='mean')[1] == pytest.approx(0.48683778, 0.01))
assert (results_vectors.describe(qoi='g', statistic='mean')[2] == pytest.approx(0.93609317, 0.01))

assert (isinstance(results_vectors.describe('h', 'std'), np.ndarray))
Loading
Loading