diff --git a/src/ert/analysis/misfit_preprocessor.py b/src/ert/analysis/misfit_preprocessor.py index beb6a5e21c7..03f56bb7535 100644 --- a/src/ert/analysis/misfit_preprocessor.py +++ b/src/ert/analysis/misfit_preprocessor.py @@ -34,6 +34,9 @@ def get_nr_primary_components( """ Calculate the number of principal components needed to achieve a cumulative variance less than a specified threshold using Singular Value Decomposition (SVD). + + responses is expected to be on format n x p, where n is number of realizations and + p is number of observations. """ data_matrix = responses - responses.mean(axis=0) _, singulars, _ = np.linalg.svd(data_matrix.astype(float), full_matrices=False) @@ -42,7 +45,7 @@ def get_nr_primary_components( # We compute the cumulative sum of these, then divide by their total sum to get the # cumulative proportion of variance explained by each successive component. variance_ratio = np.cumsum(singulars**2) / np.sum(singulars**2) - return len([1 for i in variance_ratio[:-1] if i < threshold]) + return int(np.argmax(variance_ratio >= threshold)) + 1 def cluster_responses( @@ -139,7 +142,7 @@ def main( # each other return scale_factors, np.ones(len(obs_errors), dtype=int), nr_components - prim_components = get_nr_primary_components(scaled_responses, threshold=0.95) + prim_components = get_nr_primary_components(scaled_responses.T, threshold=0.95) clusters = cluster_responses(scaled_responses.T, nr_clusters=prim_components) @@ -150,9 +153,8 @@ def main( components = 1 else: components = get_nr_primary_components( - scaled_responses[index], threshold=0.95 + scaled_responses[index].T, threshold=0.95 ) - components = 1 if components == 0 else components scale_factor = get_scaling_factor(len(index), components) nr_components[index] *= components scale_factors[index] *= scale_factor diff --git a/tests/ert/unit_tests/analysis/test_misfit_preprocessor.py b/tests/ert/unit_tests/analysis/test_misfit_preprocessor.py index 667bb8c2477..e79e903f321 100644 --- a/tests/ert/unit_tests/analysis/test_misfit_preprocessor.py +++ b/tests/ert/unit_tests/analysis/test_misfit_preprocessor.py @@ -52,6 +52,9 @@ def test_that_get_nr_primary_components_is_according_to_theory(): assert get_nr_primary_components(Y, threshold_2 + 0.01) == 2 assert get_nr_primary_components(Y, threshold_3 + 0.01) == 3 + # check that we always return at least 1 + assert get_nr_primary_components(Y, 0) == 1 + @pytest.mark.parametrize("nr_observations", [4, 10, 100]) def test_misfit_preprocessor(nr_observations):