Commit

Merge branch 'mind-inria:main' into bbi_examples

achamma723 authored Jul 18, 2024
2 parents d072b93 + 004083a commit a66b13a
Showing 41 changed files with 270 additions and 201 deletions.
@@ -27,25 +27,25 @@
# Use or not a cross-validation with the provided learner
k_fold = 2
# Identifying the categorical (nominal & ordinal) variables
- list_nominal = {}
+ variables_categories = {}

#############################################################################
# Standard Variable Importance
# ----------------------------

bbi_perm = BlockBasedImportance(
estimator="RF",
importance_estimator="Mod_RF",
importance_estimator="residuals_RF",
do_hypertuning=True,
dict_hypertuning=None,
conditional=False,
group_stacking=False,
problem_type="regression",
k_fold=k_fold,
- list_nominal=list_nominal,
+ variables_categories=variables_categories,
n_jobs=10,
verbose=0,
- n_perm=100,
+ n_permutations=100,
)
bbi_perm.fit(X, y)
print("Computing the importance scores with standard permutation")
@@ -58,17 +58,17 @@

bbi_cond = BlockBasedImportance(
estimator="RF",
importance_estimator="Mod_RF",
importance_estimator="residuals_RF",
do_hypertuning=True,
dict_hypertuning=None,
conditional=True,
group_stacking=False,
problem_type="regression",
k_fold=k_fold,
- list_nominal=list_nominal,
+ variables_categories=variables_categories,
n_jobs=10,
verbose=0,
- n_perm=100,
+ n_permutations=100,
)
bbi_cond.fit(X, y)
print("Computing the importance scores with conditional permutation")
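Taken together, the hunks above rename three BlockBasedImportance arguments: importance_estimator="Mod_RF" becomes "residuals_RF", list_nominal becomes variables_categories, and n_perm becomes n_permutations. A minimal sketch of the updated call, assuming the hidimstat API as of this commit (estimator settings mirror the example above):

import numpy as np
from sklearn.datasets import load_diabetes

from hidimstat.BBI import BlockBasedImportance

X, y = load_diabetes(return_X_y=True)

# Renamed in this commit: "Mod_RF" -> "residuals_RF",
# list_nominal -> variables_categories, n_perm -> n_permutations.
bbi = BlockBasedImportance(
    estimator="RF",
    importance_estimator="residuals_RF",
    do_hypertuning=True,
    dict_hypertuning=None,
    conditional=False,
    group_stacking=False,
    problem_type="regression",
    k_fold=2,
    variables_categories={},  # no categorical variables in this dataset
    n_jobs=10,
    verbose=0,
    n_permutations=100,
)
bbi.fit(X, y)
results = bbi.compute_importance()
pvals = -np.log10(results["pval"] + 1e-10)  # as in the example above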
@@ -7,6 +7,17 @@
"\n# Support recovery on simulated data (2D)\n\nThis example shows the advantages of spatially relaxed inference when\ndealing with high-dimensional spatial data. To do so, we compare several\nstatistical methods that aim at recovering the support, i.e., predictive\nfeatures. Among those methods some leverage the spatial structure of the\ndata. For more details about the inference algorithms presented in this\nexample or about the generative process used to simulate the data,\nplease refer to Chevalier et al. (2021) [1]_.\n\nThis example corresponds to the experiment described in details in\nChevalier et al. (2021) [1]_. Shortly, to simulate the data, we draw\n``n_samples`` i.i.d Gaussian vectors of size ``n_features`` and reshape them\ninto squares (edges are equal to ``n_features ** (1/2)``). Then, to introduce\nsome spatial structure, we apply a Gaussian filter that correlates features\nthat are nearby. The 2D data are then flattened into a design matrix ``X`` to\nrepresent it as a regression setting and to ease the computation of the\nsimulated target ``y`` (see below). Then, we construct the weight map ``w``\nwhich has the same shape as the 2D data, as it contains four predictive\nregions in every corner of the square. Similarly as for the construction\nof ``X``, the map ``w`` is finally flattened into a vector ``beta``. Lastly,\nto derive the target ``y``, we draw a white Gaussian noise ``epsilon`` and\nuse a linear generative model: ``y = X beta + epsilon``.\n\nThe results of this experiment show that the methods that leverage the spatial\nstructure of the data are relevant. More precisely, we show that clustered\ninference algorithms (e.g., CluDL) and ensembled clustered inference algorithms\n(e.g., EnCluDL) are more powerful than the standard inference methods (see also\nChevalier et al. (2021) [1]_). Indeed, when the number of features is much\ngreater than the number of samples, standard statistical methods are\nunlikely to recover the support. Then, the idea of clustered inference is to\ncompress the data without breaking the spatial structure, leading to a\ncompressed problem close to the original problem. This results in a more\npowerful spatially relaxed inference. Indeed, thanks to the dimension reduction\nthe support recovery is feasible. However, due to the spatial compression,\nthere is a limited (and quantifiable) spatial uncertainty concerning the shape\nof the estimated support. Finally, by considering several choices of\nspatial compression, ensembled clustered inference algorithms reduce\nsignificantly the spatial uncertainty compared to clustered inference\nalgorithms which consider only one spatial compression.\n\n\n## References\n.. [1] Chevalier, J. A., Nguyen, T. B., Thirion, B., & Salmon, J. (2021).\n Spatially relaxed inference on high-dimensional linear models.\n arXiv preprint arXiv:2106.02590.\n"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -22,7 +33,7 @@
},
"outputs": [],
"source": [
"import numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import FeatureAgglomeration\n\nfrom hidimstat.scenario import multivariate_simulation\nfrom hidimstat.stat_tools import zscore_from_pval, pval_from_cb\nfrom hidimstat.desparsified_lasso import desparsified_lasso\nfrom hidimstat.clustered_inference import clustered_inference\nfrom hidimstat.ensemble_clustered_inference import ensemble_clustered_inference"
"import numpy as np\nfrom sklearn.cluster import FeatureAgglomeration\nfrom sklearn.feature_extraction import image\n\nfrom hidimstat.clustered_inference import clustered_inference\nfrom hidimstat.desparsified_lasso import desparsified_lasso\nfrom hidimstat.ensemble_clustered_inference import ensemble_clustered_inference\nfrom hidimstat.scenario import multivariate_simulation\nfrom hidimstat.stat_tools import pval_from_cb, zscore_from_pval"
]
},
{
@@ -229,7 +240,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.4"
}
},
"nbformat": 4,
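The generative process described in the notebook text above can be summarized in a few lines; a minimal numpy/scipy sketch, with illustrative dimensions and smoothing width rather than the example's actual settings:

import numpy as np
from scipy.ndimage import gaussian_filter

rng = np.random.default_rng(0)
n_samples, n_side = 100, 40  # n_features = n_side ** 2

# Draw i.i.d. Gaussian images and smooth them so nearby features correlate.
noise = rng.standard_normal((n_samples, n_side, n_side))
X = np.stack([gaussian_filter(img, sigma=2.0) for img in noise])
X = X.reshape(n_samples, -1)  # flatten the 2D data into a design matrix

# Weight map with four predictive regions, one in each corner of the square.
w = np.zeros((n_side, n_side))
s = 5
w[:s, :s] = w[:s, -s:] = w[-s:, :s] = w[-s:, -s:] = 0.1
beta = w.ravel()

# Linear generative model: y = X beta + epsilon.
epsilon = rng.standard_normal(n_samples)
y = X @ beta + epsilon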
@@ -22,7 +22,7 @@
},
"outputs": [],
"source": [
"import numpy as np\nimport pandas as pd\nfrom sklearn.utils import Bunch\nfrom sklearn.cluster import FeatureAgglomeration\nfrom sklearn.feature_extraction import image\nfrom sklearn.linear_model import Ridge\nfrom nilearn import datasets\nfrom nilearn.input_data import NiftiMasker\nfrom nilearn.image import mean_img\nfrom nilearn.plotting import plot_stat_map, show\n\nfrom hidimstat.stat_tools import zscore_from_pval, pval_from_scale\nfrom hidimstat.standardized_svr import standardized_svr\nfrom hidimstat.permutation_test import permutation_test, permutation_test_cv\nfrom hidimstat.adaptive_permutation_threshold import ada_svr\nfrom hidimstat.clustered_inference import clustered_inference\nfrom hidimstat.ensemble_clustered_inference import ensemble_clustered_inference"
"import numpy as np\nimport pandas as pd\nfrom nilearn import datasets\nfrom nilearn.image import mean_img\nfrom nilearn.input_data import NiftiMasker\nfrom nilearn.plotting import plot_stat_map, show\nfrom sklearn.cluster import FeatureAgglomeration\nfrom sklearn.feature_extraction import image\nfrom sklearn.linear_model import Ridge\nfrom sklearn.utils import Bunch\n\nfrom hidimstat.adaptive_permutation_threshold import ada_svr\nfrom hidimstat.clustered_inference import clustered_inference\nfrom hidimstat.ensemble_clustered_inference import ensemble_clustered_inference\nfrom hidimstat.permutation_test import permutation_test, permutation_test_cv\nfrom hidimstat.standardized_svr import standardized_svr\nfrom hidimstat.stat_tools import pval_from_scale, zscore_from_pval"
]
},
{
@@ -301,7 +301,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.4"
}
},
"nbformat": 4,
@@ -22,7 +22,7 @@
},
"outputs": [],
"source": [
"import numpy as np\nfrom hidimstat.BBI import BlockBasedImportance\nfrom sklearn.datasets import load_diabetes\nimport matplotlib.pyplot as plt\n\nplt.rcParams.update({\"font.size\": 14})\n\n# Fixing the random seed\nrng = np.random.RandomState(2024)\n\ndiabetes = load_diabetes()\nX, y = diabetes.data, diabetes.target\n\n# Use or not a cross-validation with the provided learner\nk_fold = 2\n# Identifying the categorical (nominal & ordinal) variables\nlist_nominal = {}"
"import matplotlib.pyplot as plt\nimport numpy as np\nfrom sklearn.datasets import load_diabetes\n\nfrom hidimstat.BBI import BlockBasedImportance\n\nplt.rcParams.update({\"font.size\": 14})\n\n# Fixing the random seed\nrng = np.random.RandomState(2024)\n\ndiabetes = load_diabetes()\nX, y = diabetes.data, diabetes.target\n\n# Use or not a cross-validation with the provided learner\nk_fold = 2\n# Identifying the categorical (nominal & ordinal) variables\nvariables_categories = {}"
]
},
{
@@ -40,7 +40,7 @@
},
"outputs": [],
"source": [
"bbi_perm = BlockBasedImportance(\n estimator=\"RF\",\n importance_estimator=\"Mod_RF\",\n do_hypertuning=True,\n dict_hypertuning=None,\n conditional=False,\n group_stacking=False,\n problem_type=\"regression\",\n k_fold=k_fold,\n list_nominal=list_nominal,\n n_jobs=10,\n verbose=0,\n n_perm=100,\n)\nbbi_perm.fit(X, y)\nprint(\"Computing the importance scores with standard permutation\")\nresults_perm = bbi_perm.compute_importance()\npvals_perm = -np.log10(results_perm[\"pval\"] + 1e-10)"
"bbi_perm = BlockBasedImportance(\n estimator=\"RF\",\n importance_estimator=\"residuals_RF\",\n do_hypertuning=True,\n dict_hypertuning=None,\n conditional=False,\n group_stacking=False,\n problem_type=\"regression\",\n k_fold=k_fold,\n variables_categories=variables_categories,\n n_jobs=10,\n verbose=0,\n n_permutations=100,\n)\nbbi_perm.fit(X, y)\nprint(\"Computing the importance scores with standard permutation\")\nresults_perm = bbi_perm.compute_importance()\npvals_perm = -np.log10(results_perm[\"pval\"] + 1e-10)"
]
},
{
@@ -58,7 +58,7 @@
},
"outputs": [],
"source": [
"bbi_cond = BlockBasedImportance(\n estimator=\"RF\",\n importance_estimator=\"Mod_RF\",\n do_hypertuning=True,\n dict_hypertuning=None,\n conditional=True,\n group_stacking=False,\n problem_type=\"regression\",\n k_fold=k_fold,\n list_nominal=list_nominal,\n n_jobs=10,\n verbose=0,\n n_perm=100,\n)\nbbi_cond.fit(X, y)\nprint(\"Computing the importance scores with conditional permutation\")\nresults_cond = bbi_cond.compute_importance()\npvals_cond = -np.log10(results_cond[\"pval\"] + 1e-5)"
"bbi_cond = BlockBasedImportance(\n estimator=\"RF\",\n importance_estimator=\"residuals_RF\",\n do_hypertuning=True,\n dict_hypertuning=None,\n conditional=True,\n group_stacking=False,\n problem_type=\"regression\",\n k_fold=k_fold,\n variables_categories=variables_categories,\n n_jobs=10,\n verbose=0,\n n_permutations=100,\n)\nbbi_cond.fit(X, y)\nprint(\"Computing the importance scores with conditional permutation\")\nresults_cond = bbi_cond.compute_importance()\npvals_cond = -np.log10(results_cond[\"pval\"] + 1e-5)"
]
},
{
@@ -96,7 +96,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.4"
}
},
"nbformat": 4,
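Both variants above convert the returned p-values to a -log10 scale before plotting. A minimal sketch of such a comparison plot, with hypothetical p-values standing in for results_perm["pval"] and results_cond["pval"]:

import matplotlib.pyplot as plt
import numpy as np

# Hypothetical p-values for three variables (placeholders, not real results).
pvals_perm = np.array([0.001, 0.20, 0.03])
pvals_cond = np.array([0.010, 0.45, 0.08])

x = np.arange(len(pvals_perm))
width = 0.35
fig, ax = plt.subplots()
ax.bar(x - width / 2, -np.log10(pvals_perm + 1e-10), width, label="standard")
ax.bar(x + width / 2, -np.log10(pvals_cond + 1e-10), width, label="conditional")
ax.axhline(-np.log10(0.05), color="red", linestyle="--", label="p = 0.05")
ax.set_xlabel("variable index")
ax.set_ylabel("-log10(p-value)")
ax.legend()
plt.show()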
Binary file modified docs/_images/sphx_glr_plot_fmri_data_example_001.png
Binary file modified docs/_images/sphx_glr_plot_fmri_data_example_thumb.png
30 changes: 21 additions & 9 deletions docs/_sources/auto_examples/plot_2D_simulation_example.rst.txt
@@ -68,26 +68,38 @@ References
Spatially relaxed inference on high-dimensional linear models.
arXiv preprint arXiv:2106.02590.
- .. GENERATED FROM PYTHON SOURCE LINES 55-57
+ .. GENERATED FROM PYTHON SOURCE LINES 53-56
+ .. code-block:: Python
+     import matplotlib.pyplot as plt
+ .. GENERATED FROM PYTHON SOURCE LINES 57-59
Imports needed for this script
------------------------------

- .. GENERATED FROM PYTHON SOURCE LINES 57-69
+ .. GENERATED FROM PYTHON SOURCE LINES 59-69
.. code-block:: Python
import numpy as np
- import matplotlib.pyplot as plt
- from sklearn.feature_extraction import image
from sklearn.cluster import FeatureAgglomeration
+ from sklearn.feature_extraction import image
- from hidimstat.scenario import multivariate_simulation
- from hidimstat.stat_tools import zscore_from_pval, pval_from_cb
- from hidimstat.desparsified_lasso import desparsified_lasso
from hidimstat.clustered_inference import clustered_inference
+ from hidimstat.desparsified_lasso import desparsified_lasso
from hidimstat.ensemble_clustered_inference import ensemble_clustered_inference
+ from hidimstat.scenario import multivariate_simulation
+ from hidimstat.stat_tools import pval_from_cb, zscore_from_pval
@@ -542,7 +554,7 @@ randomization.

.. rst-class:: sphx-glr-timing

- **Total running time of the script:** (1 minutes 3.468 seconds)
+ **Total running time of the script:** (1 minutes 6.095 seconds)

**Estimated memory usage:** 101 MB

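The clustered inference step this example describes compresses features with a spatially constrained clustering before inference. A minimal sketch of that compression, using the FeatureAgglomeration and grid_to_graph imports shown above (sizes are illustrative, not the example's settings):

import numpy as np
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction import image

n_samples, n_side = 100, 40  # illustrative; n_features = n_side ** 2
X = np.random.randn(n_samples, n_side * n_side)  # stands in for the simulated design

# Connectivity graph that only lets neighboring pixels merge, so the
# compression preserves the 2D spatial structure.
connectivity = image.grid_to_graph(n_x=n_side, n_y=n_side)
ward = FeatureAgglomeration(n_clusters=200, connectivity=connectivity)
X_reduced = ward.fit_transform(X)  # shape (100, 200): one column per spatial cluster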
@@ -29,15 +29,16 @@ and its conditional variant on the diabetes dataset for the single-level case.
Imports needed for this script
------------------------------

- .. GENERATED FROM PYTHON SOURCE LINES 12-31
+ .. GENERATED FROM PYTHON SOURCE LINES 12-32
.. code-block:: Python
+ import matplotlib.pyplot as plt
import numpy as np
- from hidimstat.BBI import BlockBasedImportance
from sklearn.datasets import load_diabetes
- import matplotlib.pyplot as plt
+ from hidimstat.BBI import BlockBasedImportance
plt.rcParams.update({"font.size": 14})
@@ -50,7 +51,7 @@ Imports needed for this script
# Use or not a cross-validation with the provided learner
k_fold = 2
# Identifying the categorical (nominal & ordinal) variables
- list_nominal = {}
+ variables_categories = {}
@@ -59,29 +60,29 @@
- .. GENERATED FROM PYTHON SOURCE LINES 32-34
+ .. GENERATED FROM PYTHON SOURCE LINES 33-35
Standard Variable Importance
----------------------------

- .. GENERATED FROM PYTHON SOURCE LINES 34-54
+ .. GENERATED FROM PYTHON SOURCE LINES 35-55
.. code-block:: Python
bbi_perm = BlockBasedImportance(
estimator="RF",
importance_estimator="Mod_RF",
importance_estimator="residuals_RF",
do_hypertuning=True,
dict_hypertuning=None,
conditional=False,
group_stacking=False,
problem_type="regression",
k_fold=k_fold,
- list_nominal=list_nominal,
+ variables_categories=variables_categories,
n_jobs=10,
verbose=0,
- n_perm=100,
+ n_permutations=100,
)
bbi_perm.fit(X, y)
print("Computing the importance scores with standard permutation")
@@ -103,29 +104,29 @@
- .. GENERATED FROM PYTHON SOURCE LINES 55-57
+ .. GENERATED FROM PYTHON SOURCE LINES 56-58
Conditional Variable Importance
-------------------------------

- .. GENERATED FROM PYTHON SOURCE LINES 57-77
+ .. GENERATED FROM PYTHON SOURCE LINES 58-78
.. code-block:: Python
bbi_cond = BlockBasedImportance(
estimator="RF",
importance_estimator="Mod_RF",
importance_estimator="residuals_RF",
do_hypertuning=True,
dict_hypertuning=None,
conditional=True,
group_stacking=False,
problem_type="regression",
k_fold=k_fold,
- list_nominal=list_nominal,
+ variables_categories=variables_categories,
n_jobs=10,
verbose=0,
- n_perm=100,
+ n_permutations=100,
)
bbi_cond.fit(X, y)
print("Computing the importance scores with conditional permutation")
@@ -147,12 +148,12 @@
- .. GENERATED FROM PYTHON SOURCE LINES 78-80
+ .. GENERATED FROM PYTHON SOURCE LINES 79-81
Plotting the comparison
-----------------------

- .. GENERATED FROM PYTHON SOURCE LINES 80-103
+ .. GENERATED FROM PYTHON SOURCE LINES 81-104
.. code-block:: Python
@@ -194,9 +195,9 @@ Plotting the comparison

.. rst-class:: sphx-glr-timing

- **Total running time of the script:** (0 minutes 46.684 seconds)
+ **Total running time of the script:** (0 minutes 50.516 seconds)

- **Estimated memory usage:** 29 MB
+ **Estimated memory usage:** 30 MB


.. _sphx_glr_download_auto_examples_plot_diabetes_variable_importance_example.py:
26 changes: 13 additions & 13 deletions docs/_sources/auto_examples/plot_fmri_data_example.rst.txt
@@ -69,21 +69,21 @@ Imports needed for this script
import numpy as np
import pandas as pd
- from sklearn.utils import Bunch
- from sklearn.cluster import FeatureAgglomeration
- from sklearn.feature_extraction import image
- from sklearn.linear_model import Ridge
from nilearn import datasets
- from nilearn.input_data import NiftiMasker
from nilearn.image import mean_img
+ from nilearn.input_data import NiftiMasker
from nilearn.plotting import plot_stat_map, show
+ from sklearn.cluster import FeatureAgglomeration
+ from sklearn.feature_extraction import image
+ from sklearn.linear_model import Ridge
+ from sklearn.utils import Bunch
- from hidimstat.stat_tools import zscore_from_pval, pval_from_scale
- from hidimstat.standardized_svr import standardized_svr
- from hidimstat.permutation_test import permutation_test, permutation_test_cv
from hidimstat.adaptive_permutation_threshold import ada_svr
from hidimstat.clustered_inference import clustered_inference
from hidimstat.ensemble_clustered_inference import ensemble_clustered_inference
+ from hidimstat.permutation_test import permutation_test, permutation_test_cv
+ from hidimstat.standardized_svr import standardized_svr
+ from hidimstat.stat_tools import pval_from_scale, zscore_from_pval
@@ -180,11 +180,11 @@ You may choose a subject in [1, 2, 3, 4, 5, 6]. By default subject=2.
Dataset created in /home/runner/nilearn_data/haxby2001
Downloading data from https://www.nitrc.org/frs/download.php/7868/mask.nii.gz ...
- ...done. (0 seconds, 0 min)
+ ...done. (1 seconds, 0 min)
Downloading data from http://data.pymvpa.org/datasets/haxby2001/MD5SUMS ...
...done. (0 seconds, 0 min)
Downloading data from http://data.pymvpa.org/datasets/haxby2001/subj2-2010.01.14.tar.gz ...
- Downloaded 81281024 of 291168628 bytes (27.9%, 2.6s remaining) Downloaded 181968896 of 291168628 bytes (62.5%, 1.2s remaining) Downloaded 283533312 of 291168628 bytes (97.4%, 0.1s remaining) ...done. (3 seconds, 0 min)
+ Downloaded 21733376 of 291168628 bytes (7.5%, 12.8s remaining) Downloaded 64233472 of 291168628 bytes (22.1%, 7.3s remaining) Downloaded 106921984 of 291168628 bytes (36.7%, 5.3s remaining) Downloaded 148914176 of 291168628 bytes (51.1%, 3.9s remaining) Downloaded 189816832 of 291168628 bytes (65.2%, 2.7s remaining) Downloaded 231366656 of 291168628 bytes (79.5%, 1.6s remaining) Downloaded 273203200 of 291168628 bytes (93.8%, 0.5s remaining) ...done. (8 seconds, 0 min)
Extracting data from /home/runner/nilearn_data/haxby2001/def37a305edfda829916fa14c9ea08f8/subj2-2010.01.14.tar.gz..... done.


@@ -361,7 +361,7 @@ However you might benefit from clustering randomization taking
.. code-block:: none
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
- [Parallel(n_jobs=2)]: Done 5 out of 5 | elapsed: 33.0s finished
+ [Parallel(n_jobs=2)]: Done 5 out of 5 | elapsed: 33.3s finished
@@ -599,9 +599,9 @@ spurious discoveries.
.. rst-class:: sphx-glr-timing

- **Total running time of the script:** (1 minutes 27.121 seconds)
+ **Total running time of the script:** (1 minutes 30.353 seconds)

- **Estimated memory usage:** 2605 MB
+ **Estimated memory usage:** 2670 MB


.. _sphx_glr_download_auto_examples_plot_fmri_data_example.py:
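For reference, the download log above corresponds to nilearn's standard Haxby fetcher. A minimal sketch of the data-loading step, assuming the nilearn API used in the imports above (not this example's full pipeline):

from nilearn import datasets
from nilearn.image import mean_img
from nilearn.input_data import NiftiMasker

# Fetch subject 2 (the example's default) of the Haxby dataset.
haxby = datasets.fetch_haxby(subjects=[2])

# Mask the 4D functional image into a (n_scans, n_voxels) design matrix.
masker = NiftiMasker(mask_img=haxby.mask, standardize=True)
X = masker.fit_transform(haxby.func[0])

# Mean EPI image, useful as a background when plotting statistical maps.
background = mean_img(haxby.func[0])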